From c54d18406ae0d61f1dc556ba8a4c27d23270b206 Mon Sep 17 00:00:00 2001 From: Peter Jacobson Date: Fri, 5 Jun 2026 15:51:04 +1000 Subject: [PATCH] Distinguish truncated vs unsupported Createc .dat load failures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The decoder raised one ambiguous message — "zlib decompression failed after DATA marker" — for two very different situations, so a ThumbnailLoader failure gave no way to tell whether the file was incompletely written/copied (re-copy it) or in a Createc layout the reader doesn't support (a code gap). zlib.decompress already tolerates trailing bytes after a complete stream, so reaching the fall-through means no DATA marker yielded an inflatable stream. Track which way it failed: - a zlib header (0x78) was present but the stream would not inflate → report corruption/truncation (with the zlib error, file size and format token), the signature of a half-saved scan or an interrupted network-drive copy; - no 0x78 followed any DATA marker → report an unsupported variant and name the leading format token (e.g. [Paramco32]). Successful decodes are unchanged. Adds tests for both messages plus a guard that trailing bytes after a complete stream still decode. 
Co-Authored-By: Claude Opus 4.8 --- probeflow/io/readers/createc_dat.py | 42 +++++++++++++++++++++++++---- tests/test_createc_dat_decode.py | 35 ++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 5 deletions(-) diff --git a/probeflow/io/readers/createc_dat.py b/probeflow/io/readers/createc_dat.py index 574e737..95e0428 100644 --- a/probeflow/io/readers/createc_dat.py +++ b/probeflow/io/readers/createc_dat.py @@ -228,20 +228,52 @@ def _split_createc_dat_payload(path: Path, raw: bytes) -> tuple[bytes, bytes]: marker = b"DATA" pos = raw.find(marker) + if pos < 0: + raise ValueError( + f"{path.name}: missing DATA marker — not a valid Createc .dat file" + ) + + # ``zlib.decompress`` already tolerates trailing bytes after a complete + # stream, so reaching the end of this loop means no DATA marker was followed + # by an inflatable zlib stream. Track which way it failed so the message can + # distinguish a truncated/corrupt payload (re-copy the file) from a Createc + # layout this reader does not support (a code gap). + zlib_header_seen = False + last_zlib_error: zlib.error | None = None while pos >= 0: start = pos + len(marker) if start < len(raw) and raw[start] == 0x78: + zlib_header_seen = True try: return raw[:pos], zlib.decompress(raw[start:]) - except zlib.error: - pass + except zlib.error as exc: + last_zlib_error = exc pos = raw.find(marker, start) - if marker not in raw: + token = _createc_format_token(raw) + if zlib_header_seen: + # A zlib header (0x78) was present but the stream would not inflate — + # almost always a file that was incompletely written or copied (a scan + # still being saved, or a partial/interrupted network-drive copy). 
raise ValueError( - f"{path.name}: missing DATA marker — not a valid Createc .dat file" + f"{path.name}: the compressed image payload after the DATA marker is " + f"corrupt or truncated ({last_zlib_error}); the file may be " + f"incompletely written or copied (file is {len(raw)} bytes, " + f"format token {token!r})" ) - raise ValueError(f"{path.name}: zlib decompression failed after DATA marker") + # No 0x78 zlib header followed any DATA marker: the image block is not in the + # zlib-compressed layout this reader supports. + raise ValueError( + f"{path.name}: no zlib-compressed image payload found after the DATA " + f"marker — unsupported Createc .dat variant (format token {token!r})" + ) + + +def _createc_format_token(raw: bytes) -> str: + """Return the leading Createc format token (e.g. ``[Paramco32]``) for messages.""" + + head = raw[:64].split(b"\r\n", 1)[0].split(b"\n", 1)[0] + return head.decode("ascii", "replace").strip() def _parse_createc_dat_header(hb: bytes) -> dict[str, str]: diff --git a/tests/test_createc_dat_decode.py b/tests/test_createc_dat_decode.py index 7801440..d853160 100644 --- a/tests/test_createc_dat_decode.py +++ b/tests/test_createc_dat_decode.py @@ -449,3 +449,38 @@ def test_createc_ambiguous_header_stays_unknown(): assert meta["acquisition_mode"] == "unknown" assert meta["feedback_mode"] == "unknown" assert meta["confidence"] == "low" + + +def test_truncated_zlib_payload_reports_corruption_not_unsupported(tmp_path): + """A DATA marker followed by a truncated zlib stream (e.g. a half-copied + network file) must report corruption/truncation, not an unsupported variant.""" + dat = tmp_path / "truncated.dat" + header = b"[Paramco32]\nNum.X=2\nNum.Y=2\n" + full = zlib.compress(np.arange(1, 17, dtype="