Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 37 additions & 5 deletions probeflow/io/readers/createc_dat.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,20 +228,52 @@ def _split_createc_dat_payload(path: Path, raw: bytes) -> tuple[bytes, bytes]:

marker = b"DATA"
pos = raw.find(marker)
if pos < 0:
raise ValueError(
f"{path.name}: missing DATA marker — not a valid Createc .dat file"
)

# ``zlib.decompress`` already tolerates trailing bytes after a complete
# stream, so reaching the end of this loop means no DATA marker was followed
# by an inflatable zlib stream. Track which way it failed so the message can
# distinguish a truncated/corrupt payload (re-copy the file) from a Createc
# layout this reader does not support (a code gap).
zlib_header_seen = False
last_zlib_error: zlib.error | None = None
while pos >= 0:
start = pos + len(marker)
if start < len(raw) and raw[start] == 0x78:
zlib_header_seen = True
try:
return raw[:pos], zlib.decompress(raw[start:])
except zlib.error:
pass
except zlib.error as exc:
last_zlib_error = exc
pos = raw.find(marker, start)

if marker not in raw:
token = _createc_format_token(raw)
if zlib_header_seen:
# A zlib header (0x78) was present but the stream would not inflate —
# almost always a file that was incompletely written or copied (a scan
# still being saved, or a partial/interrupted network-drive copy).
raise ValueError(
f"{path.name}: missing DATA marker — not a valid Createc .dat file"
f"{path.name}: the compressed image payload after the DATA marker is "
f"corrupt or truncated ({last_zlib_error}); the file may be "
f"incompletely written or copied (file is {len(raw)} bytes, "
f"format token {token!r})"
)
raise ValueError(f"{path.name}: zlib decompression failed after DATA marker")
# No 0x78 zlib header followed any DATA marker: the image block is not in the
# zlib-compressed layout this reader supports.
raise ValueError(
f"{path.name}: no zlib-compressed image payload found after the DATA "
f"marker — unsupported Createc .dat variant (format token {token!r})"
)


def _createc_format_token(raw: bytes) -> str:
"""Return the leading Createc format token (e.g. ``[Paramco32]``) for messages."""

head = raw[:64].split(b"\r\n", 1)[0].split(b"\n", 1)[0]
return head.decode("ascii", "replace").strip()


def _parse_createc_dat_header(hb: bytes) -> dict[str, str]:
Expand Down
35 changes: 35 additions & 0 deletions tests/test_createc_dat_decode.py
Original file line number Diff line number Diff line change
Expand Up @@ -449,3 +449,38 @@ def test_createc_ambiguous_header_stays_unknown():
assert meta["acquisition_mode"] == "unknown"
assert meta["feedback_mode"] == "unknown"
assert meta["confidence"] == "low"


def test_truncated_zlib_payload_reports_corruption_not_unsupported(tmp_path):
    """A zlib stream cut off after the DATA marker is reported as corrupt/truncated.

    This mimics a partially copied file (e.g. a half-copied network file); the
    error must not claim the file is an unsupported variant.
    """
    pixels = np.arange(1, 17, dtype="<f4").tobytes()
    compressed = zlib.compress(pixels)
    # Keep only the first half of the compressed stream to simulate truncation.
    half_stream = compressed[: len(compressed) // 2]

    target = tmp_path / "truncated.dat"
    target.write_bytes(b"[Paramco32]\nNum.X=2\nNum.Y=2\n" + b"DATA" + half_stream)

    with pytest.raises(ValueError, match="corrupt or truncated"):
        read_createc_dat_report(target, include_raw=False)


def test_non_zlib_payload_reports_unsupported_variant(tmp_path):
    """Bytes after DATA that do not start with a zlib header (0x78) are rejected
    as an unsupported layout; the error names it an unsupported variant."""
    payload = b"\x00\x01\x02\x03 raw uncompressed bytes"
    target = tmp_path / "uncompressed.dat"
    target.write_bytes(b"[Paramco99]\nNum.X=2\nNum.Y=2\n" + b"DATA" + payload)

    with pytest.raises(ValueError, match="unsupported Createc .dat variant"):
        read_createc_dat_report(target, include_raw=False)


def test_trailing_bytes_after_zlib_stream_still_decode(tmp_path):
    """Extra bytes following a complete zlib stream must not prevent decoding.

    ``zlib.decompress`` tolerates trailing data, so only a genuinely truncated
    stream should make the reader fail.
    """
    pixels = np.arange(1, 17, dtype="<f4").tobytes()
    stream = zlib.compress(pixels)
    padding = b"\x00\x00trailing"

    target = tmp_path / "trailing.dat"
    target.write_bytes(b"[Paramco32]\nNum.X=2\nNum.Y=2\n" + b"DATA" + stream + padding)

    result = read_createc_dat_report(target, include_raw=False)
    assert result.detected_channel_count == 4
Loading