Skip to content

Commit 6477a8d

Browse files
authored
Merge pull request #10 from bioforensics/checksum
Verify file copies
2 parents 501c5b4 + 83badfe commit 6477a8d

File tree

2 files changed

+16
-0
lines changed

2 files changed

+16
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
### Added
1212
- Support for renaming samples while copying (#6)
1313
- Support for symbolic linking instead of copying (#9)
14+
- SHA256 checksums to ensure integrity of copied files (#10)
1415

1516

1617
## [0.1.3] 2025-12-03

ezfastq/fastq.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
# -------------------------------------------------------------------------------------------------
99

1010
from dataclasses import dataclass
11+
from hashlib import sha256
1112
from pathlib import Path
1213
from shutil import copy
1314
from subprocess import run
@@ -42,6 +43,8 @@ def copy(self, destination):
4243
destination.mkdir(parents=True, exist_ok=True)
4344
file_copy = destination / self._working_name
4445
copy(self.source_path, file_copy)
46+
if file_sha256(self.source_path) != file_sha256(file_copy): # pragma: no cover
47+
raise CopyError(f"checksum failed for {self.source_path}")
4548
if self.extension == "fastq":
4649
run(["gzip", str(file_copy)])
4750

@@ -71,5 +74,17 @@ def _working_name(self):
7174
return f"{self.stem}.{self.extension}"
7275

7376

77+
def file_sha256(path, block_size=65536):
    """Return the SHA-256 hex digest of the file at *path*.

    The file is opened in binary mode and fed to the hash in chunks of
    *block_size* bytes, so with a positive block size the whole file is
    never held in memory at once.

    Args:
        path: Location of the file to hash; anything ``open`` accepts.
        block_size: Number of bytes requested per read.

    Returns:
        The digest as a hexadecimal string.

    Raises:
        ValueError: If *block_size* is 0.
    """
    # A zero-byte read returns b"" immediately, which would end the loop
    # before any data is hashed and report the empty-input digest for every
    # file -- making any two files appear identical.
    if block_size == 0:
        raise ValueError("block_size must be non-zero")
    sha = sha256()
    with open(path, "rb") as fh:
        # Two-argument iter(): keep calling read() until it yields b"" (EOF).
        for block in iter(lambda: fh.read(block_size), b""):
            sha.update(block)
    return sha.hexdigest()
83+
84+
85+
class CopyError(RuntimeError):
    """Raised when a copied file's SHA-256 checksum differs from its source."""
87+
88+
7489
# NOTE(review): no raise site is visible in this excerpt -- presumably used by
# the symbolic-linking code path; confirm against the rest of the module.
class LinkError(ValueError):
    """ValueError subclass for link-related failures."""

0 commit comments

Comments
 (0)