Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0


## Unreleased

### Added
- Support for renaming samples while copying (#6)
- Support for symbolic linking instead of copying (#9)


## [0.1.3] 2025-12-03

### Added
- Option to add source and destination paths to the copy log with a `--verbose` flag (#5)

Expand Down
5 changes: 4 additions & 1 deletion ezfastq/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,12 @@ def copy(
prefix="",
workdir=Path("."),
subdir="seq",
link=False,
verbose=False,
):
copier = FastqCopier.from_dir(sample_name_map, seq_path, prefix=prefix, pair_mode=pair_mode)
copier = FastqCopier.from_dir(
sample_name_map, seq_path, prefix=prefix, pair_mode=pair_mode, link=link
)
copier.copy_files(workdir / subdir)
copier.print_copy_log()
nlogs = len(list((workdir / subdir).glob("copy-log-*.toml")))
Expand Down
7 changes: 7 additions & 0 deletions ezfastq/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def main(arglist=None):
prefix=args.prefix,
workdir=args.workdir,
subdir=args.subdir,
link=args.link,
verbose=args.verbose,
)

Expand Down Expand Up @@ -106,6 +107,12 @@ def get_parser():
default=0,
help="specify 1 to indicate that all samples are single-end, or 2 to indicate that all samples are paired-end; by default, read layout is inferred automatically on a per-sample basis",
)
parser.add_argument(
"-l",
"--link",
action="store_true",
help="symbolically link files rather than copying; only supported for gzip-compressed files",
)
parser.add_argument(
"-V",
"--verbose",
Expand Down
15 changes: 10 additions & 5 deletions ezfastq/copier.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@ class FastqCopier:
skipped_files: List
file_map: SampleFastqMap
prefix: str = ""
link: bool = False

@classmethod
def from_dir(cls, sample_name_map, data_path, prefix="", pair_mode=PairMode.Unspecified):
def from_dir(
cls, sample_name_map, data_path, prefix="", pair_mode=PairMode.Unspecified, link=False
):
copied_files = list()
skipped_files = list()
file_map = SampleFastqMap.new(sample_name_map.keys(), data_path, pair_mode=pair_mode)
copier = cls(sample_name_map, copied_files, skipped_files, file_map, prefix)
copier = cls(sample_name_map, copied_files, skipped_files, file_map, prefix, link)
return copier

def copy_files(self, destination):
Expand All @@ -67,7 +70,7 @@ def copy_files(self, destination):
else:
desc = f"[bold red]{fastq.sample:>16s} R{fastq.read}"
progress.update(task, description=desc)
was_copied = fastq.check_and_copy(destination)
was_copied = fastq.check_and_copy(destination, link=self.link)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch!

progress.update(task, advance=1)
if was_copied:
self.copied_files.append(fastq)
Expand Down Expand Up @@ -112,11 +115,13 @@ def __iter__(self):
def __str__(self):
output = StringIO()
if len(self.copied_files) > 0:
print("[CopiedFiles]", file=output)
header = "[LinkedFiles]" if self.link else "[CopiedFiles]"
print(header, file=output)
for fastq in self.copied_files:
print(fastq, file=output)
if len(self.skipped_files) > 0:
print("\n[SkippedFiles]\nalready_copied = [", file=output)
key = "linked" if self.link else "copied"
print(f"\n[SkippedFiles]\nalready_{key} = [", file=output)
for fastq in self.skipped_files:
print(f' "{fastq.source_path.name}",', file=output)
print("]", file=output)
Expand Down
17 changes: 16 additions & 1 deletion ezfastq/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,14 @@ class FastqFile:
def __str__(self):
return f'"{self.source_path.name}" = "{self.name}"'

def check_and_copy(self, destination):
def check_and_copy(self, destination, link=False):
destination = Path(destination)
compressed_copy = destination / self.name
if compressed_copy.is_file():
return False
elif link is True:
self.link(destination)
return True
else:
self.copy(destination)
return True
Expand All @@ -42,6 +45,14 @@ def copy(self, destination):
if self.extension == "fastq":
run(["gzip", str(file_copy)])

def link(self, destination):
    """Create a symbolic link to the source file inside `destination`.

    The link is named `self._working_name`. `destination` is created,
    including any missing parents, if it does not exist yet.

    Args:
        destination: directory that will hold the link (str or Path).

    Raises:
        LinkError: if `self.extension` is anything other than "fastq.gz".
    """
    if self.extension != "fastq.gz":
        message = "symbolic linking only supported for gzip-compressed files"
        raise LinkError(message)
    # Accept plain strings as well as Path objects, as check_and_copy does.
    destination = Path(destination)
    destination.mkdir(parents=True, exist_ok=True)
    sym_link = destination / self._working_name
    # A relative link target is interpreted relative to the directory that
    # holds the link, not the current working directory, so a relative
    # source path would produce a dangling link. Point at the resolved
    # absolute path instead.
    sym_link.symlink_to(Path(self.source_path).resolve())

@property
def name(self):
return f"{self.stem}.fastq.gz"
Expand All @@ -58,3 +69,7 @@ def extension(self):
@property
def _working_name(self):
return f"{self.stem}.{self.extension}"


class LinkError(ValueError):
    """Raised when a file cannot be symbolically linked."""
13 changes: 13 additions & 0 deletions ezfastq/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,19 @@ def test_copy_verbose(tmp_path):
assert Path(log_data["Paths"]["destination"]) == tmp_path / "seq"


def test_link(tmp_path):
    """Run the CLI with `--link` and check the created links and the copy log."""
    seq_path = files("ezfastq") / "tests" / "data" / "flat"
    arglist = [seq_path, "test1", "test2", "--workdir", tmp_path, "--link"]
    cli.main(arglist)
    linked = list((tmp_path / "seq").glob("*_R?.fastq.gz"))
    assert len(linked) == 4
    # The file count alone would also be satisfied by regular copies.
    assert all(fastq.is_symlink() for fastq in linked)
    copy_log = tmp_path / "seq" / "copy-log-1.toml"
    with open(copy_log, "rb") as fh:
        log_data = tomllib.load(fh)
    assert len(log_data["LinkedFiles"]) == 4
    assert "CopiedFiles" not in log_data
    assert "SkippedFiles" not in log_data


def test_copy_subdir(tmp_path):
seq_path = files("ezfastq") / "tests" / "data" / "flat"
arglist = [seq_path, "test1", "test2", "--workdir", tmp_path, "--subdir", "seq/PROJa/RUNb"]
Expand Down
27 changes: 27 additions & 0 deletions ezfastq/tests/test_copier.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,11 @@
# -------------------------------------------------------------------------------------------------

from ezfastq.copier import FastqCopier
from ezfastq.fastq import LinkError
from ezfastq.namemap import NameMap
from importlib.resources import files
from itertools import product
import pytest

try:
import tomllib
Expand Down Expand Up @@ -58,6 +60,31 @@ def test_copier_copy(tmp_path):
assert len(copier3.skipped_files) == 3


def test_copier_link(tmp_path):
    """Link two samples into `tmp_path` and check the links and the log text."""
    sample_names = NameMap.from_arglist(["test1", "test2"])
    copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1, link=True)
    copier.copy_files(tmp_path)
    assert len(copier.copied_files) == 4
    # copy_files() was handed tmp_path itself, so that is where the links
    # live. Globbing a directory that was never created matches nothing,
    # which would let the all() check pass without inspecting any file.
    linked = list(tmp_path.glob("*.fastq.gz"))
    assert len(linked) == 4
    assert all(fq.is_symlink() for fq in linked)
    observed = str(copier)
    expected = """
[LinkedFiles]
"test1_S1_L001_R1_001.fastq.gz" = "test1_R1.fastq.gz"
"test1_S1_L001_R2_001.fastq.gz" = "test1_R2.fastq.gz"
"test2_R1.fq.gz" = "test2_R1.fastq.gz"
"test2_R2.fq.gz" = "test2_R2.fastq.gz"
"""
    assert observed.strip() == expected.strip()


def test_copier_link_error(tmp_path):
    """Linking the test2/test3 samples is expected to raise LinkError."""
    names = NameMap.from_arglist(["test2", "test3"])
    linker = FastqCopier.from_dir(names, SEQ_PATH_1, link=True)
    expected_message = "linking only supported for gzip-compressed files"
    with pytest.raises(LinkError, match=expected_message):
        linker.copy_files(tmp_path)


def test_copier_prefix(tmp_path):
sample_names = NameMap.from_arglist(["test2", "test3"])
copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1, prefix="abc_")
Expand Down
9 changes: 6 additions & 3 deletions ezfastq/tests/test_fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,16 @@ def test_fastq_file_copy(tmp_path):
assert file_copy.is_file()


def test_fastq_file_check_and_copy(tmp_path):
@pytest.mark.parametrize("link_mode", [True, False])
def test_fastq_file_check_and_copy(tmp_path, link_mode):
destination = tmp_path / "seq"
inpath = files("ezfastq") / "tests" / "data" / "flat" / "test1_S1_L001_R2_001.fastq.gz"
infile = FastqFile(inpath, "test1", 2)
was_copied = infile.check_and_copy(destination)
was_copied = infile.check_and_copy(destination, link=link_mode)
assert was_copied
file_copy = tmp_path / "seq" / "test1_R2.fastq.gz"
assert file_copy.is_file()
was_copied = infile.check_and_copy(destination)
if link_mode is True:
assert file_copy.is_symlink()
was_copied = infile.check_and_copy(destination, link=link_mode)
assert not was_copied