From 1a9dfdbebb07ebdff6067a65c985063ec47c46bc Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Fri, 5 Sep 2025 18:33:47 +0100 Subject: [PATCH 01/28] added initial tests for Context class --- tests/test_context.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_context.py b/tests/test_context.py index 0cc6c45..f9df20b 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -41,3 +41,32 @@ def test_fileparts(examples, schema): suffix='T1w', extension='.nii', ) + +def test_context(examples, schema): + + tree = FileTree.read_from_filesystem(examples / 'synthetic') + ds = context.Dataset(tree, schema) + T1w = tree / 'sub-01' / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii' + + file_context = context.Context(T1w, ds) + + assert file_context.schema == schema + assert file_context.dataset == ds + assert file_context.entiities == {'sub': '01', 'ses': '01'} + assert file_context.path == '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii' + assert file_context.datatype == 'anat' + assert file_context.suffix == 'T1w' + assert file_context.extension == '.nii' + assert file_context.modality == 'mri' + assert file_context.size == 352 + + ## Tests for: + # subject + # sidecar + # associations + # columns + # json + # gzip + # nifti_header + # ome + # tiff From 2db6ba9a2cdefbf9a582cf2a3e55df3f73437d91 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Fri, 5 Sep 2025 18:34:07 +0100 Subject: [PATCH 02/28] Context class WIP --- src/bids_validator/context.py | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 7b940a1..80e4d8f 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -298,3 +298,78 @@ def from_file(cls, file: FileTree, schema: Namespace) -> t.Self: suffix=suffix, extension=extension, ) + +@attrs.define +class Context: + + file: FileTree + dataset: Dataset + schema: Namespace = attrs.field(init=False) + 
file_parts: FileParts = attrs.field(init=False) + + def __attrs_post_init__(self): + self.schema = self.dataset.schema + self.file_parts = FileParts.from_file(self.file, self.schema) + + @property + def path(self) -> str: + return self.file_parts.path + + @property + def entiities(self) -> dict[str, str] | None: + return self.file_parts.entities + + @property + def datatype(self) -> str | None: + return self.file_parts.datatype + + @property + def suffix(self) -> str | None: + return self.file_parts.suffix + + @property + def extension(self) -> str | None: + return self.file_parts.extension + + @property + def modality(self) -> str | None: + modalities = self.schema.rules.modalities + for mod_name, mod_dtypes in modalities.items(): + if self.datatype in mod_dtypes.datatypes: + return mod_name + + @property + def size(self) -> int: + return self.file.direntry.stat().st_size + + @property + def subject(self) -> ctx.Subject | None: + return ctx.Subject() + + @property + def associations(self) -> ctx.Associations: + return ctx.Associations() + + @property + def columns(self) -> None: + pass + + @property + def json(self) -> None: + pass + + @property + def gzip(self) -> None: + pass + + @property + def nifti_header(self) -> None: + pass + + @property + def ome(self) -> None: + pass + + @property + def tiff(self) -> None: + pass From 5cc92f5ba06c3065162f257746e2eb4511bd25bb Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Mon, 8 Sep 2025 23:38:18 +0100 Subject: [PATCH 03/28] PR feedback changes --- src/bids_validator/context.py | 25 ++++++++++++++++--------- tests/test_context.py | 6 +++--- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 80e4d8f..27db178 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -304,21 +304,24 @@ class Context: file: FileTree dataset: Dataset - schema: Namespace = attrs.field(init=False) file_parts: FileParts = 
attrs.field(init=False) def __attrs_post_init__(self): - self.schema = self.dataset.schema self.file_parts = FileParts.from_file(self.file, self.schema) - + + @property + def schema(self) -> Namespace: + """The BIDS specification schema.""" + return self.dataset.schema + @property def path(self) -> str: return self.file_parts.path - + @property - def entiities(self) -> dict[str, str] | None: + def entities(self) -> dict[str, str] | None: return self.file_parts.entities - + @property def datatype(self) -> str | None: return self.file_parts.datatype @@ -330,18 +333,18 @@ def suffix(self) -> str | None: @property def extension(self) -> str | None: return self.file_parts.extension - + @property def modality(self) -> str | None: modalities = self.schema.rules.modalities for mod_name, mod_dtypes in modalities.items(): if self.datatype in mod_dtypes.datatypes: return mod_name - + @property def size(self) -> int: return self.file.direntry.stat().st_size - + @property def subject(self) -> ctx.Subject | None: return ctx.Subject() @@ -373,3 +376,7 @@ def ome(self) -> None: @property def tiff(self) -> None: pass + + @property + def sidecar(self) -> None: + pass diff --git a/tests/test_context.py b/tests/test_context.py index f9df20b..f8e72db 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -50,9 +50,9 @@ def test_context(examples, schema): file_context = context.Context(T1w, ds) - assert file_context.schema == schema - assert file_context.dataset == ds - assert file_context.entiities == {'sub': '01', 'ses': '01'} + assert file_context.schema is schema + assert file_context.dataset is ds + assert file_context.entities == {'sub': '01', 'ses': '01'} assert file_context.path == '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii' assert file_context.datatype == 'anat' assert file_context.suffix == 'T1w' From 0d427f4402058be0abd39648843f84c810fbb7b8 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Mon, 8 Sep 2025 23:43:14 +0100 Subject: [PATCH 04/28] add missing docstrings 
--- src/bids_validator/context.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 27db178..d7f8622 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -299,8 +299,10 @@ def from_file(cls, file: FileTree, schema: Namespace) -> t.Self: extension=extension, ) + @attrs.define class Context: + """A context object that creates context for file on access.""" file: FileTree dataset: Dataset @@ -316,26 +318,32 @@ def schema(self) -> Namespace: @property def path(self) -> str: + """Path of the current file.""" return self.file_parts.path @property def entities(self) -> dict[str, str] | None: + """Entities parsed from the current filename.""" return self.file_parts.entities @property def datatype(self) -> str | None: + """Datatype of current file, for examples, anat.""" return self.file_parts.datatype @property def suffix(self) -> str | None: + """Suffix of current file.""" return self.file_parts.suffix @property def extension(self) -> str | None: + """Extension of current file including initial dot.""" return self.file_parts.extension @property def modality(self) -> str | None: + """Modality of current file, for examples, MRI.""" modalities = self.schema.rules.modalities for mod_name, mod_dtypes in modalities.items(): if self.datatype in mod_dtypes.datatypes: @@ -343,40 +351,50 @@ def modality(self) -> str | None: @property def size(self) -> int: + """Length of the current file in bytes.""" return self.file.direntry.stat().st_size @property def subject(self) -> ctx.Subject | None: + """Properties and contents of the current subject.""" return ctx.Subject() @property def associations(self) -> ctx.Associations: + """Associated files, indexed by suffix, selected according to the inheritance principle.""" return ctx.Associations() @property def columns(self) -> None: + """TSV columns, indexed by column header, values are arrays with column contents.""" pass 
@property def json(self) -> None: + """Contents of the current JSON file.""" pass @property def gzip(self) -> None: + """Parsed contents of gzip header.""" pass @property def nifti_header(self) -> None: + """Parsed contents of NIfTI header referenced elsewhere in schema.""" pass @property def ome(self) -> None: + """Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files.""" pass @property def tiff(self) -> None: + """TIFF file format metadata.""" pass @property def sidecar(self) -> None: + """Sidecar metadata constructed via the inheritance principle.""" pass From 72dbbeb7f87d8ae6f7b66b1e37363fe16d28cd1b Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Tue, 9 Sep 2025 00:05:38 +0100 Subject: [PATCH 05/28] add and use global mapping for datatype to modality --- src/bids_validator/context.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index d7f8622..ecb31a1 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -46,6 +46,18 @@ class ValidationError(Exception): """TODO: Add issue structure.""" +_DATATYPE_MAP = {} + + +def datatype_to_modality(datatype: str, schema: Namespace) -> str: + """Generate a global map for datatype to modality.""" + global _DATATYPE_MAP + if not _DATATYPE_MAP: + for mod_name, mod_dtypes in schema.rules.modalities.items(): + _DATATYPE_MAP |= dict.fromkeys(mod_dtypes['datatypes'], mod_name) + return _DATATYPE_MAP[datatype] + + @cache def load_tsv(file: FileTree, *, max_rows=0) -> Namespace: """Load TSV contents into a Namespace.""" @@ -130,14 +142,7 @@ def dataset_description(self) -> Namespace: @cached_property def modalities(self) -> list[str]: """List of modalities found in the dataset.""" - result = set() - - modalities = self.schema.rules.modalities - for datatype in self.datatypes: - for mod_name, mod_dtypes in modalities.items(): - if datatype in mod_dtypes.datatypes: - 
result.add(mod_name) - + result = {datatype_to_modality(datatype, self.schema) for datatype in self.datatypes} return list(result) @cached_property @@ -344,10 +349,7 @@ def extension(self) -> str | None: @property def modality(self) -> str | None: """Modality of current file, for examples, MRI.""" - modalities = self.schema.rules.modalities - for mod_name, mod_dtypes in modalities.items(): - if self.datatype in mod_dtypes.datatypes: - return mod_name + return datatype_to_modality(self.datatype, self.schema) @property def size(self) -> int: From eeb692f8728e2d422de5959d0e4af22efffb6f12 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Tue, 9 Sep 2025 14:08:21 +0100 Subject: [PATCH 06/28] add more tests for next stage for Sessions class and associated tests for Context class --- tests/test_context.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/test_context.py b/tests/test_context.py index f8e72db..0f19284 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -59,9 +59,11 @@ def test_context(examples, schema): assert file_context.extension == '.nii' assert file_context.modality == 'mri' assert file_context.size == 352 + assert isinstance(file_context.subject.sessions, context.Sessions) + assert sorted(file_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] + assert sorted(file_context.subject.sessions.session_id) == ["ses-01", "ses-02"] ## Tests for: - # subject # sidecar # associations # columns @@ -70,3 +72,14 @@ def test_context(examples, schema): # nifti_header # ome # tiff + +def test_sessions(examples): + tree = FileTree.read_from_filesystem(examples / 'synthetic') + sub01 = tree / 'sub-01' + + sessions = context.Sessions(sub01) + + assert sorted(sessions.ses_dirs) == ["ses-01", "ses-02"] + assert sorted(sessions.session_id) == ["ses-01", "ses-02"] + assert sessions.phenotype is None + From 7200e9912cbaac913752ebd9a0df2b94276fc2c9 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:06:09 
+0100 Subject: [PATCH 07/28] remove sessions.phenotype test --- tests/test_context.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_context.py b/tests/test_context.py index de15ee4..db749f1 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -92,5 +92,3 @@ def test_sessions(examples): assert sorted(sessions.ses_dirs) == ["ses-01", "ses-02"] assert sorted(sessions.session_id) == ["ses-01", "ses-02"] - assert sessions.phenotype is None - From 729bf51a01f90db2b1b4be28c744edbd829d117a Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:06:48 +0100 Subject: [PATCH 08/28] add sessions class --- src/bids_validator/context.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 18afc65..6949933 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -400,3 +400,34 @@ def tiff(self) -> None: def sidecar(self) -> None: """Sidecar metadata constructed via the inheritance principle.""" pass + +class Sessions: + """Collections of sessions in subject.""" + + def __init__(self, tree: FileTree): + self._tree = tree + + @cached_property + def ses_dirs(self) -> list[str]: + """Sessions as determined by ses-* directories.""" + return [ + child.name + for child in self._tree.children.values() + if child.is_dir and child.name.startswith('ses-') + ] + + @property + def session_id(self) -> list[str] | None: + """The session_id column of *_sessions.tsv.""" + for name, value in self._tree.children.items(): + if name.endswith('_sessions.tsv'): + return self._get_session_id(value) + else: + return None + + @staticmethod + def _get_session_id(phenotype_file: FileTree) -> list[str] | None: + columns = load_tsv(phenotype_file) + if 'session_id' not in columns: + return None + return list(columns['session_id']) From 96dd2dcf6413dc14831b7d182e031cf88fa31664 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:08:27 
+0100 Subject: [PATCH 09/28] add subject as an input param for Context class --- src/bids_validator/context.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 6949933..b7cdb56 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -311,6 +311,7 @@ class Context: file: FileTree dataset: Dataset + subject: ctx.Subject file_parts: FileParts = attrs.field(init=False) def __attrs_post_init__(self): @@ -356,11 +357,6 @@ def size(self) -> int: """Length of the current file in bytes.""" return self.file.direntry.stat().st_size - @property - def subject(self) -> ctx.Subject | None: - """Properties and contents of the current subject.""" - return ctx.Subject() - @property def associations(self) -> ctx.Associations: """Associated files, indexed by suffix, selected according to the inheritance principle.""" From 1513ff8a093b19ea297cbf1cf015fdc52cc13b73 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:15:50 +0100 Subject: [PATCH 10/28] Update and validate function Include schema as input param and instantiate dataset object --- src/bids_validator/__main__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index bbea18e..1048907 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -12,6 +12,8 @@ from bids_validator import BIDSValidator from bids_validator.types.files import FileTree +from bids_validator.context import Dataset +from bidsschematools.types import Namespace app = typer.Typer() @@ -35,7 +37,7 @@ def walk(tree: FileTree): yield child -def validate(tree: FileTree): +def validate(tree: FileTree, schema: Namespace): """Check if the file path is BIDS compliant. 
Parameters @@ -45,6 +47,7 @@ def validate(tree: FileTree): """ validator = BIDSValidator() + dataset = Dataset(tree, schema) for file in walk(tree): # The output of the FileTree.relative_path method always drops the initial for the path From 685b978b2213fe4b149bbc386711c87a02652d4f Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:17:57 +0100 Subject: [PATCH 11/28] Update walk function now takes dataset as input param, generates subject object if required, and yields context object --- src/bids_validator/__main__.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index 1048907..9a00a6c 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -12,13 +12,16 @@ from bids_validator import BIDSValidator from bids_validator.types.files import FileTree -from bids_validator.context import Dataset +from bids_validator.context import Dataset, Sessions, Context from bidsschematools.types import Namespace +from bidsschematools.types.context import Subject app = typer.Typer() +def is_subject_dir(tree): + return tree.name.startswith('sub-') -def walk(tree: FileTree): +def walk(tree: FileTree, dataset: Dataset, subject = None): """Iterate over children of a FileTree and check if they are a directory or file. 
If it's a directory then run again recursively, if it's a file file check the file name is @@ -30,11 +33,14 @@ def walk(tree: FileTree): FileTree object to iterate over """ + if subject is None and is_subject_dir(tree): + subject = Subject(Sessions(tree)) + for child in tree.children.values(): if child.is_dir: - yield from walk(child) + yield from walk(child, dataset, subject) else: - yield child + yield Context(child, dataset, subject) def validate(tree: FileTree, schema: Namespace): @@ -49,7 +55,7 @@ def validate(tree: FileTree, schema: Namespace): validator = BIDSValidator() dataset = Dataset(tree, schema) - for file in walk(tree): + for file in walk(tree, dataset): # The output of the FileTree.relative_path method always drops the initial for the path # which makes it fail the validator.is_bids check. THis may be a Windows specific thing. # This line adds it back. From d2d9975e2a0bc8748de71acb570ad47a8d64d95c Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:45:08 +0100 Subject: [PATCH 12/28] update tests --- tests/test_context.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/test_context.py b/tests/test_context.py index db749f1..7ec6dcd 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -1,5 +1,6 @@ import pytest +from bidsschematools.types.context import Subject from bids_validator import context from bids_validator.types.files import FileTree @@ -53,13 +54,14 @@ def test_walkback(synthetic_dataset, schema): assert len(sidecars) == 1 assert sidecars[0] is synthetic_dataset / 'task-nback_bold.json' -def test_context(examples, schema): +def test_context(synthetic_dataset, schema): - tree = FileTree.read_from_filesystem(examples / 'synthetic') - ds = context.Dataset(tree, schema) - T1w = tree / 'sub-01' / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii' - - file_context = context.Context(T1w, ds) + sub01 = synthetic_dataset / 'sub-01' + T1w = sub01 / 'ses-01' / 'anat' / 
'sub-01_ses-01_T1w.nii' + + subject = Subject(context.Sessions(sub01)) + ds = context.Dataset(synthetic_dataset, schema) + file_context = context.Context(T1w, ds, subject) assert file_context.schema is schema assert file_context.dataset is ds @@ -84,9 +86,8 @@ def test_context(examples, schema): # ome # tiff -def test_sessions(examples): - tree = FileTree.read_from_filesystem(examples / 'synthetic') - sub01 = tree / 'sub-01' +def test_sessions(synthetic_dataset): + sub01 = synthetic_dataset / 'sub-01' sessions = context.Sessions(sub01) From 441dd49ab3fe9c2f1e624cb6ba8dadc21a829dfb Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:50:15 +0100 Subject: [PATCH 13/28] tidy for ruff --- src/bids_validator/__main__.py | 17 +++++++++++++---- src/bids_validator/context.py | 1 + 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index 9a00a6c..9ce4948 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -10,18 +10,21 @@ import sys from typing import Annotated -from bids_validator import BIDSValidator -from bids_validator.types.files import FileTree -from bids_validator.context import Dataset, Sessions, Context from bidsschematools.types import Namespace from bidsschematools.types.context import Subject +from bids_validator import BIDSValidator +from bids_validator.context import Context, Dataset, Sessions +from bids_validator.types.files import FileTree + app = typer.Typer() + def is_subject_dir(tree): return tree.name.startswith('sub-') -def walk(tree: FileTree, dataset: Dataset, subject = None): + +def walk(tree: FileTree, dataset: Dataset, subject: Subject = None): """Iterate over children of a FileTree and check if they are a directory or file. 
If it's a directory then run again recursively, if it's a file file check the file name is @@ -31,6 +34,10 @@ def walk(tree: FileTree, dataset: Dataset, subject = None): ---------- tree : FileTree FileTree object to iterate over + dataset: Dataset + Object containing properties for entire dataset + subject: Subject + object containing subject and session info """ if subject is None and is_subject_dir(tree): @@ -50,6 +57,8 @@ def validate(tree: FileTree, schema: Namespace): ---------- tree : FileTree Full FileTree object to iterate over and check + schema : Namespace + Schema object to validate dataset against """ validator = BIDSValidator() diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index b7cdb56..2307dd1 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -397,6 +397,7 @@ def sidecar(self) -> None: """Sidecar metadata constructed via the inheritance principle.""" pass + class Sessions: """Collections of sessions in subject.""" From fa203d1731db459435e370e678edc1fc0638b0c8 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 13:59:52 +0100 Subject: [PATCH 14/28] add schema_path cli option --- src/bids_validator/__main__.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index 9ce4948..a62e5f2 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -10,6 +10,7 @@ import sys from typing import Annotated +from bidsschematools.schema import load_schema from bidsschematools.types import Namespace from bidsschematools.types.context import Subject @@ -103,6 +104,7 @@ def version_callback(value: bool): @app.command() def main( bids_path: str, + schema_path: str = None, verbose: Annotated[bool, typer.Option('--verbose', '-v', help='Show verbose output')] = False, version: Annotated[ bool, @@ -119,7 +121,10 @@ def main( root_path = FileTree.read_from_filesystem(bids_path) - 
validate(root_path) + if not schema_path: + schema = load_schema() + + validate(root_path, schema) if __name__ == '__main__': From 3aa3fec5aa00a90e018c0919d534b2f54f061231 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 15:12:07 +0100 Subject: [PATCH 15/28] Update validate function walk now returns context object which needs to be handled differently --- src/bids_validator/__main__.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index a62e5f2..62420cb 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -66,13 +66,8 @@ def validate(tree: FileTree, schema: Namespace): dataset = Dataset(tree, schema) for file in walk(tree, dataset): - # The output of the FileTree.relative_path method always drops the initial for the path - # which makes it fail the validator.is_bids check. THis may be a Windows specific thing. - # This line adds it back. - path = f'/{file.relative_path}' - - if not validator.is_bids(path): - print(f'{path} is not a valid bids filename') + if not validator.is_bids(file.path): + print(f'{file.path} is not a valid bids filename') def show_version(): From fbbe129114619a16e14165a6c1ea5d6a2e14dad5 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 15:22:21 +0100 Subject: [PATCH 16/28] feedback changes --- src/bids_validator/__main__.py | 3 ++- src/bids_validator/context.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index 62420cb..2c2d375 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -8,6 +8,7 @@ raise SystemExit(1) from None import sys +from collections.abc import Iterator from typing import Annotated from bidsschematools.schema import load_schema @@ -25,7 +26,7 @@ def is_subject_dir(tree): return tree.name.startswith('sub-') -def walk(tree: FileTree, dataset: Dataset, subject: 
Subject = None): +def walk(tree: FileTree, dataset: Dataset, subject: Subject = None) -> Iterator[Context]: """Iterate over children of a FileTree and check if they are a directory or file. If it's a directory then run again recursively, if it's a file file check the file name is diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 2307dd1..3044ce1 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -311,7 +311,7 @@ class Context: file: FileTree dataset: Dataset - subject: ctx.Subject + subject: ctx.Subject | None file_parts: FileParts = attrs.field(init=False) def __attrs_post_init__(self): @@ -350,7 +350,7 @@ def extension(self) -> str | None: @property def modality(self) -> str | None: """Modality of current file, for examples, MRI.""" - return datatype_to_modality(self.datatype, self.schema) + return datatype_to_modality(self.file_parts.datatype, self.schema) @property def size(self) -> int: From fdd44083be45cfb4dfc786ce6f0f09adcf00464b Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 17:51:21 +0100 Subject: [PATCH 17/28] added tests for sidecar property --- tests/test_context.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_context.py b/tests/test_context.py index 7ec6dcd..029013d 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -75,9 +75,9 @@ def test_context(synthetic_dataset, schema): assert isinstance(file_context.subject.sessions, context.Sessions) assert sorted(file_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] assert sorted(file_context.subject.sessions.session_id) == ["ses-01", "ses-02"] + assert file_context.sidecar is None ## Tests for: - # sidecar # associations # columns # json @@ -86,6 +86,18 @@ def test_context(synthetic_dataset, schema): # ome # tiff +def test_sidecar_inheritance(examples): + """Test to ensure inheritance principle is executed correctly""" + dataset = 
FileTree.read_from_filesystem(examples / 'qmri_mp2rage') + file = dataset / "sub-1" / "anat"/"sub-1_inv-2_part-mag_MP2RAGE.nii" + + sidecar = context.load_sidecar(file) + + assert sidecar["FlipAngle"] == 7 + assert sidecar["InversionTime"] == 2.7 + assert sidecar["RepetitionTimePreparation"] == 5.5 + + def test_sessions(synthetic_dataset): sub01 = synthetic_dataset / 'sub-01' From ecc0f37f6749693d305415f12a4e84ae620cb3c9 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 17:52:45 +0100 Subject: [PATCH 18/28] add sidecar method to context class --- src/bids_validator/context.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 3044ce1..0834314 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -207,6 +207,12 @@ def load_sidecar(file: FileTree) -> dict[str, t.Any]: # Uses walk back algorithm # https://bids-validator.readthedocs.io/en/latest/validation-model/inheritance-principle.html # Accumulates all sidecars + metadata = {} + + for json in walk_back(file, inherit=True): + metadata = load_json(json) | metadata + + return metadata def walk_back( @@ -393,9 +399,14 @@ def tiff(self) -> None: pass @property - def sidecar(self) -> None: + def sidecar(self) -> Namespace | None: """Sidecar metadata constructed via the inheritance principle.""" - pass + sidecar = load_sidecar(self.file) + + if sidecar: + return Namespace.build(sidecar) + + return None class Sessions: From 0343be16405bd8ff732ea9c098d961a76443ee4f Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 17:53:25 +0100 Subject: [PATCH 19/28] use orjson over built-in json package --- pyproject.toml | 1 + src/bids_validator/context.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f45a25c..0ca2857 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ requires-python = ">=3.9" dependencies 
= [ "attrs >=24.1", "bidsschematools @ git+https://github.com/bids-standard/bids-specification.git@refs/pull/2133/head#subdirectory=tools/schemacode", + "orjson>=3.11.3", "universal_pathlib >=0.2", ] diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 0834314..4a7c8ed 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -3,10 +3,10 @@ from __future__ import annotations import itertools -import json from functools import cache import attrs +import orjson from bidsschematools.types import Namespace from bidsschematools.types import context as ctx from upath import UPath @@ -72,8 +72,7 @@ def load_tsv(file: FileTree, *, max_rows=0) -> Namespace: @cache def load_json(file: FileTree) -> dict[str]: """Load JSON file contents.""" - with open(file) as fobj: - return json.load(fobj) + return orjson.loads(UPath(file).read_bytes()) class Subjects: From 162a7c9193878ace5943503ee20b1c785d37a55f Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 18:36:03 +0100 Subject: [PATCH 20/28] refine tests for sidecar --- tests/test_context.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/tests/test_context.py b/tests/test_context.py index 029013d..661c6ca 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -58,24 +58,29 @@ def test_context(synthetic_dataset, schema): sub01 = synthetic_dataset / 'sub-01' T1w = sub01 / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii' + bold = sub01 / 'ses-01' / 'func' / 'sub-01_ses-01_task-nback_run-01_bold.nii' subject = Subject(context.Sessions(sub01)) ds = context.Dataset(synthetic_dataset, schema) - file_context = context.Context(T1w, ds, subject) - - assert file_context.schema is schema - assert file_context.dataset is ds - assert file_context.entities == {'sub': '01', 'ses': '01'} - assert file_context.path == '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii' - assert file_context.datatype == 'anat' - assert 
file_context.suffix == 'T1w' - assert file_context.extension == '.nii' - assert file_context.modality == 'mri' - assert file_context.size == 352 - assert isinstance(file_context.subject.sessions, context.Sessions) - assert sorted(file_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] - assert sorted(file_context.subject.sessions.session_id) == ["ses-01", "ses-02"] - assert file_context.sidecar is None + T1w_context = context.Context(T1w, ds, subject) + + assert T1w_context.schema is schema + assert T1w_context.dataset is ds + assert T1w_context.entities == {'sub': '01', 'ses': '01'} + assert T1w_context.path == '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii' + assert T1w_context.datatype == 'anat' + assert T1w_context.suffix == 'T1w' + assert T1w_context.extension == '.nii' + assert T1w_context.modality == 'mri' + assert T1w_context.size == 352 + assert isinstance(T1w_context.subject.sessions, context.Sessions) + assert sorted(T1w_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] + assert sorted(T1w_context.subject.sessions.session_id) == ["ses-01", "ses-02"] + assert T1w_context.sidecar is None + + bold_context = context.Context(bold, ds, subject) + + assert bold_context.sidecar.to_dict() == {'TaskName': 'N-Back', 'RepetitionTime': 2.5} ## Tests for: # associations From fe42bb714516d58f43149372f28441e89dce0163 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 18:39:31 +0100 Subject: [PATCH 21/28] add test for json property --- tests/test_context.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/test_context.py b/tests/test_context.py index 661c6ca..2384429 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -59,7 +59,7 @@ def test_context(synthetic_dataset, schema): sub01 = synthetic_dataset / 'sub-01' T1w = sub01 / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii' bold = sub01 / 'ses-01' / 'func' / 'sub-01_ses-01_task-nback_run-01_bold.nii' - + subject = Subject(context.Sessions(sub01)) 
ds = context.Dataset(synthetic_dataset, schema) T1w_context = context.Context(T1w, ds, subject) @@ -77,10 +77,12 @@ def test_context(synthetic_dataset, schema): assert sorted(T1w_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] assert sorted(T1w_context.subject.sessions.session_id) == ["ses-01", "ses-02"] assert T1w_context.sidecar is None + assert T1w_context.json is None bold_context = context.Context(bold, ds, subject) assert bold_context.sidecar.to_dict() == {'TaskName': 'N-Back', 'RepetitionTime': 2.5} + assert bold_context.json is None ## Tests for: # associations @@ -91,6 +93,16 @@ def test_context(synthetic_dataset, schema): # ome # tiff +def test_context_json(examples, schema): + + dataset = FileTree.read_from_filesystem(examples / 'qmri_vfa') + file = dataset / 'sub-01' / 'anat'/'sub-01_flip-1_VFA.json' + + ds = context.Dataset(dataset, schema) + file_context = context.Context(file, ds, subject=None) + + assert file_context.json.to_dict() == {'FlipAngle': 3, 'RepetitionTimeExcitation': 0.0150} + def test_sidecar_inheritance(examples): """Test to ensure inheritance principle is executed correctly""" dataset = FileTree.read_from_filesystem(examples / 'qmri_mp2rage') From adb4b754e8cfc19db25e502c7aab30c1b20e31ce Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 18:40:20 +0100 Subject: [PATCH 22/28] ruffed test_context --- tests/test_context.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/test_context.py b/tests/test_context.py index 2384429..6866c59 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -1,6 +1,6 @@ import pytest - from bidsschematools.types.context import Subject + from bids_validator import context from bids_validator.types.files import FileTree @@ -54,8 +54,8 @@ def test_walkback(synthetic_dataset, schema): assert len(sidecars) == 1 assert sidecars[0] is synthetic_dataset / 'task-nback_bold.json' -def test_context(synthetic_dataset, schema): +def 
test_context(synthetic_dataset, schema): sub01 = synthetic_dataset / 'sub-01' T1w = sub01 / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii' bold = sub01 / 'ses-01' / 'func' / 'sub-01_ses-01_task-nback_run-01_bold.nii' @@ -74,8 +74,8 @@ def test_context(synthetic_dataset, schema): assert T1w_context.modality == 'mri' assert T1w_context.size == 352 assert isinstance(T1w_context.subject.sessions, context.Sessions) - assert sorted(T1w_context.subject.sessions.ses_dirs) == ["ses-01", "ses-02"] - assert sorted(T1w_context.subject.sessions.session_id) == ["ses-01", "ses-02"] + assert sorted(T1w_context.subject.sessions.ses_dirs) == ['ses-01', 'ses-02'] + assert sorted(T1w_context.subject.sessions.session_id) == ['ses-01', 'ses-02'] assert T1w_context.sidecar is None assert T1w_context.json is None @@ -87,32 +87,32 @@ def test_context(synthetic_dataset, schema): ## Tests for: # associations # columns - # json # gzip # nifti_header # ome # tiff -def test_context_json(examples, schema): +def test_context_json(examples, schema): dataset = FileTree.read_from_filesystem(examples / 'qmri_vfa') - file = dataset / 'sub-01' / 'anat'/'sub-01_flip-1_VFA.json' + file = dataset / 'sub-01' / 'anat' / 'sub-01_flip-1_VFA.json' ds = context.Dataset(dataset, schema) file_context = context.Context(file, ds, subject=None) assert file_context.json.to_dict() == {'FlipAngle': 3, 'RepetitionTimeExcitation': 0.0150} + def test_sidecar_inheritance(examples): """Test to ensure inheritance principle is executed correctly""" dataset = FileTree.read_from_filesystem(examples / 'qmri_mp2rage') - file = dataset / "sub-1" / "anat"/"sub-1_inv-2_part-mag_MP2RAGE.nii" + file = dataset / 'sub-1' / 'anat' / 'sub-1_inv-2_part-mag_MP2RAGE.nii' sidecar = context.load_sidecar(file) - assert sidecar["FlipAngle"] == 7 - assert sidecar["InversionTime"] == 2.7 - assert sidecar["RepetitionTimePreparation"] == 5.5 + assert sidecar['FlipAngle'] == 7 + assert sidecar['InversionTime'] == 2.7 + assert 
sidecar['RepetitionTimePreparation'] == 5.5 def test_sessions(synthetic_dataset): @@ -120,5 +120,5 @@ def test_sessions(synthetic_dataset): sessions = context.Sessions(sub01) - assert sorted(sessions.ses_dirs) == ["ses-01", "ses-02"] - assert sorted(sessions.session_id) == ["ses-01", "ses-02"] + assert sorted(sessions.ses_dirs) == ['ses-01', 'ses-02'] + assert sorted(sessions.session_id) == ['ses-01', 'ses-02'] From 85f7733d7d9cbc360fa041032a42e5281fc34386 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Wed, 24 Sep 2025 18:41:11 +0100 Subject: [PATCH 23/28] add json property to context class --- src/bids_validator/context.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 4a7c8ed..183c029 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -373,9 +373,12 @@ def columns(self) -> None: pass @property - def json(self) -> None: + def json(self) -> Namespace | None: """Contents of the current JSON file.""" - pass + if self.file_parts.extension == '.json': + return Namespace.build(load_json(self.file)) + + return None @property def gzip(self) -> None: From 6a90acc23c3bf18eb15258b9641b7bb433db1189 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Mon, 29 Sep 2025 16:56:57 +0100 Subject: [PATCH 24/28] feedback changes --- src/bids_validator/__main__.py | 5 ++--- src/bids_validator/context.py | 7 ++----- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index 2c2d375..e9ffd88 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -100,7 +100,7 @@ def version_callback(value: bool): @app.command() def main( bids_path: str, - schema_path: str = None, + schema_path: str | None = None, verbose: Annotated[bool, typer.Option('--verbose', '-v', help='Show verbose output')] = False, version: Annotated[ bool, @@ -117,8 +117,7 @@ def main( root_path = 
FileTree.read_from_filesystem(bids_path) - if not schema_path: - schema = load_schema() + schema = load_schema(schema_path) validate(root_path, schema) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 183c029..3767fad 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -403,12 +403,9 @@ def tiff(self) -> None: @property def sidecar(self) -> Namespace | None: """Sidecar metadata constructed via the inheritance principle.""" - sidecar = load_sidecar(self.file) + sidecar = load_sidecar(self.file) or {} - if sidecar: - return Namespace.build(sidecar) - - return None + return Namespace.build(sidecar) class Sessions: From 37df90aee30c6c7560f614e47eade24329f37f33 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Mon, 29 Sep 2025 17:02:30 +0100 Subject: [PATCH 25/28] update test for context.sidecar --- tests/test_context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_context.py b/tests/test_context.py index 6866c59..7fdbf75 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -76,7 +76,7 @@ def test_context(synthetic_dataset, schema): assert isinstance(T1w_context.subject.sessions, context.Sessions) assert sorted(T1w_context.subject.sessions.ses_dirs) == ['ses-01', 'ses-02'] assert sorted(T1w_context.subject.sessions.session_id) == ['ses-01', 'ses-02'] - assert T1w_context.sidecar is None + assert T1w_context.sidecar == {} assert T1w_context.json is None bold_context = context.Context(bold, ds, subject) From 1360680623d1a2ead48d0eb6f8b8fd67b70ace54 Mon Sep 17 00:00:00 2001 From: Dan Brady Date: Mon, 29 Sep 2025 17:07:01 +0100 Subject: [PATCH 26/28] update typing for main --- src/bids_validator/__main__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py index e9ffd88..f2bd949 100644 --- a/src/bids_validator/__main__.py +++ b/src/bids_validator/__main__.py @@ -9,7 +9,7 @@ import sys from 
collections.abc import Iterator -from typing import Annotated +from typing import Annotated, Optional from bidsschematools.schema import load_schema from bidsschematools.types import Namespace @@ -100,7 +100,7 @@ def version_callback(value: bool): @app.command() def main( bids_path: str, - schema_path: str | None = None, + schema_path: Optional[str] = None, verbose: Annotated[bool, typer.Option('--verbose', '-v', help='Show verbose output')] = False, version: Annotated[ bool, From e68bc1d12fe4cd659a4abaa61220ac4c392f33c0 Mon Sep 17 00:00:00 2001 From: "Christopher J. Markiewicz" Date: Mon, 29 Sep 2025 16:50:36 -0400 Subject: [PATCH 27/28] rf: Replace direntry with UPath in FileTree --- src/bids_validator/context.py | 6 +- src/bids_validator/types/files.py | 110 ++++++++---------------------- 2 files changed, 30 insertions(+), 86 deletions(-) diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py index 3767fad..9baa58f 100644 --- a/src/bids_validator/context.py +++ b/src/bids_validator/context.py @@ -61,7 +61,7 @@ def datatype_to_modality(datatype: str, schema: Namespace) -> str: @cache def load_tsv(file: FileTree, *, max_rows=0) -> Namespace: """Load TSV contents into a Namespace.""" - with open(file) as fobj: + with file.path_obj.open() as fobj: if max_rows > 0: fobj = itertools.islice(fobj, max_rows) contents = (line.rstrip('\r\n').split('\t') for line in fobj) @@ -72,7 +72,7 @@ def load_tsv(file: FileTree, *, max_rows=0) -> Namespace: @cache def load_json(file: FileTree) -> dict[str]: """Load JSON file contents.""" - return orjson.loads(UPath(file).read_bytes()) + return orjson.loads(file.path_obj.read_bytes()) class Subjects: @@ -360,7 +360,7 @@ def modality(self) -> str | None: @property def size(self) -> int: """Length of the current file in bytes.""" - return self.file.direntry.stat().st_size + return self.file.path_obj.stat().st_size @property def associations(self) -> ctx.Associations: diff --git a/src/bids_validator/types/files.py 
b/src/bids_validator/types/files.py index a3a6f62..40db627 100644 --- a/src/bids_validator/types/files.py +++ b/src/bids_validator/types/files.py @@ -4,103 +4,50 @@ import os import posixpath -import stat from functools import cached_property from pathlib import Path import attrs +from upath import UPath from . import _typings as t __all__ = ('FileTree',) -@attrs.define -class UserDirEntry: - """Partial reimplementation of :class:`os.DirEntry`. - - :class:`os.DirEntry` can't be instantiated from Python, but this can. - """ - - path: str = attrs.field(repr=False, converter=os.fspath) - name: str = attrs.field(init=False) - _stat: os.stat_result = attrs.field(init=False, repr=False, default=None) - _lstat: os.stat_result = attrs.field(init=False, repr=False, default=None) - - def __attrs_post_init__(self) -> None: - self.name = os.path.basename(self.path) - - def __fspath__(self) -> str: - return self.path - - def stat(self, *, follow_symlinks: bool = True) -> os.stat_result: - """Return stat_result object for the entry; cached per entry.""" - if follow_symlinks: - if self._stat is None: - self._stat = os.stat(self.path, follow_symlinks=True) - return self._stat - else: - if self._lstat is None: - self._lstat = os.stat(self.path, follow_symlinks=False) - return self._lstat - - def is_dir(self, *, follow_symlinks: bool = True) -> bool: - """Return True if the entry is a directory; cached per entry.""" - _stat = self.stat(follow_symlinks=follow_symlinks) - return stat.S_ISDIR(_stat.st_mode) - - def is_file(self, *, follow_symlinks: bool = True) -> bool: - """Return True if the entry is a file; cached per entry.""" - _stat = self.stat(follow_symlinks=follow_symlinks) - return stat.S_ISREG(_stat.st_mode) - - def is_symlink(self) -> bool: - """Return True if the entry is a symlink; cached per entry.""" - _stat = self.stat(follow_symlinks=False) - return stat.S_ISLNK(_stat.st_mode) - - -def as_direntry(obj: os.PathLike) -> os.DirEntry | UserDirEntry: - """Convert PathLike 
into DirEntry-like object.""" - if isinstance(obj, os.DirEntry): - return obj - return UserDirEntry(obj) - - -@attrs.define +@attrs.define(frozen=True) class FileTree: """Represent a FileTree with cached metadata.""" - direntry: os.DirEntry | UserDirEntry = attrs.field(repr=False, converter=as_direntry) - parent: FileTree | None = attrs.field(repr=False, default=None) - is_dir: bool = attrs.field(default=False) - children: dict[str, FileTree] = attrs.field(repr=False, factory=dict) - name: str = attrs.field(init=False) + path_obj: UPath = attrs.field(repr=False, converter=UPath) + is_dir: bool = attrs.field(repr=False, default=None) + parent: FileTree | None = attrs.field(repr=False, default=None, eq=False) + children: dict[str, FileTree] = attrs.field(repr=False, factory=dict, eq=False) def __attrs_post_init__(self): - self.name = self.direntry.name - self.children = { - name: attrs.evolve(child, parent=self) for name, child in self.children.items() - } + if self.is_dir is None: + object.__setattr__(self, 'is_dir', self.path_obj.is_dir()) + object.__setattr__( + self, + 'children', + {name: attrs.evolve(child, parent=self) for name, child in self.children.items()}, + ) @classmethod - def read_from_filesystem( - cls, - direntry: os.PathLike, - parent: FileTree | None = None, - ) -> t.Self: - """Read a FileTree from the filesystem. - - Uses :func:`os.scandir` to walk the directory tree. 
- """ - self = cls(direntry, parent=parent) - if self.direntry.is_dir(): - self.is_dir = True - self.children = { - entry.name: FileTree.read_from_filesystem(entry, parent=self) - for entry in os.scandir(self.direntry) + def read_from_filesystem(cls, path_obj: os.PathLike) -> t.Self: + """Read a FileTree from the filesystem.""" + path_obj = UPath(path_obj) + children = {} + if is_dir := path_obj.is_dir(): + children = { + entry.name: FileTree.read_from_filesystem(entry) for entry in path_obj.iterdir() } - return self + return cls(path_obj, is_dir=is_dir, children=children) + + @property + def name(self) -> bool: + """The name of the current FileTree node.""" + return self.path_obj.name def __contains__(self, relpath: os.PathLike) -> bool: parts = Path(relpath).parts @@ -110,10 +57,7 @@ def __contains__(self, relpath: os.PathLike) -> bool: return child and (len(parts) == 1 or posixpath.join(*parts[1:]) in child) def __fspath__(self): - return self.direntry.path - - def __hash__(self): - return hash(self.direntry.path) + return self.path_obj.__fspath__() def __truediv__(self, relpath: str | os.PathLike) -> t.Self: parts = Path(relpath).parts From 0d31404e5706b07c8d8c4c8e0952c8524cfa2495 Mon Sep 17 00:00:00 2001 From: "Christopher J. 
Markiewicz" Date: Mon, 29 Sep 2025 21:14:24 -0400 Subject: [PATCH 28/28] test: Generate dataset in-memory, test sidecar overrides --- tests/test_context.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_context.py b/tests/test_context.py index 7fdbf75..6dcb040 100644 --- a/tests/test_context.py +++ b/tests/test_context.py @@ -1,3 +1,6 @@ +import json + +import fsspec import pytest from bidsschematools.types.context import Subject @@ -10,6 +13,14 @@ def synthetic_dataset(examples): return FileTree.read_from_filesystem(examples / 'synthetic') +@pytest.fixture +def memfs(): + mem = fsspec.filesystem('memory') + mem.store.clear() + yield mem + mem.store.clear() + + def test_load(synthetic_dataset, schema): ds = context.Dataset(synthetic_dataset, schema) @@ -115,6 +126,33 @@ def test_sidecar_inheritance(examples): assert sidecar['RepetitionTimePreparation'] == 5.5 +def test_sidecar_order(memfs): + """Test to ensure more specific sidecars override less specific ones""" + root_json = {'rootOverwriteA': 'root', 'rootOverwriteB': 'root', 'rootValue': 'root'} + subject_json = {'rootOverwriteA': 'subject', 'subOverwrite': 'subject', 'subValue': 'subject'} + anat_json = {'rootOverwriteB': 'anat', 'subOverwrite': 'anat', 'anatValue': 'anat'} + memfs.pipe( + { + '/T1w.json': json.dumps(root_json).encode(), + '/sub-01/sub-01_T1w.json': json.dumps(subject_json).encode(), + '/sub-01/anat/sub-01_T1w.json': json.dumps(anat_json).encode(), + '/sub-01/anat/sub-01_T1w.nii': b'', + } + ) + + dataset = FileTree.read_from_filesystem('memory://') + file = dataset / 'sub-01' / 'anat' / 'sub-01_T1w.nii' + sidecar = context.load_sidecar(file) + assert sidecar == { + 'rootValue': 'root', + 'subValue': 'subject', + 'rootOverwriteA': 'subject', + 'anatValue': 'anat', + 'rootOverwriteB': 'anat', + 'subOverwrite': 'anat', + } + + def test_sessions(synthetic_dataset): sub01 = synthetic_dataset / 'sub-01'