bids-standard · effigies · Oct 13, 2025 · Sep 5, 2025 · Sep 5, 2025 · Sep 8, 2025
diff --git a/src/bids_validator/__main__.py b/src/bids_validator/__main__.py
@@ -10,13 +10,22 @@
 import sys
 from typing import Annotated
 
+from bidsschematools.schema import load_schema
+from bidsschematools.types import Namespace
+from bidsschematools.types.context import Subject
+
 from bids_validator import BIDSValidator
+from bids_validator.context import Context, Dataset, Sessions
 from bids_validator.types.files import FileTree
 
 app = typer.Typer()
 
 
-def walk(tree: FileTree):
+def is_subject_dir(tree) -> bool:  # name check only: True when ``tree.name`` starts with 'sub-'
+    return tree.name.startswith('sub-')
+
+
+def walk(tree: FileTree, dataset: Dataset, subject: Subject = None):
     """Iterate over children of a FileTree and check if they are a directory or file.
 
     If it's a directory then run again recursively, if it's a file file check the file name is
@@ -26,27 +35,37 @@ def walk(tree: FileTree):
     ----------
     tree : FileTree
         FileTree object to iterate over
+    dataset : Dataset
+        Object containing properties for the entire dataset
+    subject : Subject, optional
+        Object containing subject and session info; created on entering a ``sub-*`` directory
 
     """
+    if subject is None and is_subject_dir(tree):
+        subject = Subject(Sessions(tree))
+
     for child in tree.children.values():
         if child.is_dir:
-            yield from walk(child)
+            yield from walk(child, dataset, subject)
         else:
-            yield child
+            yield Context(child, dataset, subject)
 
 
-def validate(tree: FileTree):
+def validate(tree: FileTree, schema: Namespace):
     """Check if the file path is BIDS compliant.
 
     Parameters
     ----------
     tree : FileTree
         Full FileTree object to iterate over and check
+    schema : Namespace
+        Schema object to validate dataset against
 
     """
     validator = BIDSValidator()
+    dataset = Dataset(tree, schema)
 
-    for file in walk(tree):
+    for file in walk(tree, dataset):
         # The output of the FileTree.relative_path method always drops the initial for the path
         # which makes it fail the validator.is_bids check. THis may be a Windows specific thing.
         # This line adds it back.
@@ -85,6 +104,7 @@ def version_callback(value: bool):
 @app.command()
 def main(
     bids_path: str,
+    schema_path: str = None,
     verbose: Annotated[bool, typer.Option('--verbose', '-v', help='Show verbose output')] = False,
     version: Annotated[
         bool,
@@ -101,7 +121,10 @@ def main(
 
     root_path = FileTree.read_from_filesystem(bids_path)
 
-    validate(root_path)
+    # Always bind ``schema``: previously it was unbound whenever a schema_path was supplied.
+    schema = load_schema(schema_path) if schema_path else load_schema()
+
+    validate(root_path, schema)
 
 
 if __name__ == '__main__':

diff --git a/src/bids_validator/context.py b/src/bids_validator/context.py
@@ -46,6 +46,18 @@ class ValidationError(Exception):
     """TODO: Add issue structure."""
 
 
+_DATATYPE_MAP = {}
+
+
+def datatype_to_modality(datatype: str, schema: Namespace) -> str:
+    """Return the modality that lists *datatype* in ``schema.rules.modalities``."""
+    # Map is filled on the first call only (first schema wins); unmapped datatypes raise KeyError.
+    if not _DATATYPE_MAP:
+        for modality, spec in schema.rules.modalities.items():
+            _DATATYPE_MAP.update(dict.fromkeys(spec['datatypes'], modality))
+    return _DATATYPE_MAP[datatype]
+
+
 @cache
 def load_tsv(file: FileTree, *, max_rows=0) -> Namespace:
     """Load TSV contents into a Namespace."""
@@ -130,14 +142,7 @@ def dataset_description(self) -> Namespace:
     @cached_property
     def modalities(self) -> list[str]:
         """List of modalities found in the dataset."""
-        result = set()
-
-        modalities = self.schema.rules.modalities
-        for datatype in self.datatypes:
-            for mod_name, mod_dtypes in modalities.items():
-                if datatype in mod_dtypes.datatypes:
-                    result.add(mod_name)
-
+        result = {datatype_to_modality(datatype, self.schema) for datatype in self.datatypes}
         return list(result)
 
     @cached_property
@@ -298,3 +303,128 @@ def from_file(cls, file: FileTree, schema: Namespace | None = None) -> t.Self:
             suffix=suffix,
             extension=extension,
         )
+
+
+@attrs.define
+class Context:
+    """Per-file validation context; ``file_parts`` is parsed from ``file`` at construction."""
+
+    file: FileTree  # the file this context describes
+    dataset: Dataset  # dataset-wide context; also the source of ``schema``
+    subject: ctx.Subject | None  # None for files outside any ``sub-*`` directory
+    file_parts: FileParts = attrs.field(init=False)
+
+    def __attrs_post_init__(self) -> None:
+        self.file_parts = FileParts.from_file(self.file, self.schema)  # derived, hence init=False
+
+    @property
+    def schema(self) -> Namespace:
+        """The BIDS specification schema, taken from ``dataset``."""
+        return self.dataset.schema
+
+    @property
+    def path(self) -> str:
+        """Path of the current file."""
+        return self.file_parts.path
+
+    @property
+    def entities(self) -> dict[str, str] | None:
+        """Entities parsed from the current filename."""
+        return self.file_parts.entities
+
+    @property
+    def datatype(self) -> str | None:
+        """Datatype of the current file, for example, anat."""
+        return self.file_parts.datatype
+
+    @property
+    def suffix(self) -> str | None:
+        """Suffix of the current file."""
+        return self.file_parts.suffix
+
+    @property
+    def extension(self) -> str | None:
+        """Extension of the current file, including the initial dot."""
+        return self.file_parts.extension
+
+    @property
+    def modality(self) -> str | None:
+        """Modality of the current file, for example, mri; None when it has no datatype."""
+        return datatype_to_modality(self.datatype, self.schema) if self.datatype else None
+
+    @property
+    def size(self) -> int:
+        """Length of the current file in bytes."""
+        return self.file.direntry.stat().st_size
+
+    @property
+    def associations(self) -> ctx.Associations:
+        """Associated files, indexed by suffix, selected according to the inheritance principle."""
+        return ctx.Associations()  # placeholder: constructed empty, no lookup is done here
+
+    @property
+    def columns(self) -> None:
+        """TSV columns, indexed by column header, values are arrays with column contents."""
+        return None  # placeholder
+
+    @property
+    def json(self) -> None:
+        """Contents of the current JSON file."""
+        return None  # placeholder
+
+    @property
+    def gzip(self) -> None:
+        """Parsed contents of gzip header."""
+        return None  # placeholder
+
+    @property
+    def nifti_header(self) -> None:
+        """Parsed contents of NIfTI header referenced elsewhere in schema."""
+        return None  # placeholder
+
+    @property
+    def ome(self) -> None:
+        """Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files."""
+        return None  # placeholder
+
+    @property
+    def tiff(self) -> None:
+        """TIFF file format metadata."""
+        return None  # placeholder
+
+    @property
+    def sidecar(self) -> None:
+        """Sidecar metadata constructed via the inheritance principle."""
+        return None  # placeholder
+
+
+class Sessions:
+    """Sessions of one subject, read from the subject's directory tree.
+
+    ``ses_dirs`` comes from directory names; ``session_id`` from a ``*_sessions.tsv`` file.
+    """
+
+    def __init__(self, tree: FileTree):
+        self._tree = tree  # callers pass a subject (``sub-*``) directory
+
+    @cached_property
+    def ses_dirs(self) -> list[str]:
+        """Names of the ``ses-*`` directories that are direct children of the tree."""
+        subdirs = (entry for entry in self._tree.children.values() if entry.is_dir)
+        return [entry.name for entry in subdirs if entry.name.startswith('ses-')]
+
+    @property
+    def session_id(self) -> list[str] | None:
+        """The session_id column of the first ``*_sessions.tsv`` child, or None if absent."""
+        children = self._tree.children.items()
+        tsv_files = [node for name, node in children if name.endswith('_sessions.tsv')]
+        # Only the first match (in child order) is consulted.
+        return self._get_session_id(tsv_files[0]) if tsv_files else None
+
+    @staticmethod
+    def _get_session_id(sessions_file: FileTree) -> list[str] | None:
+        """Load the TSV and return its ``session_id`` values, or None without that column."""
+        table = load_tsv(sessions_file)
+        if 'session_id' in table:
+            return list(table['session_id'])
+        return None
diff --git a/tests/test_context.py b/tests/test_context.py
@@ -1,5 +1,6 @@
 import pytest
 
+from bidsschematools.types.context import Subject
 from bids_validator import context
 from bids_validator.types.files import FileTree
 
@@ -52,3 +53,43 @@ def test_walkback(synthetic_dataset, schema):
     sidecars = list(context.walk_back(bold, inherit=True))
     assert len(sidecars) == 1
     assert sidecars[0] is synthetic_dataset / 'task-nback_bold.json'
+
+
+def test_context(synthetic_dataset, schema):
+    """Context exposes dataset, file-part and subject/session properties for a T1w file."""
+    subject_dir = synthetic_dataset / 'sub-01'
+    t1w_file = subject_dir / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii'
+
+    subject = Subject(context.Sessions(subject_dir))
+    dataset = context.Dataset(synthetic_dataset, schema)
+    file_context = context.Context(t1w_file, dataset, subject)
+
+    assert file_context.schema is schema
+    assert file_context.dataset is dataset
+    assert file_context.entities == {'sub': '01', 'ses': '01'}
+    assert file_context.path == '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii'
+    assert file_context.datatype == 'anat'
+    assert file_context.suffix == 'T1w'
+    assert file_context.extension == '.nii'
+    assert file_context.modality == 'mri'
+    # Size in bytes of the fixture file on disk.
+    assert file_context.size == 352
+
+    sessions = file_context.subject.sessions
+    assert isinstance(sessions, context.Sessions)
+    assert sorted(sessions.ses_dirs) == ['ses-01', 'ses-02']
+    assert sorted(sessions.session_id) == ['ses-01', 'ses-02']
+
+    # TODO: add tests for the remaining properties:
+    #  sidecar, associations
+    #  columns, json
+    #  gzip, nifti_header
+    #  ome, tiff
+
+
+def test_sessions(synthetic_dataset):
+    """Sessions reports the same IDs from ``ses-*`` directories and from the sessions TSV."""
+    sessions = context.Sessions(synthetic_dataset / 'sub-01')
+    expected = ['ses-01', 'ses-02']
+    assert sorted(sessions.ses_dirs) == expected
+    assert sorted(sessions.session_id) == expected