Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1a9dfdb
added intial tests for Context class
ubdbra001 Sep 5, 2025
2db6ba9
Context class WIP
ubdbra001 Sep 5, 2025
5cc92f5
PR feedback changes
ubdbra001 Sep 8, 2025
0d427f4
add missing docstrings
ubdbra001 Sep 8, 2025
72dbbeb
add and use global mapping for datatype to modality
ubdbra001 Sep 8, 2025
eeb692f
add more tests for next stage
ubdbra001 Sep 9, 2025
3d70d8f
Merge branch 'main' into i32-develop-context-class
ubdbra001 Sep 24, 2025
7200e99
remove sessions.phenotype test
ubdbra001 Sep 24, 2025
729bf51
add sessions class
ubdbra001 Sep 24, 2025
96dd2dc
add subject as an input param for Context class
ubdbra001 Sep 24, 2025
1513ff8
Update and validate function
ubdbra001 Sep 24, 2025
685b978
Update walk function
ubdbra001 Sep 24, 2025
d2d9975
update tests
ubdbra001 Sep 24, 2025
441dd49
tidy for ruff
ubdbra001 Sep 24, 2025
fa203d1
add schema_path cli option
ubdbra001 Sep 24, 2025
3aa3fec
Update validate function
ubdbra001 Sep 24, 2025
fbbe129
feedback changes
ubdbra001 Sep 24, 2025
fdd4408
added tests for sidecar property
ubdbra001 Sep 24, 2025
ecc0f37
add sidecar method to context class
ubdbra001 Sep 24, 2025
0343be1
use orjson over built-in json package
ubdbra001 Sep 24, 2025
162a7c9
refine tests for sidecar
ubdbra001 Sep 24, 2025
fe42bb7
add test for json property
ubdbra001 Sep 24, 2025
adb4b75
ruffed test_context
ubdbra001 Sep 24, 2025
85f7733
add json property to context class
ubdbra001 Sep 24, 2025
6a90acc
feedback changes
ubdbra001 Sep 29, 2025
37df90a
update test for context.sidecar
ubdbra001 Sep 29, 2025
1360680
update typing for main
ubdbra001 Sep 29, 2025
e68bc1d
rf: Replace direntry with UPath in FileTree
effigies Sep 29, 2025
0d31404
test: Generate dataset in-memory, test sidecar overrides
effigies Sep 30, 2025
2fc29ed
Merge pull request #1 from effigies/rf/upath
ubdbra001 Oct 8, 2025
85c250a
add load_tsv_gz function
ubdbra001 Sep 30, 2025
67e6751
add columns method for context class
ubdbra001 Sep 30, 2025
4974176
added tests for tsv and tsv.gz loaders
ubdbra001 Oct 13, 2025
7aeb04e
Commit PR feedback
ubdbra001 Oct 13, 2025
88ce2e5
add decode for gzip binary object
ubdbra001 Oct 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 29 additions & 6 deletions src/bids_validator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,22 @@
import sys
from typing import Annotated

from bidsschematools.schema import load_schema
from bidsschematools.types import Namespace
from bidsschematools.types.context import Subject

from bids_validator import BIDSValidator
from bids_validator.context import Context, Dataset, Sessions
from bids_validator.types.files import FileTree

app = typer.Typer()


def walk(tree: FileTree):
def is_subject_dir(tree):
    """Report whether ``tree`` is named like a subject directory.

    Only the node's ``name`` attribute is inspected: a name that begins with
    ``sub-`` counts as a subject directory. Whether the node really is a
    directory is not checked here.
    """
    prefix = 'sub-'
    node_name = tree.name
    return node_name.startswith(prefix)


def walk(tree: FileTree, dataset: Dataset, subject: Subject = None):
"""Iterate over children of a FileTree and check if they are a directory or file.

If it's a directory then run again recursively; if it's a file, check the file name is
Expand All @@ -26,27 +35,37 @@ def walk(tree: FileTree):
----------
tree : FileTree
FileTree object to iterate over
dataset: Dataset
Object containing properties for entire dataset
subject: Subject
object containing subject and session info

"""
if subject is None and is_subject_dir(tree):
subject = Subject(Sessions(tree))

for child in tree.children.values():
if child.is_dir:
yield from walk(child)
yield from walk(child, dataset, subject)
else:
yield child
yield Context(child, dataset, subject)


def validate(tree: FileTree):
def validate(tree: FileTree, schema: Namespace):
"""Check if the file path is BIDS compliant.

Parameters
----------
tree : FileTree
Full FileTree object to iterate over and check
schema : Namespace
Schema object to validate dataset against

"""
validator = BIDSValidator()
dataset = Dataset(tree, schema)

for file in walk(tree):
for file in walk(tree, dataset):
# The output of the FileTree.relative_path method always drops the initial for the path
# which makes it fail the validator.is_bids check. This may be a Windows-specific thing.
# This line adds it back.
Expand Down Expand Up @@ -85,6 +104,7 @@ def version_callback(value: bool):
@app.command()
def main(
bids_path: str,
schema_path: str = None,
verbose: Annotated[bool, typer.Option('--verbose', '-v', help='Show verbose output')] = False,
version: Annotated[
bool,
Expand All @@ -101,7 +121,10 @@ def main(

root_path = FileTree.read_from_filesystem(bids_path)

validate(root_path)
if not schema_path:
schema = load_schema()

validate(root_path, schema)


if __name__ == '__main__':
Expand Down
146 changes: 138 additions & 8 deletions src/bids_validator/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ class ValidationError(Exception):
"""TODO: Add issue structure."""


# Cache of ``{datatype: modality}`` built from ``schema.rules.modalities``.
_DATATYPE_MAP = {}
# The schema object the cache above was built from. Compared by identity so
# that passing a different schema rebuilds the map instead of silently
# answering from a stale one.
_DATATYPE_MAP_SOURCE = None


def datatype_to_modality(datatype: str, schema: Namespace) -> str:
    """Return the modality that ``datatype`` belongs to according to ``schema``.

    The ``{datatype: modality}`` lookup table is derived from
    ``schema.rules.modalities`` and cached at module level. The cache is
    rebuilt whenever it is empty or a different schema object is supplied.

    Parameters
    ----------
    datatype : str
        Datatype name to look up.
    schema : Namespace
        Schema whose ``rules.modalities`` entries each provide a
        ``'datatypes'`` collection.

    Returns
    -------
    str
        Name of the modality listing ``datatype``.

    Raises
    ------
    KeyError
        If ``datatype`` is not listed under any modality of ``schema``.

    """
    global _DATATYPE_MAP, _DATATYPE_MAP_SOURCE
    if _DATATYPE_MAP_SOURCE is not schema or not _DATATYPE_MAP:
        # Build a fresh dict rather than merging into the old one, so entries
        # from a previously used schema cannot leak into this one.
        _DATATYPE_MAP = {
            dtype: mod_name
            for mod_name, mod_dtypes in schema.rules.modalities.items()
            for dtype in mod_dtypes['datatypes']
        }
        _DATATYPE_MAP_SOURCE = schema
    return _DATATYPE_MAP[datatype]


@cache
def load_tsv(file: FileTree, *, max_rows=0) -> Namespace:
"""Load TSV contents into a Namespace."""
Expand Down Expand Up @@ -130,14 +142,7 @@ def dataset_description(self) -> Namespace:
@cached_property
def modalities(self) -> list[str]:
"""List of modalities found in the dataset."""
result = set()

modalities = self.schema.rules.modalities
for datatype in self.datatypes:
for mod_name, mod_dtypes in modalities.items():
if datatype in mod_dtypes.datatypes:
result.add(mod_name)

result = {datatype_to_modality(datatype, self.schema) for datatype in self.datatypes}
return list(result)

@cached_property
Expand Down Expand Up @@ -298,3 +303,128 @@ def from_file(cls, file: FileTree, schema: Namespace | None = None) -> t.Self:
suffix=suffix,
extension=extension,
)


@attrs.define
class Context:
    """Per-file context whose values are derived from the file on access.

    Filename-derived values (path, entities, datatype, suffix, extension) are
    parsed once at construction time into ``file_parts``; the remaining
    properties are computed when read. Several properties are placeholders
    that are not implemented yet and always return ``None``.
    """

    file: FileTree
    dataset: Dataset
    # NOTE(review): callers appear to pass ``None`` here for files that are
    # not inside a ``sub-*`` directory -- confirm and widen the annotation.
    subject: ctx.Subject
    # Populated in ``__attrs_post_init__``; not an ``__init__`` argument.
    file_parts: FileParts = attrs.field(init=False)

    def __attrs_post_init__(self):
        """Parse the file name into its parts using the dataset's schema."""
        self.file_parts = FileParts.from_file(self.file, self.schema)

    @property
    def schema(self) -> Namespace:
        """The BIDS specification schema."""
        return self.dataset.schema

    @property
    def path(self) -> str:
        """Path of the current file."""
        return self.file_parts.path

    @property
    def entities(self) -> dict[str, str] | None:
        """Entities parsed from the current filename."""
        return self.file_parts.entities

    @property
    def datatype(self) -> str | None:
        """Datatype of current file, for example, anat."""
        return self.file_parts.datatype

    @property
    def suffix(self) -> str | None:
        """Suffix of current file."""
        return self.file_parts.suffix

    @property
    def extension(self) -> str | None:
        """Extension of current file including initial dot."""
        return self.file_parts.extension

    @property
    def modality(self) -> str | None:
        """Modality of current file, for example, MRI.

        Returns ``None`` when the file has no datatype, instead of failing
        the datatype lookup with a ``KeyError``.
        """
        datatype = self.datatype
        if datatype is None:
            return None
        return datatype_to_modality(datatype, self.schema)

    @property
    def size(self) -> int:
        """Length of the current file in bytes."""
        return self.file.direntry.stat().st_size

    @property
    def associations(self) -> ctx.Associations:
        """Associated files, indexed by suffix, selected according to the inheritance principle.

        Currently a placeholder: a new, empty ``Associations`` object is returned.
        """
        return ctx.Associations()

    @property
    def columns(self) -> None:
        """TSV columns, indexed by column header, values are arrays with column contents.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def json(self) -> None:
        """Contents of the current JSON file.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def gzip(self) -> None:
        """Parsed contents of gzip header.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def nifti_header(self) -> None:
        """Parsed contents of NIfTI header referenced elsewhere in schema.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def ome(self) -> None:
        """Parsed contents of OME-XML header, which may be found in OME-TIFF or OME-ZARR files.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def tiff(self) -> None:
        """TIFF file format metadata.

        Not implemented yet; always returns ``None``.
        """
        return None

    @property
    def sidecar(self) -> None:
        """Sidecar metadata constructed via the inheritance principle.

        Not implemented yet; always returns ``None``.
        """
        return None


class Sessions:
    """Session information gathered from a single subject's file tree."""

    def __init__(self, tree: FileTree):
        self._tree = tree

    @cached_property
    def ses_dirs(self) -> list[str]:
        """Names of the ``ses-*`` directories found directly under the tree."""
        found = []
        for entry in self._tree.children.values():
            if not entry.is_dir:
                continue
            if entry.name.startswith('ses-'):
                found.append(entry.name)
        return found

    @property
    def session_id(self) -> list[str] | None:
        """The ``session_id`` column of the first ``*_sessions.tsv`` child, if any.

        Returns ``None`` when the tree has no ``*_sessions.tsv`` child or when
        that file has no ``session_id`` column.
        """
        candidates = [
            node
            for fname, node in self._tree.children.items()
            if fname.endswith('_sessions.tsv')
        ]
        if not candidates:
            return None
        return self._get_session_id(candidates[0])

    @staticmethod
    def _get_session_id(phenotype_file: FileTree) -> list[str] | None:
        """Load ``phenotype_file`` as TSV and return its ``session_id`` column as a list."""
        table = load_tsv(phenotype_file)
        return list(table['session_id']) if 'session_id' in table else None
41 changes: 41 additions & 0 deletions tests/test_context.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

from bidsschematools.types.context import Subject
from bids_validator import context
from bids_validator.types.files import FileTree

Expand Down Expand Up @@ -52,3 +53,43 @@ def test_walkback(synthetic_dataset, schema):
sidecars = list(context.walk_back(bold, inherit=True))
assert len(sidecars) == 1
assert sidecars[0] is synthetic_dataset / 'task-nback_bold.json'

def test_context(synthetic_dataset, schema):
    """Check the implemented Context properties for a T1w file of ``sub-01``."""
    subject_tree = synthetic_dataset / 'sub-01'
    t1w_file = subject_tree / 'ses-01' / 'anat' / 'sub-01_ses-01_T1w.nii'

    subject = Subject(context.Sessions(subject_tree))
    dataset = context.Dataset(synthetic_dataset, schema)
    file_context = context.Context(t1w_file, dataset, subject)

    # Identity checks: the context must expose the very objects it was given.
    assert file_context.schema is schema
    assert file_context.dataset is dataset

    # Values derived from the file name and the file itself.
    expected_values = {
        'entities': {'sub': '01', 'ses': '01'},
        'path': '/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii',
        'datatype': 'anat',
        'suffix': 'T1w',
        'extension': '.nii',
        'modality': 'mri',
        'size': 352,
    }
    for attribute, expected in expected_values.items():
        assert getattr(file_context, attribute) == expected

    # Session information reached through the subject.
    found_sessions = file_context.subject.sessions
    assert isinstance(found_sessions, context.Sessions)
    assert sorted(found_sessions.ses_dirs) == ["ses-01", "ses-02"]
    assert sorted(found_sessions.session_id) == ["ses-01", "ses-02"]

    # Properties still lacking tests:
    #   sidecar, associations, columns, json, gzip, nifti_header, ome, tiff

def test_sessions(synthetic_dataset):
    """Sessions reports the same labels via ``ses_dirs`` and via ``session_id``."""
    sessions = context.Sessions(synthetic_dataset / 'sub-01')
    expected_labels = ["ses-01", "ses-02"]

    assert sorted(sessions.ses_dirs) == expected_labels
    assert sorted(sessions.session_id) == expected_labels
Loading