From 16c72a195a88f6e549a344481bcd6e7a33670d32 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Fri, 29 Aug 2025 17:14:13 -0400 Subject: [PATCH] Sanitize "locator" value We observed the Study Description to contain spaces in it. Due to an unresolved bug in nipype (https://github.com/nipy/nipype/issues/3604) we must avoid spaces in the file names since otherwise external execution of dcm2niix would fail. While at it, I decided to sanitize it more and replace all other "funny" characters which receive special treatment by shells. --- heudiconv/main.py | 5 +++-- heudiconv/tests/test_utils.py | 29 +++++++++++++++++++++++++++++ heudiconv/utils.py | 18 ++++++++++++++++++ 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/heudiconv/main.py b/heudiconv/main.py index 97c3be05..2bd4ddf6 100644 --- a/heudiconv/main.py +++ b/heudiconv/main.py @@ -13,7 +13,7 @@ from .due import Doi, due from .parser import get_study_sessions from .queue import queue_conversion -from .utils import SeqInfo, anonymize_sid, load_heuristic, treat_infofile +from .utils import SeqInfo, anonymize_sid, load_heuristic, sanitize_path, treat_infofile lgr = logging.getLogger(__name__) @@ -445,7 +445,8 @@ def workflow( if locator == "unknown": lgr.warning("Skipping unknown locator dataset") continue - + if locator: + locator = sanitize_path(locator, "locator") if anon_cmd and sid is not None: anon_sid = anonymize_sid(sid, anon_cmd) lgr.info("Anonymized {} to {}".format(sid, anon_sid)) diff --git a/heudiconv/tests/test_utils.py b/heudiconv/tests/test_utils.py index 38603a37..66a3b041 100644 --- a/heudiconv/tests/test_utils.py +++ b/heudiconv/tests/test_utils.py @@ -3,6 +3,7 @@ from datetime import datetime import json from json.decoder import JSONDecodeError +import logging import os import os.path as op from pathlib import Path @@ -22,6 +23,7 @@ load_json, remove_prefix, remove_suffix, + sanitize_path, save_json, strptime_bids, strptime_dcm_da_tm, @@ -294,3 +296,30 @@ def test_remove_prefix() -> None: 
assert remove_prefix(s, "") == s assert remove_prefix(s, "foo") == s assert remove_prefix(s, "jason") == ".bourne" + + +@pytest.mark.parametrize("value", ["valid-name_123", "valid/name/123"]) +def test_sanitize_path_valid(value: str) -> None: + assert sanitize_path(value) == value + + +@pytest.mark.parametrize( + "value,target", + [ + ("in valid/na me:123*?", "in_valid/na_me_123_"), + (" leading-and-trailing--- ", "_leading-and-trailing---_"), + ("!!!", "_"), + (" ! ", "_"), + ], +) +def test_sanitize_path_invalid( + value: str, target: str, caplog: pytest.LogCaptureFixture +) -> None: + caplog.set_level(logging.WARNING) + assert sanitize_path(value) == target + # should log about replacements only + assert len(caplog.records) == 1 + msg = caplog.records[0].message + assert value in msg + assert target in msg + assert "contained problematic character(s)" in msg diff --git a/heudiconv/utils.py b/heudiconv/utils.py index 16b4808c..a5fb463c 100644 --- a/heudiconv/utils.py +++ b/heudiconv/utils.py @@ -164,6 +164,24 @@ def anonymize_sid(sid: AnyStr, anon_sid_cmd: str) -> AnyStr: return anon_sid +def sanitize_path(path: str, descr: str = "path") -> str: + """Sanitize a path by replacing multiple consecutive unwanted characters with _. + + Due to https://github.com/nipy/nipype/issues/3604 we would like to avoid + spaces in the paths, or any special characters which could cause special treatment in + the shell, e.g. characters like ; or & serving as command separators. + """ + clean_path = re.sub("[ #!$%^&:;*?]+", "_", path) + if clean_path != path: + lgr.warning( + "%r %s contained problematic character(s), it " "was cleaned to be %r", + path, + descr, + clean_path, + ) + return clean_path + + def create_file_if_missing( filename: str, content: str, glob_suffixes: list[str] | None = None ) -> bool: