
Commit e774c2d

Sanitize "locator" value
We observed that the Study Description can contain spaces. Due to an unresolved bug in nipype (nipy/nipype#3604) we must avoid spaces in file names, since external execution of dcm2niix would otherwise fail. While at it, I decided to sanitize the value further and also replace all other "funny" characters that get special treatment by shells.
1 parent c5e9b19 commit e774c2d
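
(Illustration only, not part of the commit: a minimal sketch of why an unquoted space in a file name is hazardous once a command line is handed to a shell — the single path splits into separate arguments. The dcm2niix invocation and path below are hypothetical.)

    import shlex

    # hypothetical path derived from a Study Description containing a space
    path = "study data/sub-01.dcm"
    cmd = f"dcm2niix -o out {path}"  # naive, unquoted command string
    print(shlex.split(cmd))          # shlex mimics POSIX shell word splitting
    # ['dcm2niix', '-o', 'out', 'study', 'data/sub-01.dcm']
    # the one path arrives as two bogus arguments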

3 files changed: +55 -2 lines changed

heudiconv/main.py

Lines changed: 3 additions & 2 deletions
@@ -13,7 +13,7 @@
 from .due import Doi, due
 from .parser import get_study_sessions
 from .queue import queue_conversion
-from .utils import SeqInfo, anonymize_sid, load_heuristic, treat_infofile
+from .utils import SeqInfo, anonymize_sid, load_heuristic, sanitize_path, treat_infofile

 lgr = logging.getLogger(__name__)

@@ -445,7 +445,8 @@ def workflow(
         if locator == "unknown":
             lgr.warning("Skipping unknown locator dataset")
             continue
-
+        if locator:
+            locator = sanitize_path(locator, "locator")
         if anon_cmd and sid is not None:
             anon_sid = anonymize_sid(sid, anon_cmd)
             lgr.info("Anonymized {} to {}".format(sid, anon_sid))

heudiconv/tests/test_utils.py

Lines changed: 29 additions & 0 deletions
@@ -3,6 +3,7 @@
 from datetime import datetime
 import json
 from json.decoder import JSONDecodeError
+import logging
 import os
 import os.path as op
 from pathlib import Path
@@ -22,6 +23,7 @@
     load_json,
     remove_prefix,
     remove_suffix,
+    sanitize_path,
     save_json,
     strptime_bids,
     strptime_dcm_da_tm,
@@ -294,3 +296,30 @@ def test_remove_prefix() -> None:
     assert remove_prefix(s, "") == s
     assert remove_prefix(s, "foo") == s
     assert remove_prefix(s, "jason") == ".bourne"
+
+
+@pytest.mark.parametrize("value", ["valid-name_123", "valid/name/123"])
+def test_sanitize_path_valid(value: str) -> None:
+    assert sanitize_path(value) == value
+
+
+@pytest.mark.parametrize(
+    "value,target",
+    [
+        ("in valid/na me:123*?", "in_valid/na_me_123_"),
+        (" leading-and-trailing--- ", "_leading-and-trailing---_"),
+        ("!!!", "_"),
+        (" ! ", "_"),
+    ],
+)
+def test_sanitize_path_invalid(
+    value: str, target: str, caplog: pytest.LogCaptureFixture
+) -> None:
+    caplog.set_level(logging.WARNING)
+    assert sanitize_path(value) == target
+    # should log about replacements only
+    assert len(caplog.records) == 1
+    msg = caplog.records[0].message
+    assert value in msg
+    assert target in msg
+    assert "contained problematic character(s)" in msg

heudiconv/utils.py

Lines changed: 23 additions & 0 deletions
@@ -164,6 +164,29 @@ def anonymize_sid(sid: AnyStr, anon_sid_cmd: str) -> AnyStr:
     return anon_sid


+def delete_chars(from_str: str, deletechars: str) -> str:
+    """Delete characters from a string"""
+    return from_str.translate(str.maketrans("", "", deletechars))
+
+
+def sanitize_path(path: str, descr: str = "path") -> str:
+    """Sanitize a path by replacing multiple consecutive unwanted characters with _.
+
+    Due to https://github.com/nipy/nipype/issues/3604 we would like to avoid
+    spaces in the paths, or any special characters which could cause special treatment in
+    the shell, e.g. characters like ; or & serving as command separators.
+    """
+    clean_path = re.sub("[ #!$%^&:;*?]+", "_", path)
+    if clean_path != path:
+        lgr.warning(
+            "%r %s contained problematic character(s), it " "was cleaned to be %r",
+            path,
+            descr,
+            clean_path,
+        )
+    return clean_path
+
+
 def create_file_if_missing(
     filename: str, content: str, glob_suffixes: list[str] | None = None
 ) -> bool:
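
(Usage sketch, not part of the diff — illustrative calls consistent with the tests added above; the example values are made up.)

    from heudiconv.utils import sanitize_path

    sanitize_path("valid/name/123")             # unchanged, nothing logged
    sanitize_path("Brain Study: pilot*?")       # -> 'Brain_Study_pilot_', logs a WARNING
    sanitize_path("My Study", descr="locator")  # -> 'My_Study'; the warning names the "locator"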
