Skip to content

Commit 71cfac9

Browse files
add tab sanitization to fhir metadata
1 parent b6ac545 commit 71cfac9

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

gen3/external/nih/dbgap_fhir.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -419,6 +419,7 @@ def get_simplified_data_from_object(self, fhir_object):
419419
self._flatten_relevant_fields(all_data)
420420
self._remove_unecessary_fields(all_data)
421421
self._capitalize_top_level_keys(all_data)
422+
self._clean_structure(all_data)
422423

423424
return all_data
424425

@@ -569,3 +570,30 @@ def _capitalize_top_level_keys(all_data):
569570

570571
if key != capitalized_key:
571572
del all_data[key]
573+
574+
def _clean_value(self, value):
575+
"""
576+
Replace tab literals in a string
577+
"""
578+
if value is None:
579+
return ""
580+
581+
# Double-escape existing backslashes
582+
# Convert every literal tab into the text “\t”
583+
return value.replace("\\", "\\\\").replace("\t", r"\t")
584+
585+
def _clean_structure(self, obj):
586+
"""
587+
Recursively walk a nested structure (dicts, lists, tuples) and clean every string
588+
"""
589+
if isinstance(obj, dict):
590+
return {k: self._clean_structure(v) for k, v in obj.items()}
591+
592+
if isinstance(obj, (list, tuple)):
593+
typ = type(obj)
594+
return typ(self._clean_structure(v) for v in obj)
595+
596+
if isinstance(obj, str):
597+
return self._clean_value(obj)
598+
599+
return obj

0 commit comments

Comments
 (0)