Skip to content

Commit 18b03a3

Browse files
committed
fix: allow deep attributes in Standoff and OMOP doc2dict converters
1 parent c408d65 commit 18b03a3

File tree

3 files changed

+32
-12
lines changed

3 files changed

+32
-12
lines changed

changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
- Support packaging with poetry 2.0
1616
- Solve pickling issues with multiprocessing when pytorch is installed
17+
- Allow deep attributes like `a.b.c` for `span_attributes` in Standoff and OMOP doc2dict converters
1718

1819
# v0.15.0 (2024-12-13)
1920

edsnlp/data/converters.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,14 @@ def __init__(
355355

356356
def __call__(self, doc):
357357
spans = get_spans(doc, self.span_getter)
358+
span_binding_getters = {
359+
obj_name: BINDING_GETTERS[
360+
("_." + ext_name)
361+
if ext_name.split(".")[0] not in SPAN_BUILTIN_ATTRS
362+
else ext_name
363+
]
364+
for ext_name, obj_name in self.span_attributes.items()
365+
}
358366
obj = {
359367
FILENAME: doc._.note_id,
360368
"doc_id": doc._.note_id,
@@ -369,9 +377,12 @@ def __call__(self, doc):
369377
}
370378
],
371379
"attributes": {
372-
obj_name: getattr(ent._, ext_name)
373-
for ext_name, obj_name in self.span_attributes.items()
374-
if ent._.has(ext_name)
380+
obj_name: value
381+
for obj_name, value in (
382+
(k, getter(ent))
383+
for k, getter in span_binding_getters.items()
384+
)
385+
if value is not None
375386
},
376387
"label": ent.label_,
377388
}
@@ -621,8 +632,11 @@ def __call__(self, doc):
621632
"lexical_variant": ent.text,
622633
"note_nlp_source_value": ent.label_,
623634
**{
624-
obj_name: getter(ent)
625-
for obj_name, getter in span_binding_getters.items()
635+
obj_name: value
636+
for obj_name, value in (
637+
(k, getter(ent))
638+
for k, getter in span_binding_getters.items()
639+
)
626640
},
627641
}
628642
for i, ent in enumerate(sorted(dict.fromkeys(spans)))

edsnlp/utils/bindings.py

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,14 @@ def _check_path(path: str):
1919
"The label must be a path of valid python identifier to be used as a getter"
2020
"in the following template: span.[YOUR_LABEL], such as `label_` or `_.negated"
2121
)
22-
if path[0].isalpha() or path[0] == "_":
23-
return "." + path
24-
return path
22+
parts = path.split(".")
23+
new_path = "span"
24+
for part in parts:
25+
if " " in part:
26+
new_path = "getattr(" + new_path + f", {part!r})"
27+
elif len(part) > 0:
28+
new_path += "." + part
29+
return new_path
2530

2631

2732
def make_binding_getter(attribute: Union[str, Binding]):
@@ -47,7 +52,7 @@ def make_binding_getter(attribute: Union[str, Binding]):
4752
exec(
4853
f"def getter(span):\n"
4954
f" try:\n"
50-
f" return span{path} == value\n"
55+
f" return {path} == value\n"
5156
f" except AttributeError:\n"
5257
f" return False\n",
5358
ctx,
@@ -60,7 +65,7 @@ def make_binding_getter(attribute: Union[str, Binding]):
6065
exec(
6166
f"def getter(span):\n"
6267
f" try:\n"
63-
f" return span{path}\n"
68+
f" return {path}\n"
6469
f" except AttributeError:\n"
6570
f" return None\n",
6671
ctx,
@@ -88,12 +93,12 @@ def make_binding_setter(binding: Binding):
8893
if isinstance(binding, tuple):
8994
path, value = binding
9095
path = _check_path(path)
91-
fn_string = f"""def setter(span): span{path} = value"""
96+
fn_string = f"""def setter(span): {path} = value"""
9297
ctx = {"value": value}
9398
exec(fn_string, ctx, ctx)
9499
else:
95100
path = _check_path(binding)
96-
fn_string = f"""def setter(span, value): span{path} = value"""
101+
fn_string = f"""def setter(span, value): {path} = value"""
97102
ctx = {}
98103
exec(fn_string, ctx, ctx)
99104
return ctx["setter"]

0 commit comments

Comments
 (0)