diff --git a/changelog.md b/changelog.md index 4c4f6fd17..3cedbe90f 100644 --- a/changelog.md +++ b/changelog.md @@ -15,6 +15,7 @@ - Support packaging with poetry 2.0 - Solve pickling issues with multiprocessing when pytorch is installed - Allow deep attributes like `a.b.c` for `span_attributes` in Standoff and OMOP doc2dict converters +- Take `filter_expr` into account in dependency parsing evaluation # v0.15.0 (2024-12-13) diff --git a/edsnlp/metrics/dep_parsing.py b/edsnlp/metrics/dep_parsing.py index 5247a483f..567277160 100644 --- a/edsnlp/metrics/dep_parsing.py +++ b/edsnlp/metrics/dep_parsing.py @@ -33,6 +33,9 @@ def dependency_parsing_metric( examples = [eg for eg in examples if filter_fn(eg.reference)] for eg_idx, eg in enumerate(examples): + if filter_expr and not eval(filter_expr, {}, eg): + continue + for token in eg.reference: items["uas"][0].add((eg_idx, token.i, token.head.i)) items["las"][0].add((eg_idx, token.i, token.head.i, token.dep_)) diff --git a/tests/training/dep_parser_config.yml b/tests/training/dep_parser_config.yml index 19bd9b036..291247a2d 100644 --- a/tests/training/dep_parser_config.yml +++ b/tests/training/dep_parser_config.yml @@ -23,6 +23,7 @@ scorer: speed: false dep: '@metrics': "eds.dep_parsing" + filter_expr: "doc.text != ''" # 🎛️ OPTIMIZER optimizer: