File tree Expand file tree Collapse file tree 4 files changed +33
-7
lines changed Expand file tree Collapse file tree 4 files changed +33
-7
lines changed Original file line number Diff line number Diff line change 1
- from typing import Any , Dict
1
+ from typing import Any , Dict , List
2
2
3
3
from typing_extensions import Literal
4
4
@@ -28,6 +28,7 @@ def create_component(
28
28
name : str = "cim10" ,
29
29
* ,
30
30
attr : str = "NORM" ,
31
+ cim10 : List [str ] = None ,
31
32
ignore_excluded : bool = False ,
32
33
ignore_space_tokens : bool = False ,
33
34
term_matcher : Literal ["exact" , "simstring" ] = "exact" ,
@@ -75,6 +76,9 @@ def create_component(
75
76
The pipeline object
76
77
name : str
77
78
The name of the component
79
+ cim10 : List[str]
80
+ List of cim10 to retrieve. If None, all cim10 will be searched,
81
+ resulting in higher computation time.
78
82
attr : str
79
83
The default attribute to use for matching.
80
84
ignore_excluded : bool
@@ -104,7 +108,7 @@ def create_component(
104
108
nlp = nlp ,
105
109
name = name ,
106
110
regex = dict (),
107
- terms = get_patterns (),
111
+ terms = get_patterns (cim10 ),
108
112
attr = attr ,
109
113
ignore_excluded = ignore_excluded ,
110
114
ignore_space_tokens = ignore_space_tokens ,
Original file line number Diff line number Diff line change 5
5
from edsnlp import BASE_DIR
6
6
7
7
8
- def get_patterns () -> Dict [str , List [str ]]:
8
def filter_dict_by_keys(D: Dict[str, List[str]], L: List[str]) -> Dict[str, List[str]]:
    """
    Keep only the entries of `D` whose key starts with one of the prefixes in `L`.

    Parameters
    ----------
    D : Dict[str, List[str]]
        Dictionary to filter. It is not modified.
    L : List[str]
        Prefixes to keep. A single string is treated as one prefix
        instead of being iterated character by character.

    Returns
    -------
    Dict[str, List[str]]
        A new dictionary holding the entries whose key matches a prefix.
        An empty `L` yields an empty dictionary.
    """
    # Iterating over a bare string would turn every character into a
    # prefix and silently match far more keys than requested.
    prefixes = (L,) if isinstance(L, str) else tuple(L)

    # `str.startswith` accepts a tuple of candidates, so no inner loop is needed.
    return {k: v for k, v in D.items() if k.startswith(prefixes)}
13
+
14
+
15
+ def get_patterns (cim10 : List [str ] = None ) -> Dict [str , List [str ]]:
9
16
df = pd .read_csv (BASE_DIR / "resources" / "cim10.csv.gz" )
10
17
11
18
df ["code_pattern" ] = df ["code" ]
@@ -30,4 +37,6 @@ def get_patterns() -> Dict[str, List[str]]:
30
37
31
38
patterns = df .groupby ("code" )["patterns" ].agg (list ).to_dict ()
32
39
40
+ patterns = filter_dict_by_keys (patterns , cim10 ) if cim10 else patterns
41
+
33
42
return patterns
Original file line number Diff line number Diff line change 1
- from typing import Any , Dict
1
+ from typing import Any , Dict , List
2
2
3
3
from typing_extensions import Literal
4
4
@@ -28,6 +28,7 @@ def create_component(
28
28
name : str = "drugs" ,
29
29
* ,
30
30
attr : str = "NORM" ,
31
+ atc : List [str ] = None ,
31
32
ignore_excluded : bool = False ,
32
33
ignore_space_tokens : bool = False ,
33
34
term_matcher : Literal ["exact" , "simstring" ] = "exact" ,
@@ -83,6 +84,9 @@ def create_component(
83
84
The name of the component
84
85
attr : str
85
86
The default attribute to use for matching.
87
+ atc : List[str]
88
+ List of atc to retrieve. If None, all atc will be searched,
89
+ resulting in higher computation time.
86
90
ignore_excluded : bool
87
91
Whether to skip excluded tokens (requires an upstream
88
92
pipeline to mark excluded tokens).
@@ -111,7 +115,7 @@ def create_component(
111
115
nlp = nlp ,
112
116
name = name ,
113
117
regex = dict (),
114
- terms = get_patterns (),
118
+ terms = get_patterns (atc ),
115
119
attr = attr ,
116
120
ignore_excluded = ignore_excluded ,
117
121
ignore_space_tokens = ignore_space_tokens ,
Original file line number Diff line number Diff line change 6
6
drugs_file = BASE_DIR / "resources" / "drugs.json"
7
7
8
8
9
- def get_patterns () -> Dict [str , List [str ]]:
9
def filter_dict_by_keys(D: Dict[str, List[str]], L: List[str]) -> Dict[str, List[str]]:
    """
    Keep only the entries of `D` whose key starts with one of the prefixes in `L`.

    Parameters
    ----------
    D : Dict[str, List[str]]
        Dictionary to filter. It is not modified.
    L : List[str]
        Prefixes to keep. A single string is treated as one prefix
        instead of being iterated character by character.

    Returns
    -------
    Dict[str, List[str]]
        A new dictionary holding the entries whose key matches a prefix.
        An empty `L` yields an empty dictionary.
    """
    # Iterating over a bare string would turn every character into a
    # prefix and silently match far more keys than requested.
    prefixes = (L,) if isinstance(L, str) else tuple(L)

    # `str.startswith` accepts a tuple of candidates, so no inner loop is needed.
    return {k: v for k, v in D.items() if k.startswith(prefixes)}
14
+
15
+
16
def get_patterns(atc: List[str] = None) -> Dict[str, List[str]]:
    """
    Load the drug patterns from `drugs_file`, optionally filtered by key prefix.

    Parameters
    ----------
    atc : List[str], optional
        Key prefixes to keep (presumably ATC codes; confirm against the
        content of the resource file). If None or empty, every entry of
        the file is returned.

    Returns
    -------
    Dict[str, List[str]]
        The patterns read from the JSON resource, restricted to the
        requested prefixes when `atc` is provided.
    """
    # `atc` defaults to None so that pre-existing `get_patterns()` calls
    # keep working, mirroring the cim10 counterpart.
    # JSON is read as UTF-8 explicitly so loading does not depend on the
    # platform's default locale encoding.
    with open(drugs_file, "r", encoding="utf-8") as f:
        patterns = json.load(f)

    if atc:
        patterns = filter_dict_by_keys(patterns, atc)

    return patterns
You can’t perform that action at this time.
0 commit comments