Skip to content

Commit 51bd60b

Browse files
committed
fix : adding new separator in eds.table and new input check
1 parent 54a3ff8 commit 51bd60b

File tree

3 files changed

+16
-6
lines changed

3 files changed

+16
-6
lines changed

changelog.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
### Fixed
6+
- Added `|` as a table separator in `eds.table`, and a new input check requiring `tables_pattern` and `sep_pattern` to be provided together
7+
38
## v0.11.2
49

510
### Fixed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
sep = r"¦"
1+
# Cell separators recognised in tables: the broken bar "¦" or the ASCII pipe "|".
# The alternation is wrapped in a non-capturing group so that it stays scoped
# when interpolated into a larger pattern (e.g. rf"(\b.*{sep}.*\n)+"); a bare
# "¦|\|" would split the enclosing regex into two unrelated alternatives.
# The group is non-capturing so splitting on `sep` does not emit the separators.
sep = r"(?:¦|\|)"
22
regex = dict(
33
tables=rf"(\b.*{sep}.*\n)+",
44
)

edsnlp/pipes/misc/tables/tables.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ class TablesMatcher(GenericMatcher):
102102
tables_pattern : Optional[Dict[str, str]]
103103
The regex pattern to identify tables.
104104
The dictionary key should be `tables`.
105+
sep_pattern : Optional[str]
106+
The regex pattern used to identify the table separator.
107+
Used when calling `to_pd_table`.
105108
attr : str
106109
spaCy's attribute to use:
107110
a string with the value "TEXT" or "NORM", or a dict with
@@ -124,14 +127,16 @@ def __init__(
124127
attr: Union[Dict[str, str], str] = "TEXT",
125128
ignore_excluded: bool = True,
126129
):
127-
if tables_pattern is None:
130+
if tables_pattern is None and sep_pattern is None:
128131
self.tables_pattern = patterns.regex
129-
else:
130-
self.tables_pattern = tables_pattern
131-
132-
if sep_pattern is None:
133132
self.sep = patterns.sep
133+
elif tables_pattern is None or sep_pattern is None:
134+
raise ValueError(
135+
"Both tables_pattern and sep_pattern must be provided "
136+
"for custom eds.table pipeline."
137+
)
134138
else:
139+
self.tables_pattern = tables_pattern
135140
self.sep = sep_pattern
136141

137142
super().__init__(

0 commit comments

Comments
 (0)