Skip to content

Commit 59665e5

Browse files
committed
add the possibility of nondeterministic char_substitutes
1 parent e56241e commit 59665e5

File tree

1 file changed

+25
-20
lines changed

1 file changed

+25
-20
lines changed

dawg_python/dawgs.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,15 @@ def _similar_keys(self, current_prefix, key, index, replace_chars):
4040
b_step = key[word_pos].encode('utf8')
4141

4242
if b_step in replace_chars:
43-
next_index = index
44-
b_replace_char, u_replace_char = replace_chars[b_step]
43+
for (b_replace_char, u_replace_char) in replace_chars[b_step]:
44+
next_index = index
4545

46-
next_index = self.dct.follow_bytes(b_replace_char, next_index)
46+
next_index = self.dct.follow_bytes(b_replace_char, next_index)
4747

48-
if next_index is not None:
49-
prefix = current_prefix + key[start_pos:word_pos] + u_replace_char
50-
extra_keys = self._similar_keys(prefix, key, next_index, replace_chars)
51-
res += extra_keys
48+
if next_index:
49+
prefix = current_prefix + key[start_pos:word_pos] + u_replace_char
50+
extra_keys = self._similar_keys(prefix, key, next_index, replace_chars)
51+
res += extra_keys
5252

5353
index = self.dct.follow_bytes(b_step, index)
5454
if index is None:
@@ -69,7 +69,7 @@ def similar_keys(self, key, replaces):
6969
7070
``replaces`` is an object obtained from
7171
``DAWG.compile_replaces(mapping)`` where mapping is a dict
72-
that maps single-char unicode sitrings to another single-char
72+
that maps single-char unicode strings to (one or more) single-char
7373
unicode strings.
7474
7575
This may be useful e.g. for handling single-character umlauts.
@@ -80,13 +80,17 @@ def similar_keys(self, key, replaces):
8080
def compile_replaces(cls, replaces):
8181

8282
for k,v in replaces.items():
83-
if len(k) != 1 or len(v) != 1:
84-
raise ValueError("Keys and values must be single-char unicode strings.")
83+
if len(k) != 1:
84+
raise ValueError("Keys must be single-char unicode strings.")
85+
if (isinstance(v, str) and len(v) != 1):
86+
raise ValueError("Values must be single-char unicode strings or non-empty lists of such.")
87+
if isinstance(v, list) and (any(len(v_entry) != 1 for v_entry in v) or len(v) < 1):
88+
raise ValueError("Values must be single-char unicode strings or non-empty lists of such.")
8589

8690
return dict(
8791
(
8892
k.encode('utf8'),
89-
(v.encode('utf8'), v)
93+
[(v_entry.encode('utf8'), v_entry) for v_entry in v]
9094
)
9195
for k, v in replaces.items()
9296
)
@@ -333,14 +337,15 @@ def _similar_items(self, current_prefix, key, index, replace_chars):
333337
b_step = key[word_pos].encode('utf8')
334338

335339
if b_step in replace_chars:
336-
next_index = index
337-
b_replace_char, u_replace_char = replace_chars[b_step]
340+
for (b_replace_char, u_replace_char) in replace_chars[b_step]:
341+
next_index = index
338342

339-
next_index = self.dct.follow_bytes(b_replace_char, next_index)
340-
if next_index:
341-
prefix = current_prefix + key[start_pos:word_pos] + u_replace_char
342-
extra_items = self._similar_items(prefix, key, next_index, replace_chars)
343-
res += extra_items
343+
next_index = self.dct.follow_bytes(b_replace_char, next_index)
344+
345+
if next_index:
346+
prefix = current_prefix + key[start_pos:word_pos] + u_replace_char
347+
extra_items = self._similar_items(prefix, key, next_index, replace_chars)
348+
res += extra_items
344349

345350
index = self.dct.follow_bytes(b_step, index)
346351
if not index:
@@ -363,7 +368,7 @@ def similar_items(self, key, replaces):
363368
364369
``replaces`` is an object obtained from
365370
``DAWG.compile_replaces(mapping)`` where mapping is a dict
366-
that maps single-char unicode sitrings to another single-char
371+
that maps single-char unicode strings to (one or more) single-char
367372
unicode strings.
368373
"""
369374
return self._similar_items("", key, self.dct.ROOT, replaces)
@@ -406,7 +411,7 @@ def similar_item_values(self, key, replaces):
406411
407412
``replaces`` is an object obtained from
408413
``DAWG.compile_replaces(mapping)`` where mapping is a dict
409-
that maps single-char unicode sitrings to another single-char
414+
that maps single-char unicode strings to (one or more) single-char
410415
unicode strings.
411416
"""
412417
return self._similar_item_values(0, key, self.dct.ROOT, replaces)

0 commit comments

Comments
 (0)