@@ -40,15 +40,15 @@ def _similar_keys(self, current_prefix, key, index, replace_chars):
40
40
b_step = key [word_pos ].encode ('utf8' )
41
41
42
42
if b_step in replace_chars :
43
- next_index = index
44
- b_replace_char , u_replace_char = replace_chars [ b_step ]
43
+ for ( b_replace_char , u_replace_char ) in replace_chars [ b_step ]:
44
+ next_index = index
45
45
46
- next_index = self .dct .follow_bytes (b_replace_char , next_index )
46
+ next_index = self .dct .follow_bytes (b_replace_char , next_index )
47
47
48
- if next_index is not None :
49
- prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
50
- extra_keys = self ._similar_keys (prefix , key , next_index , replace_chars )
51
- res += extra_keys
48
+ if next_index :
49
+ prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
50
+ extra_keys = self ._similar_keys (prefix , key , next_index , replace_chars )
51
+ res += extra_keys
52
52
53
53
index = self .dct .follow_bytes (b_step , index )
54
54
if index is None :
@@ -69,7 +69,7 @@ def similar_keys(self, key, replaces):
69
69
70
70
``replaces`` is an object obtained from
71
71
``DAWG.compile_replaces(mapping)`` where mapping is a dict
72
- that maps single-char unicode sitrings to another single-char
72
+ that maps single-char unicode strings to (one or more) single-char
73
73
unicode strings.
74
74
75
75
This may be useful e.g. for handling single-character umlauts.
@@ -80,13 +80,17 @@ def similar_keys(self, key, replaces):
80
80
def compile_replaces(cls, replaces):
    """
    Compile a character-replacement mapping into a lookup table.

    ``replaces`` maps single-char unicode strings either to one
    single-char unicode string or to a non-empty collection (list,
    tuple, ...) of single-char unicode strings.

    Returns a dict whose keys are the utf8-encoded source characters and
    whose values are lists of ``(encoded_replacement, replacement)``
    pairs, one pair per allowed replacement.

    Raises ValueError if a key is not exactly one character long, or if
    a value is empty or contains an entry that is not exactly one
    character long.
    """
    compiled = {}
    for k, v in replaces.items():
        if len(k) != 1:
            raise ValueError("Keys must be single-char unicode strings.")

        # A bare string is shorthand for a one-element collection; every
        # other iterable is materialised so that all value shapes go
        # through the same validation below (previously only ``str`` and
        # ``list`` values were checked, tuples slipped through).
        candidates = [v] if isinstance(v, str) else list(v)

        if not candidates or any(len(entry) != 1 for entry in candidates):
            raise ValueError("Values must be single-char unicode strings or non-empty lists of such.")

        compiled[k.encode('utf8')] = [
            (entry.encode('utf8'), entry) for entry in candidates
        ]
    return compiled
@@ -333,14 +337,15 @@ def _similar_items(self, current_prefix, key, index, replace_chars):
333
337
b_step = key [word_pos ].encode ('utf8' )
334
338
335
339
if b_step in replace_chars :
336
- next_index = index
337
- b_replace_char , u_replace_char = replace_chars [ b_step ]
340
+ for ( b_replace_char , u_replace_char ) in replace_chars [ b_step ]:
341
+ next_index = index
338
342
339
- next_index = self .dct .follow_bytes (b_replace_char , next_index )
340
- if next_index :
341
- prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
342
- extra_items = self ._similar_items (prefix , key , next_index , replace_chars )
343
- res += extra_items
343
+ next_index = self .dct .follow_bytes (b_replace_char , next_index )
344
+
345
+ if next_index :
346
+ prefix = current_prefix + key [start_pos :word_pos ] + u_replace_char
347
+ extra_items = self ._similar_items (prefix , key , next_index , replace_chars )
348
+ res += extra_items
344
349
345
350
index = self .dct .follow_bytes (b_step , index )
346
351
if not index :
@@ -363,7 +368,7 @@ def similar_items(self, key, replaces):
363
368
364
369
``replaces`` is an object obtained from
365
370
``DAWG.compile_replaces(mapping)`` where mapping is a dict
366
- that maps single-char unicode sitrings to another single-char
371
+ that maps single-char unicode strings to (one or more) single-char
367
372
unicode strings.
368
373
"""
369
374
return self ._similar_items ("" , key , self .dct .ROOT , replaces )
@@ -406,7 +411,7 @@ def similar_item_values(self, key, replaces):
406
411
407
412
``replaces`` is an object obtained from
408
413
``DAWG.compile_replaces(mapping)`` where mapping is a dict
409
- that maps single-char unicode sitrings to another single-char
414
+ that maps single-char unicode strings to (one or more) single-char
410
415
unicode strings.
411
416
"""
412
417
return self ._similar_item_values (0 , key , self .dct .ROOT , replaces )
0 commit comments