Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 4f55394

Browse files
authored
Merge pull request #4 from nuance/random-fixes
Two small fixes
2 parents 1047fa6 + 2951d57 commit 4f55394

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

tensor2tensor/data_generators/text_encoder.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,12 +109,12 @@ class TokenTextEncoder(TextEncoder):
109109

110110
def __init__(self, vocab_filename, reverse=False, num_reserved_ids=2):
111111
"""Initialize from a file, one token per line."""
112+
super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids)
113+
112114
self._reverse = reverse
113-
if vocab_filename is None:
115+
if vocab_filename is not None:
114116
self._load_vocab_from_file(vocab_filename)
115117

116-
super(TokenTextEncoder, self).__init__(num_reserved_ids=num_reserved_ids)
117-
118118
def encode(self, sentence):
119119
"""Converts a space-separated string of tokens to a list of ids."""
120120
ret = [self._token_to_id[tok] for tok in sentence.strip().split()]
@@ -285,7 +285,7 @@ def build_to_target_size(cls,
285285
subtokenizer.build_from_token_counts(token_counts, store_filename,
286286
present_count, num_iterations)
287287

288-
if min_val == max_val or subtokenizer.vocab_size == target_size:
288+
if min_val >= max_val or subtokenizer.vocab_size == target_size:
289289
return subtokenizer
290290
elif subtokenizer.vocab_size > target_size:
291291
other_subtokenizer = cls.build_to_target_size(

0 commit comments

Comments (0)