Skip to content
This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit b43f833

Browse files
committed
fix
1 parent 9f59a50 commit b43f833

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

tensor2tensor/data_generators/generator_utils.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -338,9 +338,9 @@ def generate():
338 338
339 339   # Use Tokenizer to count the word occurrences.
340 340   with tf.gfile.GFile(filepath, mode="r") as source_file:
341     -   file_byte_budget = 1e6 if filepath.endswith("en") else 1e6
    341 +   file_byte_budget = 1e6
342 342     counter = 0
343     -   countermax = int(source_file.size() / 1e6)
    343 +   countermax = int(source_file.size() / file_byte_budget / 2)
344 344     for line in source_file:
345 345       if counter < countermax:
346 346         counter += 1

0 commit comments

Comments
 (0)