This repository was archived by the owner on Jul 7, 2023. It is now read-only.

Commit 7087807

Merge pull request #73 from rsepassi/push
v1.0.9
2 parents (a2a6178 + e4fe66c), commit 7087807

26 files changed: +395 -245 lines

.gitignore

Lines changed: 0 additions & 2 deletions
@@ -1,7 +1,5 @@
 # Compiled python modules.
 *.pyc
-# Byte-compiled
-__pycache__/
 
 # Python egg metadata, regenerated from source files by setuptools.
 /*.egg-info

README.md

Lines changed: 2 additions & 2 deletions
@@ -57,7 +57,7 @@ t2t-trainer --registry_help
 
 PROBLEM=wmt_ende_tokens_32k
 MODEL=transformer
-HPARAMS=transformer_base
+HPARAMS=transformer_base_single_gpu
 
 DATA_DIR=$HOME/t2t_data
 TMP_DIR=/tmp/t2t_datagen
@@ -209,7 +209,7 @@ and hyperparameter set functions can compose other hyperparameter set functions.
 The **trainer** binary is the main entrypoint for training, evaluation, and
 inference. Users can easily switch between problems, models, and hyperparameter
 sets by using the `--model`, `--problems`, and `--hparams_set` flags. Specific
-hyperparameters can be overriden with the `--hparams` flag. `--schedule` and
+hyperparameters can be overridden with the `--hparams` flag. `--schedule` and
 related flags control local and distributed training/evaluation
 ([distributed training documentation](https://github.com/tensorflow/tensor2tensor/tree/master/tensor2tensor/docs/distributed_training.md)).
 
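The second hunk's context line notes that hyperparameter set functions can compose other hyperparameter set functions, which is how a set such as transformer_base_single_gpu relates to transformer_base. A minimal, self-contained sketch of that composition pattern (the field names and values below are assumptions for illustration, not the repository's actual definitions):

def transformer_base():
  # Assumed, illustrative defaults only.
  return {"batch_size": 4096, "learning_rate_warmup_steps": 4000}

def transformer_base_single_gpu():
  hparams = dict(transformer_base())  # compose: start from the base set...
  hparams["batch_size"] = 2048        # ...then override a few values (assumed)
  return hparams

print(transformer_base_single_gpu())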

setup.py

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 
 setup(
     name='tensor2tensor',
-    version='1.0.8',
+    version='1.0.9',
     description='Tensor2Tensor',
     author='Google Inc.',
     author_email='[email protected]',

tensor2tensor/bin/make_tf_configs.py

Lines changed: 2 additions & 3 deletions
@@ -32,7 +32,6 @@
 
 # Dependency imports
 
-import six
 import tensorflow as tf
 
 flags = tf.flags
@@ -51,7 +50,7 @@ def main(_):
 
   cluster = {"ps": ps, "worker": workers}
 
-  for task_type, jobs in six.iteritems(cluster):
+  for task_type, jobs in (("worker", workers), ("ps", ps)):
     for idx, job in enumerate(jobs):
       if task_type == "worker":
         cmd_line_flags = " ".join([
@@ -77,7 +76,7 @@ def main(_):
              "index": idx
          }
      })
-      print(tf_config + "\t" + cmd_line_flags)
+      print("'%s'\t%s" % (tf_config, cmd_line_flags))
 
 
 if __name__ == "__main__":
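Two changes are visible here: the loop now walks the cluster in an explicit order (workers first, then parameter servers) instead of relying on dict ordering via six.iteritems, and the printed TF_CONFIG JSON is wrapped in single quotes so each line can be pasted into a shell TF_CONFIG='...' assignment. A self-contained sketch of the output shape, with placeholder hosts and without the per-task command-line flags the real script assembles:

import json

ps = ["localhost:3000"]
workers = ["localhost:4000", "localhost:4001"]
cluster = {"ps": ps, "worker": workers}

for task_type, jobs in (("worker", workers), ("ps", ps)):
  for idx, _ in enumerate(jobs):
    tf_config = json.dumps({
        "cluster": cluster,
        "task": {"type": task_type, "index": idx},
    })
    cmd_line_flags = ""  # the real script joins per-task flags here
    # One tab-separated line per task; the quoted JSON is shell-ready.
    print("'%s'\t%s" % (tf_config, cmd_line_flags))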

tensor2tensor/bin/t2t-datagen

File mode changed from 100755 to 100644.
Lines changed: 3 additions & 3 deletions
@@ -37,10 +37,10 @@ from tensor2tensor.data_generators import algorithmic_math
 from tensor2tensor.data_generators import audio
 from tensor2tensor.data_generators import generator_utils
 from tensor2tensor.data_generators import image
+from tensor2tensor.data_generators import ptb
 from tensor2tensor.data_generators import snli
 from tensor2tensor.data_generators import wmt
 from tensor2tensor.data_generators import wsj_parsing
-from tensor2tensor.data_generators import ptb
 
 import tensorflow as tf
 
@@ -319,11 +319,11 @@ _SUPPORTED_PROBLEM_GENERATORS = {
         vocab_filename="tokens.vocab.%d" % 2**15,
         vocab_size=2**15)),
     "lmptb_10k": (
-      lambda: ptb.train_generator(
+        lambda: ptb.train_generator(
             FLAGS.tmp_dir,
             FLAGS.data_dir,
             False),
-      lambda: ptb.valid_generator()),
+        ptb.valid_generator),
 }
 
 # pylint: enable=g-long-lambda
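The second hunk replaces lambda: ptb.valid_generator() with the function object itself; both forms are zero-argument callables that produce the same generator, so the wrapper was redundant. A tiny illustration (the generator body is made up):

def valid_generator():
  # Stand-in body; the real generator yields PTB examples.
  yield {"inputs": [1], "targets": [1]}

wrapped = lambda: valid_generator()  # old style: extra indirection
direct = valid_generator             # new style: same behavior when called

assert list(wrapped()) == list(direct())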

tensor2tensor/bin/t2t-trainer

File mode changed from 100755 to 100644.

tensor2tensor/data_generators/algorithmic.py

Lines changed: 5 additions & 4 deletions
@@ -102,7 +102,7 @@ def zipf_distribution(nbr_symbols, alpha):
       Usually for modelling natural text distribution is in
       the range [1.1-1.6].
 
-  Return:
+  Returns:
     distr_map: list of float, Zipf's distribution over nbr_symbols.
 
   """
@@ -118,7 +118,7 @@ def zipf_random_sample(distr_map, sample_len):
     distr_map: list of float, Zipf's distribution over nbr_symbols.
     sample_len: integer, length of sequence to generate.
 
-  Return:
+  Returns:
     sample: list of integer, Zipf's random sample over nbr_symbols.
 
   """
@@ -131,8 +131,8 @@ def zipf_random_sample(distr_map, sample_len):
   return [t+1 if t > 0 else t+2 for t in np.searchsorted(distr_map, u)]
 
 
-def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \
-        scale_std_dev=100, alpha=1.5):
+def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases,
+                              scale_std_dev=100, alpha=1.5):
   """Generator for the reversing nlp-like task on sequences of symbols.
 
   The length of the sequence is drawn from a Gaussian(Normal) distribution
@@ -141,6 +141,7 @@ def reverse_generator_nlplike(nbr_symbols, max_length, nbr_cases, \
   nbr_cases sequences have been produced.
 
   Args:
+    nbr_symbols: integer, number of symbols.
     max_length: integer, maximum length of sequences to generate.
     nbr_cases: the number of cases to generate.
     scale_std_dev: float, Normal distribution's standard deviation scale factor
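For readers following the docstrings above, here is a self-contained sketch of the Zipf machinery they describe: build a cumulative distribution over symbol ranks, then draw samples by searching uniform random values into it. This mirrors the documented interface but is not the repository's exact implementation:

import numpy as np

def zipf_distribution_sketch(nbr_symbols, alpha):
  # Unnormalized Zipf weights 1/rank**alpha, turned into a cumulative map.
  weights = np.array([1.0 / rank**alpha for rank in range(1, nbr_symbols + 1)])
  return np.cumsum(weights / weights.sum())

def zipf_random_sample_sketch(distr_map, sample_len):
  # searchsorted maps each uniform draw to a rank under the distribution.
  return list(np.searchsorted(distr_map, np.random.random(sample_len)))

cdf = zipf_distribution_sketch(10, 1.5)
print(zipf_random_sample_sketch(cdf, 5))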

tensor2tensor/data_generators/algorithmic_test.py

Lines changed: 5 additions & 6 deletions
@@ -41,14 +41,13 @@ def testReverseGenerator(self):
     self.assertEqual(list(reversed(d["inputs"])) + [1], d["targets"])
     self.assertEqual(counter, 10)
 
-    def testZipfDistribution(self):
-      # Following Zipf's Law with alpha equals 1: the first in rank is two times
-      # more probable/frequent that the second in rank, three times more prob/freq
-      # that the third in rank and so on.
+  def testZipfDistribution(self):
+    # Following Zipf's Law with alpha equals 1: the first in rank is two times
+    # more probable/frequent that the second in rank, three times more prob/freq
+    # that the third in rank and so on.
     d = algorithmic.zipf_distribution(10, 1.0001)
     for i in xrange(len(d[1:])-1):
-      self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), \
-                       "%.4f" % d[1])
+      self.assertEqual("%.4f" % (abs(d[i+1]-d[i+2])*(i+2)), "%.4f" % d[1])
 
   def testReverseGeneratorNlpLike(self):
     counter = 0
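The comment kept in this hunk states the property the test checks: with alpha close to 1, rank 1 is twice as probable as rank 2, three times as probable as rank 3, and so on. A quick arithmetic check of that ratio, independent of the test code:

probs = [1.0 / rank for rank in range(1, 5)]  # unnormalized Zipf weights, alpha = 1
print(probs[0] / probs[1], probs[0] / probs[2], probs[0] / probs[3])  # 2.0 3.0 4.0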

tensor2tensor/data_generators/generator_utils.py

File mode changed from 100755 to 100644.
Lines changed: 2 additions & 1 deletion
@@ -244,7 +244,8 @@ def get_or_generate_vocab(tmp_dir, vocab_filename, vocab_size):
       if ".gz" in lang_file:
         new_filepath = os.path.join(tmp_dir, lang_file[:-3])
         if os.path.exists(new_filepath):
-          tf.logging.info("Subdirectory %s already exists, skipping unpacking" % filepath)
+          tf.logging.info("Subdirectory %s already exists, skipping unpacking"
+                          % filepath)
         else:
           tf.logging.info("Unpacking subdirectory %s" % filepath)
           gunzip_file(filepath, new_filepath)

tensor2tensor/data_generators/problem_hparams.py

Lines changed: 16 additions & 18 deletions
@@ -340,24 +340,6 @@ def lm1b_16k(model_hparams):
   p.target_space_id = 3
   return p
 
-def lmptb_10k(model_hparams):
-  """Penn Tree Bank language-modeling benchmark, 10k token vocabulary."""
-  p = default_problem_hparams()
-  p.input_modality = {}
-  p.target_modality = (registry.Modalities.SYMBOL, 10000)
-
-  vocabulary = text_encoder.TokenTextEncoder(
-      os.path.join(model_hparams.data_dir,
-                   "lmptb_10k.vocab"))
-
-  p.vocabulary = {
-      "inputs": vocabulary,
-      "targets": vocabulary,
-  }
-
-  p.input_space_id = 3
-  p.target_space_id = 3
-  return p
 
 def lm1b_64k(model_hparams):
   """Billion-word language-modeling benchmark, 64k subtoken vocabulary."""
@@ -374,6 +356,22 @@ def lm1b_64k(model_hparams):
   p.target_space_id = 3
   return p
 
+
+def lmptb_10k(model_hparams):
+  """Penn Tree Bank language-modeling benchmark, 10k token vocabulary."""
+  p = default_problem_hparams()
+  p.input_modality = {}
+  p.target_modality = (registry.Modalities.SYMBOL, 10000)
+  vocabulary = text_encoder.TokenTextEncoder(
+      os.path.join(model_hparams.data_dir, "lmptb_10k.vocab"))
+  p.vocabulary = {
+      "targets": vocabulary,
+  }
+  p.input_space_id = 3
+  p.target_space_id = 3
+  return p
+
+
 def wmt_enfr_characters(unused_model_hparams):
   """English to French translation benchmark."""
   p = default_problem_hparams()
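Besides moving lmptb_10k below lm1b_64k, the re-added definition drops the "inputs" entry from p.vocabulary. That is consistent with p.input_modality = {}: a pure language-modeling problem never encodes inputs, so only a targets vocabulary is needed. Illustrative only:

vocab = object()  # stand-in for text_encoder.TokenTextEncoder("lmptb_10k.vocab")

old_vocabulary = {"inputs": vocab, "targets": vocab}  # removed definition
new_vocabulary = {"targets": vocab}                   # re-added definition

# With input_modality == {}, nothing ever looks up an "inputs" vocabulary,
# so both dicts describe the same language-modeling problem.
assert new_vocabulary["targets"] is old_vocabulary["targets"]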

0 commit comments
