
Commit c4ca5a4

Merge pull request #692 from brettkoonce/minor_sp

tensor2tensor: minor spelling tweaks

2 parents: 56bac6d + 2180390
19 files changed: +41 −41 lines
tensor2tensor/bin/t2t_bleu.py

Lines changed: 1 addition & 1 deletion
@@ -74,7 +74,7 @@
 flags.DEFINE_string("translation", None,
                     "Path to the MT system translation file")
 flags.DEFINE_string("translations_dir", None,
-                    "Directory with translated files to be evaulated.")
+                    "Directory with translated files to be evaluated.")
 flags.DEFINE_string("event_dir", None, "Where to store the event file.")

 flags.DEFINE_string("bleu_variant", "both",

tensor2tensor/data_generators/algorithmic_math.py

Lines changed: 3 additions & 3 deletions
@@ -181,7 +181,7 @@ def algebra_inverse_solve(left, right, var, solve_ops):
     right- Expression on the right side of the op.
     to_tree- The tree on the other side of the equal sign. The canceled
         out expression will be moved here.
-    new_from_tree- The resuling from_tree after the algebraic
+    new_from_tree- The resulting from_tree after the algebraic
         manipulation.
     new_to_tree- The resulting to_tree after the algebraic manipulation.

@@ -355,7 +355,7 @@ def generate_calculus_integrate_sample(vlist, ops, min_depth, max_depth,
  # functions: Dict of special function names. Maps human readable string names to
  #     single char names used in flist.
  # ops: Dict mapping op symbols (chars) to ExprOp instances.
- # solve_ops: Encodes rules for how to algebraicly cancel out each operation. See
+ # solve_ops: Encodes rules for how to algebraically cancel out each operation. See
  #     doc-string for `algebra_inverse_solve`.
  # int_encoder: Function that maps a string to a list of tokens. Use this to
  #     encode an expression to feed into a model.
@@ -377,7 +377,7 @@ def math_dataset_init(alphabet_size=26, digits=None, functions=None):

   Args:
     alphabet_size: How many possible variables there are. Max 52.
-    digits: How many numerical digits to encode as tokens, "0" throuh
+    digits: How many numerical digits to encode as tokens, "0" through
       str(digits-1), or None to encode no digits.
     functions: Defines special functions. A dict mapping human readable string
       names, like "log", "exp", "sin", "cos", etc., to single chars. Each

tensor2tensor/data_generators/speech_recognition.py

Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-"""Common classes for automatic speech recogntion (ASR) datasets.
+"""Common classes for automatic speech recognition (ASR) datasets.

 The audio import uses sox to generate normalized waveforms, please install
 it as appropriate (e.g. using apt-get or yum).

tensor2tensor/data_generators/text_encoder.py

Lines changed: 1 addition & 1 deletion
@@ -348,7 +348,7 @@ def store_to_file(self, filename):
 def _escape_token(token, alphabet):
   """Escape away underscores and OOV characters and append '_'.

-  This allows the token to be experessed as the concatenation of a list
+  This allows the token to be expressed as the concatenation of a list
   of subtokens from the vocabulary. The underscore acts as a sentinel
   which allows us to invertibly concatenate multiple such lists.

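For context, the escaping idea this docstring describes works roughly as in the sketch below. This is a simplified illustration, not the exact _escape_token implementation; consult text_encoder.py for the real escape grammar. The goal is that "_" can safely serve as an end-of-token sentinel:

    def escape_token_sketch(token, alphabet):
      # Rewrite characters that would collide with the sentinel or fall
      # outside the subtoken alphabet, then append the "_" sentinel so a
      # concatenation of escaped tokens can be split apart again.
      out = []
      for c in token:
        if c == "_":
          out.append("\\u")              # underscore collides with the sentinel
        elif c == "\\":
          out.append("\\\\")             # escape the escape character itself
        elif c in alphabet:
          out.append(c)
        else:
          out.append("\\%d;" % ord(c))   # out-of-alphabet char -> code point
      return "".join(out) + "_"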

tensor2tensor/data_generators/translate_enzh.py

Lines changed: 1 addition & 1 deletion
@@ -172,7 +172,7 @@ class TranslateEnzhWmt32k(translate.TranslateProblem):

   CWMT:
    - http://nlp.nju.edu.cn/cwmt-wmt/
-   - Website contrains instructions for FTP server access.
+   - Website contains instructions for FTP server access.
    - You'll need to download CASIA, CASICT, DATUM2015, DATUM2017,
      NEU datasets


tensor2tensor/data_generators/wiki.py

Lines changed: 1 addition & 1 deletion
@@ -190,7 +190,7 @@ def scramble_fraction(self):

 @registry.register_problem
 class LanguagemodelWikiScrambleL1k(LanguagemodelWikiScramble):
-  """Sequence length 1024, 50% scrambed."""
+  """Sequence length 1024, 50% scrambled."""

   @property
   def sequence_length(self):

tensor2tensor/layers/common_attention.py

Lines changed: 6 additions & 6 deletions
@@ -83,7 +83,7 @@ def register_layer(
     default_args (list): The default parameters to add to the function.
     default_kwargs (dict): The default parameters to add to the function.
       Those arguments can be overwritten when calling the function.
-    use_dp (bool): Wrap the function call within a dataparalellism object if
+    use_dp (bool): Wrap the function call within a dataparallelism object if
       dp is available. Some layers (like MOE) must be called without dp.
     recompute_grad (bool): If True, recompute the function during the
       backward pass to save memory
@@ -1378,7 +1378,7 @@ def _relative_attention_inner(x, y, z, transpose):
     x: Tensor with shape [batch_size, heads, length, length or depth].
     y: Tensor with shape [batch_size, heads, length, depth].
     z: Tensor with shape [length, length, depth].
-    transpose: Whether to tranpose inner matrices of y and z. Should be true if
+    transpose: Whether to transpose inner matrices of y and z. Should be true if
       last dimension of x is depth, not length.

   Returns:
@@ -1422,7 +1422,7 @@ def dot_product_attention_relative(q,
     k: a Tensor with shape [batch, heads, length, depth].
     v: a Tensor with shape [batch, heads, length, depth].
     bias: bias Tensor.
-    max_relative_position: an integer specifying the maxmimum distance between
+    max_relative_position: an integer specifying the maximum distance between
       inputs that unique position embeddings should be learned for.
     dropout_rate: a floating point number.
     image_shapes: optional tuple of integer scalars.
@@ -2141,7 +2141,7 @@ def gather_indices_2d(x, block_shape, block_stride):


 def make_2d_block_raster_mask(query_shape, memory_flange):
-  """creates a mask for 2d block raster scany.
+  """creates a mask for 2d block raster scan.

   The query mask can look to the left, top left, top, and top right, but
   not to the right. Inside the query, we have the standard raster scan
@@ -2661,7 +2661,7 @@ def ffn_self_attention_layer(x,
   We use self-attention to do feedforward computations. We apply this function
   positionwise where for each position, we linearly transform the output to have
   depth filter_depth, and break up the result depth-wise into num_parts
-  contiguous parts. The parts self-attentd, we concatenate the results
+  contiguous parts. The parts self-attend, we concatenate the results
   depth-wise, and we linearly transform to a depth of output_depth. The
   goal is to get multiplicative interactions between components of a
   representation.
@@ -2764,7 +2764,7 @@ def parameter_attention(x,
         x, total_key_depth, use_bias=False, name="q_transform")
   if dropout_rate:
     # This is a cheaper form of attention dropout where we use to use
-    # the same dropout decisions across batch elemets and query positions,
+    # the same dropout decisions across batch elements and query positions,
     # but different decisions across heads and memory positions.
     v = tf.nn.dropout(
         v, 1.0 - dropout_rate, noise_shape=[num_heads, memory_rows, 1])
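A note on the noise_shape argument visible in the last hunk (this is standard tf.nn.dropout behavior, not something introduced by this commit): any dimension set to 1 in noise_shape shares one keep/drop decision along that axis, which is what yields the same decision across the broadcast dimensions. A minimal illustration with made-up shapes, using the TF 1.x keep_prob signature this repository targets:

    import tensorflow as tf

    # Hypothetical values tensor shaped [num_heads, memory_rows, depth].
    v = tf.ones([8, 128, 64])
    # One Bernoulli draw per (head, memory_row); the trailing 1 broadcasts
    # that decision across the entire depth dimension.
    v_dropped = tf.nn.dropout(v, keep_prob=0.9, noise_shape=[8, 128, 1])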

tensor2tensor/layers/common_hparams.py

Lines changed: 4 additions & 4 deletions
@@ -102,13 +102,13 @@ def basic_params1():
       moe_loss_coef=1e-2,
       # Sequences of operations to perform on layer input and layer output.
       # Used by common_layers.layer_preprocess, common_layers.layer_postprocess
-      # Each character repsesnts an operation:
+      # Each character represents an operation:
       # none: no preprocessing
       # d: apply dropout
       # n: apply normalization (see norm_type and norm_epsilon)
       # a: add layer input (residual connection - only during postprocess)
       # The special string "none" is used instead of the empty string
-      # to indicate no pre/postprocesisng, since the empty string causes
+      # to indicate no pre/postprocessing, since the empty string causes
       # trouble for hyperparameter tuning.
       # TODO(noam): The current settings ("", "dan") are the published version
       # of the transformer. ("n", "da") seems better for harder-to-learn
@@ -174,13 +174,13 @@ def basic_params1():
       # The maximum length of "input" sequence.
       # Sequences longer than this value will be truncated. 0 or negative values
       # mean there is no maximum or truncation.
-      # You can change this behavior by overridding preprocess_example() method
+      # You can change this behavior by overriding preprocess_example() method
      # in your problem class.
       max_input_seq_length=0,
       # The maximum length of "target" sequence.
       # Sequences longer than this value will be truncated. 0 or negative values
       # mean there is no maximum or truncation.
-      # You can change this behavior by overridding preprocess_example() method
+      # You can change this behavior by overriding preprocess_example() method
       # in your problem class.
       max_target_seq_length=0,
       # if nonzero, we split the target sequences on example read.
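For readers unfamiliar with these one-character op sequences, the sketch below shows roughly how such a string is interpreted. This is a hedged illustration, not the actual common_layers.layer_prepostprocess code; the helper name and signature here are made up:

    import tensorflow as tf

    def apply_op_sequence(x, previous_value, sequence, dropout_rate, norm_fn):
      """Apply the ops named by `sequence` ("d", "a", "n") to x in order."""
      if sequence == "none":       # "none" rather than "" to keep tuners happy
        return x
      for op in sequence:
        if op == "d":              # d: dropout
          x = tf.nn.dropout(x, keep_prob=1.0 - dropout_rate)
        elif op == "a":            # a: add the layer input (residual)
          x = x + previous_value
        elif op == "n":            # n: normalize
          x = norm_fn(x)
      return x

    # With the published transformer settings, preprocess is "" (none) and
    # postprocess is "dan": dropout, add residual, then normalize.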

tensor2tensor/layers/common_layers.py

Lines changed: 1 addition & 1 deletion
@@ -1201,7 +1201,7 @@ def add_timing_signal(x, min_timescale=1, max_timescale=1e4, num_timescales=16):
   and the target of the attention.

   The use of relative position is possible because sin(x+y) and cos(x+y) can be
-  experessed in terms of y, sin(x) and cos(x).
+  expressed in terms of y, sin(x) and cos(x).

   In particular, we use a geometric sequence of timescales starting with
   min_timescale and ending with max_timescale. For each timescale, we
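For reference, the claim in this docstring is the standard angle-addition identities: sin(x + y) = sin(x)cos(y) + cos(x)sin(y) and cos(x + y) = cos(x)cos(y) − sin(x)sin(y). The signal at position x + y is therefore a fixed linear function (depending only on the offset y) of the signal at position x, which is what lets attention learn to attend by relative position.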

tensor2tensor/models/research/attention_lm.py

Lines changed: 1 addition & 1 deletion
@@ -70,7 +70,7 @@ def attention_lm_prepare_decoder(targets, hparams):
   Returns:
     decoder_input: a Tensor, bottom of decoder stack
     decoder_self_attention_bias: a Tensor, containing large negative values
-    to implement masked attention and possibly baises for diagonal alignments
+    to implement masked attention and possibly biases for diagonal alignments
   """
   if hparams.prepend_mode == "prepend_inputs_full_attention":
     decoder_self_attention_bias = (
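For context, the kind of masked-attention bias this docstring describes is typically a lower-triangular pattern of zeros with large negative values above the diagonal, as in this illustrative sketch (not the actual attention_lm.py code):

    import tensorflow as tf

    def causal_attention_bias(length):
      # 0.0 where a query may attend (key position <= query position),
      # -1e9 where it may not; -1e9 drives those logits to ~0 after softmax.
      lower_triangle = tf.matrix_band_part(tf.ones([length, length]), -1, 0)
      bias = -1e9 * (1.0 - lower_triangle)
      # Reshape so the bias broadcasts over the [batch, heads] dimensions.
      return tf.reshape(bias, [1, 1, length, length])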
