From f52007e0c2b9da8cb41ca708f5a557c0c9fcfc43 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Sun, 22 Sep 2024 18:06:23 -0700 Subject: [PATCH 1/8] llava e2e 1/n --- freq_compare.py | 126 +++++++ llava_out.txt | 278 +++++++++++++++ torchchat/cli/convert_hf_checkpoint.py | 132 +++++++- torchchat/cli/download.py | 41 ++- torchchat/generate.py | 7 + torchchat/model.py | 450 ++++++++++++++++++++++++- torchchat/model_config/models.json | 6 + torchchat/model_params/llava-1.5.json | 3 +- 8 files changed, 1029 insertions(+), 14 deletions(-) create mode 100644 freq_compare.py create mode 100644 llava_out.txt diff --git a/freq_compare.py b/freq_compare.py new file mode 100644 index 000000000..13ecbdd2f --- /dev/null +++ b/freq_compare.py @@ -0,0 +1,126 @@ +import torch +from typing import Any, Dict, Optional, Tuple +from torchchat.utils.build_utils import find_multiple, get_precision + +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L77 +def hf_precompute_freqs_cis(dim: int, end: int, theta: float): + freqs = 1.0 / ( + theta + ** (torch.arange(0, dim, 2, device="cpu", dtype=torch.int64).float() / dim) + ) + # pyre-ignore Undefined attribute [16]: `float` has no attribute `device`. + t = torch.arange(end, device=freqs.device, dtype=torch.int64).type_as( + freqs # pyre-ignore + ) + freqs = torch.outer(t, freqs).float() # pyre-ignore + emb = torch.cat((freqs, freqs), dim=-1) + freqs_cos = torch.cos(emb) + freqs_sin = torch.sin(emb) + return freqs_cos, freqs_sin + + +def precompute_freqs_cis( + n_elem: int, + seq_len: int, + base: int = 10000, + dtype=None, + rope_scaling: Optional[Dict[str, Any]] = None, +): + if not dtype: + dtype = get_precision() + freqs = 1.0 / ( + base ** (torch.arange(0, n_elem, 2)[: (n_elem // 2)].float() / n_elem) + ) + t = torch.arange(seq_len, device=freqs.device) + if rope_scaling is not None: + freqs = apply_scaling(freqs, rope_scaling) + freqs = torch.outer(t, freqs) + freqs_cis = torch.polar(torch.ones_like(freqs), freqs) + cache = torch.stack([freqs_cis.real, freqs_cis.imag], dim=-1) + return cache.to(dtype=dtype) + +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L135 +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def hf_apply_rotary_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + position_ids (`torch.Tensor`, *optional*): + Deprecated and unused. + unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. + Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = cos.unsqueeze(unsqueeze_dim) + sin = sin.unsqueeze(unsqueeze_dim) + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + +def apply_rotary_emb(x, freqs_cis): + xshaped = x.float().reshape(*x.shape[:-1], -1, 2) + freqs_cis = freqs_cis.view(1, xshaped.size(1), 1, xshaped.size(3), 2) + x_out2 = torch.stack( + [ + xshaped[..., 0] * freqs_cis[..., 0] - xshaped[..., 1] * freqs_cis[..., 1], + xshaped[..., 1] * freqs_cis[..., 0] + xshaped[..., 0] * freqs_cis[..., 1], + ], + -1, + ) + + x_out2 = x_out2.flatten(3) + return x_out2.type_as(x) + + +# 比较函数 +def compare_methods(): + torch.manual_seed(0) + x = torch.randn(1, 636, 32, 128) + + # 设置参数 + n_elem = 128 + seq_len = 1536 + base = 10000 + dtype = None + rope_scaling = None + + all_freq_cis = precompute_freqs_cis(n_elem, seq_len, base, dtype, rope_scaling) + input_pos = torch.arange( + x.shape[1], + device=x.device, + dtype=torch.int, + ) + freq_cis = all_freq_cis[input_pos] + x_out1 = apply_rotary_emb(x, freq_cis) + + + dim = 128 + end = 1536 + theta = 10000.0 + freqs_cos, freqs_sin = hf_precompute_freqs_cis(dim, end, theta) + fc, fs = freqs_cos[:x.shape[1]], freqs_sin[:x.shape[1]] + x_out2, _ = hf_apply_rotary_emb(x, x, fc, fs) + + print(x_out1) + print("************************") + print(x_out2) + + +if __name__ == "__main__": + compare_methods() diff --git a/llava_out.txt b/llava_out.txt new file mode 100644 index 000000000..ba363dc79 --- /dev/null +++ b/llava_out.txt @@ -0,0 +1,278 @@ +Preparing input +Done, Now creating model... +Transformer config: TransformerArgs(block_size=2048, vocab_size=32064, n_layers=32, n_heads=32, dim=4096, hidden_dim=11008, n_local_heads=32, head_dim=128, rope_base=10000, norm_eps=1e-05, multiple_of=256, ffn_dim_multiplier=None, use_tiktoken=False, max_seq_length=768, rope_scaling=None, n_stages=1, stage_idx=0) +Done. Now loading checkpoint... +Done. Now checkpoint remapping... +Done. Now setup caches... +input args for precompute_freqs_cis: +n_elem: 128 +seq_len: 4096 +base: 10000 +dtype: None +rope_scaling: None +tensor([1.0000e+00, 8.6596e-01, 7.4989e-01, 6.4938e-01, 5.6234e-01, 4.8697e-01, + 4.2170e-01, 3.6517e-01, 3.1623e-01, 2.7384e-01, 2.3714e-01, 2.0535e-01, + 1.7783e-01, 1.5399e-01, 1.3335e-01, 1.1548e-01, 1.0000e-01, 8.6596e-02, + 7.4989e-02, 6.4938e-02, 5.6234e-02, 4.8697e-02, 4.2170e-02, 3.6517e-02, + 3.1623e-02, 2.7384e-02, 2.3714e-02, 2.0535e-02, 1.7783e-02, 1.5399e-02, + 1.3335e-02, 1.1548e-02, 1.0000e-02, 8.6596e-03, 7.4989e-03, 6.4938e-03, + 5.6234e-03, 4.8697e-03, 4.2170e-03, 3.6517e-03, 3.1623e-03, 2.7384e-03, + 2.3714e-03, 2.0535e-03, 1.7783e-03, 1.5399e-03, 1.3335e-03, 1.1548e-03, + 1.0000e-03, 8.6596e-04, 7.4989e-04, 6.4938e-04, 5.6234e-04, 4.8697e-04, + 4.2170e-04, 3.6517e-04, 3.1623e-04, 2.7384e-04, 2.3714e-04, 2.0535e-04, + 1.7783e-04, 1.5399e-04, 1.3335e-04, 1.1548e-04], device='cuda:0') +tensor([ 0, 1, 2, ..., 4093, 4094, 4095], device='cuda:0') +Done. Now running prefilling inference... +Hidden state before layer 0 is: +torch.Size([1, 636, 4096]) +tensor([[[ 0.0045, -0.0038, 0.0017, ..., -0.0088, 0.0025, -0.0025], + [-0.0112, -0.0129, -0.0121, ..., 0.0090, 0.0118, -0.0081], + [ 0.0195, -0.0058, 0.0061, ..., 0.0171, -0.0052, -0.0212], + ..., + [-0.0187, -0.0017, 0.0177, ..., 0.0238, 0.0052, 0.0101], + [ 0.0066, -0.0161, 0.0117, ..., -0.0103, 0.0148, 0.0073], + [ 0.0039, 0.0015, 0.0055, ..., -0.0042, 0.0151, 0.0024]]], + device='cuda:0', grad_fn=) +**************************************************************************************************** +q and k, before apply_rotary_emb: +torch.Size([1, 636, 32, 128]) +tensor([[[[ 8.8137e-02, -2.7958e-01, 5.8506e-01, ..., -2.7226e+00, + 1.4709e+00, -2.3085e-01], + [-1.3625e-01, 2.3757e-01, -5.1018e-01, ..., 4.1010e-01, + -5.6383e-01, 4.5404e-01], + [-3.4834e-03, 3.2426e-01, 3.8369e-01, ..., 2.9280e-01, + 2.8449e-01, 2.7460e-01], + ..., + [ 5.8234e-01, -1.2618e+00, -3.6595e-01, ..., 1.3358e+00, + 4.3184e-01, 1.0434e+00], + [ 5.9955e-01, 7.9891e-01, -4.6955e-01, ..., -4.0199e-01, + 4.2534e-01, 3.9606e-01], + [-1.1633e+00, 1.2683e+00, -2.0897e-01, ..., -1.1549e-01, + 5.5778e-01, 1.6868e-01]], + + [[ 1.2024e-01, -1.0154e+00, 1.6093e+00, ..., -5.5972e-01, + 3.1489e-01, -8.1850e-01], + [ 4.2993e-01, 2.0342e-01, -1.0682e+00, ..., -2.5782e-01, + 1.5268e-02, -2.4300e-01], + [-5.3137e-01, -6.0827e-01, -7.6472e-01, ..., -3.1512e-01, + -2.7736e-01, -2.5732e-01], + ..., + [ 2.8136e-01, -5.2945e-01, -1.7826e-01, ..., 1.1955e+00, + 5.4279e-01, 8.5956e-01], + [ 5.3614e-01, 2.0246e+00, -1.2608e+00, ..., -3.7489e-01, + 6.3723e-01, 6.5132e-01], + [-1.8604e+00, -1.2568e+00, -1.6225e+00, ..., -6.7318e-02, + 1.0279e-01, 9.0100e-02]], + + [[ 7.2795e-02, -1.4323e+00, 2.2321e+00, ..., -7.5136e-01, + 5.6175e-01, -9.5831e-01], + [ 1.1465e-01, 1.6517e-01, -4.7936e-01, ..., 4.2424e-01, + -9.5693e-01, 5.2483e-01], + [ 5.8809e-01, -2.5397e-02, -8.4822e-01, ..., 4.9272e-01, + 4.7836e-01, 5.0513e-01], + ..., + [ 4.3381e-01, -3.1210e+00, -1.0508e+00, ..., 1.3402e+00, + 1.0287e+00, 2.3169e+00], + [ 9.3764e-01, 2.3067e+00, -1.5072e+00, ..., -3.8047e-01, + 6.4796e-01, 6.6232e-01], + [-9.0192e-01, -3.0670e-01, -8.5899e-01, ..., -1.1170e-01, + 3.3573e-01, -6.0066e-01]], + + ..., + + [[ 1.8439e-01, -1.0198e+00, 1.9433e+00, ..., -9.0137e-01, + 7.8104e-01, -1.0152e+00], + [ 1.5872e-01, 8.4707e-02, -6.7571e-01, ..., 3.9261e-01, + -6.9446e-01, 4.2872e-01], + [ 3.6119e-01, 3.0833e-01, -3.7900e-01, ..., 8.6110e-02, + 8.1632e-02, 1.0222e-01], + ..., + [-1.6853e-02, -1.1853e+00, -2.1825e-01, ..., 1.2019e+00, + 3.7029e-01, 1.3317e+00], + [ 7.4456e-01, 1.8874e+00, -1.2402e+00, ..., -3.5604e-01, + 6.1179e-01, 6.2754e-01], + [-1.2021e+00, -6.7119e-01, -1.1290e+00, ..., -4.6737e-02, + -2.0528e-02, -1.7886e-01]], + + [[ 3.5354e-01, -8.3204e-01, 1.7218e+00, ..., -7.6154e-01, + 6.9443e-01, -8.8789e-01], + [ 1.0955e-01, 9.3493e-03, -6.4744e-01, ..., 3.6177e-01, + -6.0106e-01, 3.8051e-01], + [ 3.2930e-01, 2.9724e-01, -3.5446e-01, ..., -1.3412e-03, + -8.3974e-03, 7.1401e-03], + ..., + [-1.1190e-01, -7.5404e-01, -1.2496e-01, ..., 1.2401e+00, + 3.3029e-01, 1.1143e+00], + [ 6.3004e-01, 1.7134e+00, -1.1184e+00, ..., -3.2237e-01, + 5.6728e-01, 5.7957e-01], + [-1.2236e+00, -6.8457e-01, -1.1400e+00, ..., -2.8917e-02, + -2.6101e-02, -1.6053e-01]], + + [[ 2.2438e-01, 4.1760e-01, 3.5730e-01, ..., -4.1907e-01, + 1.9779e-01, -3.7496e-01], + [ 3.3656e-01, -5.3128e-02, -1.4364e+00, ..., -2.9568e-01, + 2.5090e-01, -3.4443e-01], + [-5.7520e-01, -2.8830e-01, -1.8663e-01, ..., -3.3193e-01, + -2.9675e-01, -2.8231e-01], + ..., + [ 2.1342e-01, -3.2742e-01, -5.2910e-02, ..., 1.8387e+00, + 4.8990e-01, 8.7483e-01], + [ 4.3541e-01, 1.9233e+00, -1.2351e+00, ..., -3.8134e-01, + 6.0760e-01, 6.1868e-01], + [-2.0587e+00, -1.5213e+00, -1.8633e+00, ..., -9.8056e-02, + 1.6597e-01, 3.9716e-01]]]], device='cuda:0', + grad_fn=) +torch.Size([1, 636, 32, 128]) +tensor([[[[-0.4032, -0.0167, 0.0300, ..., 0.0826, -0.0706, -0.0707], + [ 1.1458, 0.9121, -0.3251, ..., 0.5441, -0.2957, 0.5221], + [ 0.0237, -0.2481, -0.3345, ..., -0.1705, 0.3765, 0.5979], + ..., + [-0.0205, -0.0151, 0.0137, ..., 0.2737, 0.8049, -0.4081], + [ 0.1832, -0.4518, -0.2057, ..., -0.1818, 0.0770, 0.0802], + [-0.3089, 1.2236, 0.1609, ..., 0.6910, 0.1255, 0.2735]], + + [[-0.2899, -0.0028, 0.0520, ..., 0.2636, -0.2013, 0.2681], + [ 0.6155, 0.8224, 0.3304, ..., -0.4969, 0.1219, -0.3481], + [-0.2023, 0.1655, 0.3203, ..., 1.6260, 1.6661, 1.5222], + ..., + [-1.2277, -0.3016, -0.8174, ..., -0.5467, -0.9489, -1.1181], + [-0.4164, -0.4066, -0.4455, ..., -0.3053, 0.0722, 0.0750], + [ 0.0506, -0.1478, -0.3156, ..., -1.2014, 0.4634, -0.1019]], + + [[ 0.3341, -0.5947, 0.6357, ..., 0.0513, 0.2727, 0.1729], + [ 1.0427, 0.6447, -0.7157, ..., -0.0696, 0.3202, -0.1045], + [-0.4113, -0.5721, -0.3090, ..., -0.4902, -0.5272, -0.4597], + ..., + [-0.2218, 0.0770, -0.0610, ..., -0.1540, 0.4397, -0.2679], + [ 0.2970, -0.5468, -1.3979, ..., 0.4245, -0.3060, -0.3003], + [-1.2661, -1.0212, -1.4402, ..., 1.4839, -0.7005, 0.4571]], + + ..., + + [[ 0.4322, -0.5403, 0.3777, ..., 0.1544, 0.0359, 0.1695], + [ 0.8611, 0.6608, -0.4672, ..., 0.4453, 0.0383, 0.3679], + [ 0.1159, -0.2886, -0.3874, ..., -0.8130, -0.8705, -0.8258], + ..., + [-0.5046, -0.0915, -0.3287, ..., -0.2244, -0.0374, -0.5486], + [ 0.3189, -0.4997, -0.9601, ..., 0.3986, -0.3088, -0.3109], + [-0.6580, -0.5876, -1.0235, ..., 0.7111, -0.6454, 0.2257]], + + [[-0.0356, 0.1517, -0.2355, ..., 0.3998, -0.2553, 0.4541], + [ 0.7241, 0.6216, -0.2981, ..., 0.3790, 0.0313, 0.3070], + [ 0.1365, -0.2315, -0.2407, ..., -0.7944, -0.8674, -0.8289], + ..., + [-0.5110, -0.1377, -0.3427, ..., -0.1021, -0.0801, -0.5258], + [ 0.2288, -0.2831, -0.8728, ..., 0.2669, -0.1687, -0.1729], + [-0.3816, -0.4067, -0.7615, ..., 0.4735, -0.5601, 0.1255]], + + [[-0.1380, -0.6229, 0.3423, ..., 0.1316, -0.1960, 0.2663], + [ 0.6976, 1.1722, 0.2912, ..., 0.2839, -0.1349, 0.3306], + [ 0.0694, 0.2533, 0.1255, ..., 1.4579, 1.5409, 1.4390], + ..., + [-1.4628, -0.3196, -1.0152, ..., -0.4717, -1.1734, -1.2208], + [-0.5177, -0.4918, -0.5488, ..., -0.5367, 0.2164, 0.2186], + [ 0.4723, 0.3053, 0.1405, ..., -1.6717, 0.3979, -0.3566]]]], + device='cuda:0', grad_fn=) +q and k, after apply_rotary_emb: +torch.Size([1, 636, 32, 128]) +tensor([[[[ 0.0881, -0.2796, 0.5851, ..., -2.7226, 1.4709, -0.2308], + [-0.1363, 0.2376, -0.5102, ..., 0.4101, -0.5638, 0.4540], + [-0.0035, 0.3243, 0.3837, ..., 0.2928, 0.2845, 0.2746], + ..., + [ 0.5823, -1.2618, -0.3660, ..., 1.3358, 0.4318, 1.0434], + [ 0.5995, 0.7989, -0.4695, ..., -0.4020, 0.4253, 0.3961], + [-1.1633, 1.2683, -0.2090, ..., -0.1155, 0.5578, 0.1687]], + + [[ 0.9194, -0.4475, 0.5493, ..., -0.5596, 0.3150, -0.8185], + [ 0.0611, 0.4717, -0.4997, ..., -0.2578, 0.0153, -0.2430], + [ 0.2247, -0.7758, -0.5275, ..., -0.3152, -0.2773, -0.2574], + ..., + [ 0.5975, -0.0493, -0.0846, ..., 1.1953, 0.5427, 0.8596], + [-1.4139, 1.5450, 0.5793, ..., -0.3748, 0.6372, 0.6514], + [ 0.0524, -2.2445, -0.1922, ..., -0.0674, 0.1028, 0.0901]], + + [[ 1.2721, 0.6622, -1.6639, ..., -0.7511, 0.5620, -0.9582], + [-0.1979, 0.0355, 0.1847, ..., 0.4243, -0.9571, 0.5246], + [-0.2216, 0.5453, 1.0939, ..., 0.4928, 0.4782, 0.5052], + ..., + [ 2.6574, 1.6933, -0.3208, ..., 1.3397, 1.0281, 2.3172], + [-2.4877, -0.1074, 2.4119, ..., -0.3803, 0.6478, 0.6625], + [ 0.6542, -0.6925, 0.6528, ..., -0.1118, 0.3359, -0.6006]], + + ..., + + [[-1.0250, -0.1528, -0.9315, ..., -0.8048, 0.8531, -0.9555], + [ 0.0798, -0.1613, 0.1087, ..., 0.3938, -0.7239, 0.3769], + [ 0.2970, -0.3706, 0.7458, ..., 0.0809, 0.0739, 0.1079], + ..., + [-1.1842, 0.0535, -0.0299, ..., 1.0856, 0.2720, 1.3552], + [ 1.8635, -0.8026, 1.8737, ..., -0.2931, 0.5643, 0.6705], + [-0.6337, 1.2223, 0.5544, ..., -0.0724, -0.0074, -0.1799]], + + [[-0.1795, -0.8860, -1.8381, ..., -0.6720, 0.7575, -0.8347], + [ 0.0956, -0.0543, 0.5367, ..., 0.3598, -0.6273, 0.3355], + [ 0.4397, 0.0586, 0.7529, ..., -0.0108, -0.0089, 0.0065], + ..., + [-0.5191, -0.5583, 0.0824, ..., 1.1312, 0.2479, 1.1355], + [ 1.4893, 1.0558, 2.0221, ..., -0.2650, 0.5234, 0.6195], + [-1.3962, 0.1283, 1.2648, ..., -0.0525, -0.0143, -0.1620]], + + [[ 0.0449, 0.4719, -0.3696, ..., -0.3603, 0.2247, -0.3595], + [ 0.3308, 0.0816, 1.4085, ..., -0.2712, 0.2755, -0.3251], + [-0.4184, -0.4888, 0.2117, ..., -0.3717, -0.2753, -0.3033], + ..., + [ 0.3237, -0.2190, 0.0355, ..., 1.7209, 0.4245, 0.9084], + [-0.3446, 1.9417, 1.0332, ..., -0.3204, 0.5606, 0.6615], + [-1.3076, -2.2007, 1.7241, ..., -0.1305, 0.1364, 0.4083]]]], + device='cuda:0', grad_fn=) +torch.Size([1, 636, 32, 128]) +tensor([[[[-0.4032, -0.0167, 0.0300, ..., 0.0826, -0.0706, -0.0707], + [ 1.1458, 0.9121, -0.3251, ..., 0.5441, -0.2957, 0.5221], + [ 0.0237, -0.2481, -0.3345, ..., -0.1705, 0.3765, 0.5979], + ..., + [-0.0205, -0.0151, 0.0137, ..., 0.2737, 0.8049, -0.4081], + [ 0.1832, -0.4518, -0.2057, ..., -0.1818, 0.0770, 0.0802], + [-0.3089, 1.2236, 0.1609, ..., 0.6910, 0.1255, 0.2735]], + + [[-0.1543, -0.2455, 0.4159, ..., 0.2636, -0.2013, 0.2681], + [-0.3594, 0.9623, 0.6977, ..., -0.4968, 0.1220, -0.3481], + [-0.2486, -0.0808, 0.1940, ..., 1.6261, 1.6660, 1.5224], + ..., + [-0.4096, -1.1961, -1.5964, ..., -0.5465, -0.9488, -1.1182], + [ 0.1171, -0.5701, -0.2101, ..., -0.3053, 0.0722, 0.0750], + [ 0.1517, -0.0373, -0.1310, ..., -1.2013, 0.4634, -0.1018]], + + [[ 0.4018, 0.5513, -0.0035, ..., 0.0514, 0.2727, 0.1729], + [-1.0201, 0.6798, 0.7916, ..., -0.0699, 0.3202, -0.1044], + [ 0.6914, -0.1359, -0.4204, ..., -0.4903, -0.5271, -0.4598], + ..., + [ 0.0223, -0.2337, 0.3496, ..., -0.1539, 0.4398, -0.2678], + [ 0.3736, 0.4976, 0.1018, ..., 0.4244, -0.3059, -0.3004], + [ 1.4554, -0.7262, 1.1798, ..., 1.4838, -0.7006, 0.4569]], + + ..., + + [[-0.5534, -0.4153, 0.1699, ..., 0.1490, 0.0235, 0.1717], + [ 0.6339, -0.8811, 0.5064, ..., 0.3630, 0.0113, 0.3697], + [-0.2921, -0.1069, -0.0323, ..., -0.8665, -0.8079, -0.8872], + ..., + [-0.0759, 0.5072, -0.4380, ..., -0.1906, 0.0027, -0.5499], + [-0.5093, -0.3033, -0.3336, ..., 0.3695, -0.2852, -0.3327], + [-0.5670, 0.6759, 0.3940, ..., 0.6923, -0.6601, 0.1780]], + + [[ 0.0565, 0.1452, 0.6558, ..., 0.3668, -0.2878, 0.4342], + [ 0.9488, 0.1026, 0.5222, ..., 0.3105, 0.0088, 0.3085], + [-0.0185, -0.2681, 0.1706, ..., -0.8421, -0.8044, -0.8901], + ..., + [-0.4992, 0.1758, -0.0446, ..., -0.0725, -0.0414, -0.5302], + [ 0.0284, -0.3629, 0.5567, ..., 0.2499, -0.1556, -0.1848], + [-0.5448, -0.1193, 0.7166, ..., 0.4652, -0.5677, 0.0842]], + + [[ 0.1144, -0.6277, -0.3179, ..., 0.1037, -0.2150, 0.2512], + [ 0.1883, 1.3510, -0.3788, ..., 0.3876, -0.1587, 0.3198], + [-0.0343, 0.2604, -0.1608, ..., 1.5246, 1.4313, 1.5480], + ..., + [-1.2244, -0.8619, 1.1993, ..., -0.3920, -1.0808, -1.3035], + [-0.2864, -0.6541, 0.5345, ..., -0.5218, 0.1998, 0.2339], + [ 0.3169, 0.4646, -0.0869, ..., -1.6035, 0.4230, -0.3265]]]], + device='cuda:0', grad_fn=) diff --git a/torchchat/cli/convert_hf_checkpoint.py b/torchchat/cli/convert_hf_checkpoint.py index adf27885d..3357bf7e3 100644 --- a/torchchat/cli/convert_hf_checkpoint.py +++ b/torchchat/cli/convert_hf_checkpoint.py @@ -19,9 +19,124 @@ from torchchat.model import ModelArgs +def remap_llava_checkpoint(llava_ckpt): + def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: + translated_state_dict = {} + hf_weight_prefix = "vision_model." + name_mapping = { + f"{hf_weight_prefix}embeddings.class_embedding": "model.encoder.cls_token_embedding.weight", + f"{hf_weight_prefix}embeddings.position_embedding.weight": "model.encoder.token_pos_embedding.positional_embedding", + f"{hf_weight_prefix}embeddings.patch_embedding.weight": "model.encoder.conv.weight", + f"{hf_weight_prefix}pre_layrnorm.weight": "model.encoder.ln_pre.weight", + f"{hf_weight_prefix}pre_layrnorm.bias": "model.encoder.ln_pre.bias", + f"{hf_weight_prefix}post_layernorm.weight": "model.encoder.ln_post.weight", + f"{hf_weight_prefix}post_layernorm.bias": "model.encoder.ln_post.bias", + } + patterns = [ + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.attn.output_proj.{match.group(2)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.sa_norm.{match.group(2)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.mlp_norm.{match.group(2)}", + ), + ] + for pattern, replacement in patterns: + for key in list(hf_state_dict.keys()): + if re.match(pattern, key): + new_key = re.sub(pattern, replacement, key) + name_mapping[key] = new_key + temp_state_dict = {} + for k, v in hf_state_dict.items(): + new_k = name_mapping.get(k, k) + if "in_proj_weight" in new_k or "in_proj_bias" in new_k: + if new_k not in temp_state_dict: + temp_state_dict[new_k] = {"q": None, "k": None, "v": None} + if "q_proj" in k: + temp_state_dict[new_k]["q"] = v + elif "k_proj" in k: + temp_state_dict[new_k]["k"] = v + elif "v_proj" in k: + temp_state_dict[new_k]["v"] = v + else: + temp_state_dict[new_k] = v + for k, v in temp_state_dict.items(): + if isinstance(v, dict): + translated_state_dict[k] = torch.cat([v["q"], v["k"], v["v"]], dim=0) + else: + translated_state_dict[k] = v + return translated_state_dict + + def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: + key_map = { + r"model.layers.([0-9]+).self_attn.q_proj.": r"model.decoder.layers.\1.attention.wq.", + r"model.layers.([0-9]+).self_attn.k_proj.": r"model.decoder.layers.\1.attention.wk.", + r"model.layers.([0-9]+).self_attn.v_proj.": r"model.decoder.layers.\1.attention.wv.", + r"model.layers.([0-9]+).self_attn.o_proj.": r"model.decoder.layers.\1.attention.wo.", + r"model.layers.([0-9]+).input_layernorm.": r"model.decoder.layers.\1.attention_norm.", + r"model.layers.([0-9]+).mlp.gate_proj.": r"model.decoder.layers.\1.feed_forward.w1.", + r"model.layers.([0-9]+).mlp.down_proj.": r"model.decoder.layers.\1.feed_forward.w2.", + r"model.layers.([0-9]+).mlp.up_proj.": r"model.decoder.layers.\1.feed_forward.w3.", + r"model.layers.([0-9]+).post_attention_layernorm.": r"model.decoder.layers.\1.ffn_norm.", + r"model.norm.": r"model.decoder.norm.", + # r"model.embed_tokens.": r"tok_embeddings.", # load separately + r"lm_head.": r"model.decoder.output.", + } + new_state_dict = {} + def get_new_key(old_key: str) -> str: + for old_pattern, replacement in key_map.items(): + if (new_key := re.sub(old_pattern, replacement, old_key)) != old_key: + return new_key + return old_key + for old_key in hf_state_dict.keys(): + new_key = get_new_key(old_key) + new_state_dict[new_key] = hf_state_dict[old_key] + return new_state_dict + + def _translate_state_dict_for_mm_projector_model(hf_state_dict) -> Dict[str, Any]: + new_state_dict = {} + for old_key in hf_state_dict.keys(): + new_key = "model.mm_projector." + old_key + new_state_dict[new_key] = hf_state_dict[old_key] + return new_state_dict + + def split_checkpoint(llava_ckpt): + language_model_ckpt = {} + multi_modal_ckpt = {} + vision_tower_ckpt = {} + for key, value in llava_ckpt.items(): + if key.startswith("language_model"): + language_model_ckpt[key[len("language_model") + 1:]] = value + elif key.startswith("multi_modal_projector"): + multi_modal_ckpt[key[len("multi_modal_projector") + 1:]] = value + elif key.startswith("vision_tower"): + vision_tower_ckpt[key[len("vision_tower") + 1:]] = value + return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt + language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) + remapped_state_dict = { + "model.tok_embeddings.weight": language_model_ckpt.pop("model.embed_tokens.weight"), + } + remapped_state_dict.update(_translate_state_dict_for_text_model(language_model_ckpt)) + remapped_state_dict.update(_translate_state_dict_for_vision_model(vision_tower_ckpt)) + remapped_state_dict.update(_translate_state_dict_for_mm_projector_model(multi_modal_ckpt)) + return remapped_state_dict + @torch.inference_mode() -def convert_hf_checkpoint( +def convert_text_only_hf_checkpoint( *, model_dir: Optional[Path] = None, model_name: Optional[str] = None, @@ -129,6 +244,21 @@ def permute(w, n_heads): os.remove(file) +@torch.inference_mode() +def convert_text_only_hf_checkpoint( + *, + model_dir: Optional[Path] = None, + model_name: Optional[str] = None, + remove_bin_files: bool = False, +): + if model_name == "llava-1.5": + print("Converting LLaVA 1.5 checkpoint.") + print(os.listdir(model_dir)) + exit(0) + else: + convert_text_only_hf_checkpoint(model_dir, model_name, remove_bin_files) + + if __name__ == "__main__": import argparse diff --git a/torchchat/cli/download.py b/torchchat/cli/download.py index 4a8f43515..6ea28ddd5 100644 --- a/torchchat/cli/download.py +++ b/torchchat/cli/download.py @@ -28,13 +28,34 @@ def _download_hf_snapshot( # Download and store the HF model artifacts. print(f"Downloading {model_config.name} from HuggingFace...", file=sys.stderr) try: - snapshot_download( - model_config.distribution_path, - local_dir=artifact_dir, - local_dir_use_symlinks=False, - token=hf_token, - ignore_patterns="*safetensors*", + + import huggingface_hub + # 定义模型名称和版本 + model_name = "llava-hf/llava-1.5-7b-hf" + # 下载模型checkpoint + repo_id = model_name + revision = "main" # 默认分支 + # 强制重新下载 + snapshot_dir = huggingface_hub.snapshot_download( + repo_id=repo_id, + revision=revision, + cache_dir=artifact_dir, + force_download=True, ) + print(f"模型下载完成,保存在 {snapshot_dir} 目录下") + + + # snapshot_download( + # model_config.distribution_path, + # cache_dir=artifact_dir, + # local_dir_use_symlinks=False, + # token=hf_token, + # ignore_patterns="*safetensors*", + # ) + print("*****************") + print(os.listdir(artifact_dir)) + shutil.copytree(artifact_dir, "/home/gasoonjia/download/hahaha") + exit(0) except HTTPError as e: if e.response.status_code == 401: # Missing HuggingFace CLI login. print( @@ -78,11 +99,17 @@ def download_and_convert( # location once the download and conversion is complete. This # allows recovery in the event that the download or conversion # fails unexpectedly. - temp_dir = models_dir / "downloads" / model_config.name + # temp_dir = models_dir / "downloads" / model_config.name + temp_dir = Path("/home/gasoonjia") / "downloads" / model_config.name + if os.path.isdir(temp_dir): shutil.rmtree(temp_dir) os.makedirs(temp_dir, exist_ok=True) + print("**************************************************") + print("**************************************************") + print("temp dir: ", temp_dir) + try: if ( model_config.distribution_channel diff --git a/torchchat/generate.py b/torchchat/generate.py index 9e60f9494..68188f513 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -723,6 +723,13 @@ def chat( ): if generator_args.chat_mode: print("Starting Interactive Chat") + + print("Generator Args:") + print(generator_args) + print("Builder Args:") + print(self.builder_args) + + exit(0) if generator_args.image_prompts is not None: print("Image prompts", generator_args.image_prompts) diff --git a/torchchat/model.py b/torchchat/model.py index a576d5036..052e5f4a3 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -56,6 +56,51 @@ def identity(**kwargs): return list(kwargs.values())[0] +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L77 +def hf_precompute_freqs_cis(dim: int, end: int, theta: float, **kwargs): + freqs = 1.0 / ( + theta + ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim) + ) + # pyre-ignore Undefined attribute [16]: `float` has no attribute `device`. + t = torch.arange(end, device=freqs.device, dtype=torch.int64).type_as( + freqs # pyre-ignore + ) + freqs = torch.outer(t, freqs).float() # pyre-ignore + emb = torch.cat((freqs, freqs), dim=-1) + freqs_cos = torch.cos(emb) + freqs_sin = torch.sin(emb) + return torch.stack((freqs_cos, freqs_sin), dim=-1) + +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L135 +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def hf_apply_rotary_emb(x, freq_cis, unsqueeze_dim=1, **kwargs): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. + Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = freq_cis[..., 0].unsqueeze(unsqueeze_dim) + sin = freq_cis[..., 1].unsqueeze(unsqueeze_dim) + return (x * cos) + (rotate_half(x) * sin) class MultiModalProjector(nn.Module): def __init__(self, in_channels: int, out_channels: int, act: nn.Module): @@ -126,7 +171,10 @@ def forward( dtype=torch.int, ) - return self.decoder(decoder_input, input_pos=input_pos) + return decoder_input.shape[1], self.decoder(decoder_input, input_pos=input_pos) + else: + return self.decoder(decoder_input, input_pos=input_pos) + def setup_caches(self, batch_size, max_seq_len) -> None: self.decoder.setup_caches(batch_size, max_seq_len) @@ -148,7 +196,7 @@ def _get_decoder_input( ) -> Tensor: if encoder_output is None: assert post_tokens is None - return self.tok_embeddings(tokens) + return self.tok_embeddings(tokens).unsqueeze(0) else: pre_img_embed = self.tok_embeddings(tokens) image_embeds = self.mm_projector(encoder_output) @@ -262,6 +310,7 @@ class TransformerArgs: use_tiktoken: bool = False max_seq_length: int = 8192 rope_scaling: Optional[Dict[str, Any]] = None + use_hf_rope: bool = False # For pipeline parallel n_stages: int = 1 stage_idx: int = 0 @@ -607,6 +656,11 @@ def __init__(self, config: TransformerArgs) -> None: self.max_seq_length = -1 # For supporting sequence parallel (default is off, thus value of 1) self.seq_parallel_degree = 1 + if config.use_hf_rope: + self.precompute_freqs_cis = hf_precompute_freqs_cis + else: + self.precompute_freqs_cis = precompute_freqs_cis + def setup_caches(self, max_batch_size, max_seq_length): if ( @@ -625,7 +679,7 @@ def setup_caches(self, max_batch_size, max_seq_length): max_batch_size, max_seq_length, ) - freqs_cis = precompute_freqs_cis( + freqs_cis = self.precompute_freqs_cis( self.config.dim // self.config.n_heads, self.config.block_size * 2, self.config.rope_base, @@ -737,6 +791,10 @@ def __init__(self, config: TransformerArgs): self.n_local_heads = config.n_local_heads self.dim = config.dim self._register_load_state_dict_pre_hook(self.load_hook) + if config.use_hf_rope: + self.apply_rotary_emb = hf_apply_rotary_emb + else: + self.apply_rotary_emb = apply_rotary_emb def setup_cache(self, max_batch_size, max_seq_length): n_local_heads = self.n_local_heads @@ -825,8 +883,8 @@ def forward( # -1 = self.n_local_heads v = v.view(bsz, seqlen, -1, self.head_dim) - q = apply_rotary_emb(q, freqs_cis) - k = apply_rotary_emb(k, freqs_cis) + q = self.apply_rotary_emb(q, freqs_cis) + k = self.apply_rotary_emb(k, freqs_cis) q, k, v = (x.transpose(1, 2) for x in (q, k, v)) @@ -987,3 +1045,385 @@ def setup_caches(self, max_batch_size, max_seq_length): except: pass + + +if __name__ == "__main__": + import re + from PIL import Image + import requests + + def prepare_image(target_h: int, target_w: int) -> torch.Tensor: + """Read image into a tensor and resize the image so that it fits in + a target_h x target_w canvas. + + Args: + image (Image): An Image object. + target_h (int): Target height. + target_w (int): Target width. + + Returns: + torch.Tensor: resized image tensor. + """ + image = Image.open( + requests.get( + "https://llava-vl.github.io/static/images/view.jpg", stream=True + ).raw) + + img = torchvision.transforms.functional.pil_to_tensor(image) + # height ratio + ratio_h = img.shape[1] / target_h + # width ratio + ratio_w = img.shape[2] / target_w + # resize the image so that it fits in a target_h x target_w canvas + ratio = max(ratio_h, ratio_w) + output_size = (int(img.shape[1] / ratio), int(img.shape[2] / ratio)) + img = torchvision.transforms.Resize(size=output_size)(img) + return img + + + def image_preprocess(img: torch.Tensor, target_h: int, target_w: int, rescale_factor, image_mean, image_std) -> torch.Tensor: + # pad the image with median rgb value, to make a square + l_pad = (target_w - img.shape[2]) // 2 + t_pad = (target_h - img.shape[1]) // 2 + # ceil division + r_pad = -((target_w - img.shape[2]) // -2) + b_pad = -((target_h - img.shape[1]) // -2) + + torch._check(l_pad >= 0) + torch._check(t_pad >= 0) + torch._check(r_pad >= 0) + torch._check(b_pad >= 0) + + # This is different from the original implementation, due to export limitations. + resized = torch.nn.functional.pad( + img, + (l_pad, r_pad, t_pad, b_pad), + ) + + scaled = resized * rescale_factor + from torchvision.transforms.v2 import functional as tvF + normed = tvF.normalize( + scaled, image_mean, image_std + ) + return normed.unsqueeze(0) + + + # def checkpoint_remap(llava_model, llava_ckpt): + # def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: + # translated_state_dict = {} + + # # Define the mapping from old names to new names + # hf_weight_prefix = "vision_model." + # name_mapping = { + # f"{hf_weight_prefix}embeddings.class_embedding": "cls_token_embedding.weight", + # f"{hf_weight_prefix}embeddings.position_embedding.weight": "token_pos_embedding.positional_embedding", + # f"{hf_weight_prefix}embeddings.patch_embedding.weight": "conv.weight", + # f"{hf_weight_prefix}pre_layrnorm.weight": "ln_pre.weight", + # f"{hf_weight_prefix}pre_layrnorm.bias": "ln_pre.bias", + # f"{hf_weight_prefix}post_layernorm.weight": "ln_post.weight", + # f"{hf_weight_prefix}post_layernorm.bias": "ln_post.bias", + # } + + # # Use regular expressions to define the mapping for each layer + # patterns = [ + # ( + # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", + # lambda match: f"layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", + # ), + # ( + # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", + # lambda match: f"layers.{match.group(1)}.attn.output_proj.{match.group(2)}", + # ), + # ( + # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", + # lambda match: f"layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", + # ), + # ( + # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", + # lambda match: f"layers.{match.group(1)}.sa_norm.{match.group(2)}", + # ), + # ( + # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", + # lambda match: f"layers.{match.group(1)}.mlp_norm.{match.group(2)}", + # ), + # ] + + # # Apply the patterns to update the name mapping + # for pattern, replacement in patterns: + # for key in list(hf_state_dict.keys()): + # if re.match(pattern, key): + # new_key = re.sub(pattern, replacement, key) + # name_mapping[key] = new_key + + # # Process the combined self-attention weights and biases + # temp_state_dict = {} + # for k, v in hf_state_dict.items(): + # new_k = name_mapping[k] + # if "in_proj_weight" in new_k or "in_proj_bias" in new_k: + # if new_k not in temp_state_dict: + # temp_state_dict[new_k] = {"q": None, "k": None, "v": None} + # if "q_proj" in k: + # temp_state_dict[new_k]["q"] = v + # elif "k_proj" in k: + # temp_state_dict[new_k]["k"] = v + # elif "v_proj" in k: + # temp_state_dict[new_k]["v"] = v + # else: + # temp_state_dict[new_k] = v + + # # Final processing of the combined self-attention weights and biases + # for k, v in temp_state_dict.items(): + # if isinstance(v, dict): + # translated_state_dict[k] = torch.cat([v["q"], v["k"], v["v"]], dim=0) + # else: + # translated_state_dict[k] = v + + # return translated_state_dict + + # new_state_dict = {} + # for k, v in state_dict.items(): + # if k.startswith("model.model."): + # new_state_dict[k.replace("model.model.", "")] = v + # elif k.startswith("model."): + # new_state_dict[k.replace("model.", "")] = v + # else: + # new_state_dict[k] = v + # return new_state_dict + + # def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: + # key_map = { + # # fmt: off + # r"model.layers.([0-9]+).self_attn.q_proj.": r"layers.\1.attention.wq.", + # r"model.layers.([0-9]+).self_attn.k_proj.": r"layers.\1.attention.wk.", + # r"model.layers.([0-9]+).self_attn.v_proj.": r"layers.\1.attention.wv.", + # r"model.layers.([0-9]+).self_attn.o_proj.": r"layers.\1.attention.wo.", + # r"model.layers.([0-9]+).input_layernorm.": r"layers.\1.attention_norm.", + # r"model.layers.([0-9]+).mlp.gate_proj.": r"layers.\1.feed_forward.w1.", + # r"model.layers.([0-9]+).mlp.down_proj.": r"layers.\1.feed_forward.w2.", + # r"model.layers.([0-9]+).mlp.up_proj.": r"layers.\1.feed_forward.w3.", + # r"model.layers.([0-9]+).post_attention_layernorm.": r"layers.\1.ffn_norm.", + # r"model.norm.": r"norm.", + # # r"model.embed_tokens.": r"tok_embeddings.", # load separately + # r"lm_head.": r"output.", + # # fmt: on + # } + + # new_state_dict = {} + + # def get_new_key(old_key: str) -> str: + # for old_pattern, replacement in key_map.items(): + # if (new_key := re.sub(old_pattern, replacement, old_key)) != old_key: + # return new_key + + # return old_key + + # # Convert module keys from hf transformer to Llama transformer. + # for old_key in hf_state_dict.keys(): + # new_key = get_new_key(old_key) + + # new_state_dict[new_key] = hf_state_dict[old_key] + + # return new_state_dict + + # def split_checkpoint(llava_ckpt): + # from collections import OrderedDict + # language_model_ckpt = OrderedDict() + # multi_modal_ckpt = OrderedDict() + # vision_tower_ckpt = OrderedDict() + # for key, value in llava_ckpt.items(): + # if key.startswith("language_model"): + # language_model_ckpt[key[len("language_model") + 1:]] = value + # elif key.startswith("multi_modal_projector"): + # multi_modal_ckpt[key[len("multi_modal_projector") + 1:]] = value + # elif key.startswith("vision_tower"): + # vision_tower_ckpt[key[len("vision_tower") + 1:]] = value + # return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt + + # llava_model = llava_model.model + + # language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) + + # llava_model.tok_embeddings.load_state_dict({"weight": language_model_ckpt.pop("model.embed_tokens.weight")}) + + # llava_model.encoder.load_state_dict(state_dict=_translate_state_dict_for_vision_model(vision_tower_ckpt), + # strict=True, + # assign=True, + # ) + + # llava_model.decoder.load_state_dict(state_dict=_translate_state_dict_for_text_model(language_model_ckpt), + # strict=True, + # assign=True, + # ) + + # llava_model.mm_projector.load_state_dict(state_dict=multi_modal_ckpt, + # strict=True, + # assign=True, + # ) + + def remap_llava_checkpoint(llava_ckpt): + def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: + translated_state_dict = {} + hf_weight_prefix = "vision_model." + name_mapping = { + f"{hf_weight_prefix}embeddings.class_embedding": "model.encoder.cls_token_embedding.weight", + f"{hf_weight_prefix}embeddings.position_embedding.weight": "model.encoder.token_pos_embedding.positional_embedding", + f"{hf_weight_prefix}embeddings.patch_embedding.weight": "model.encoder.conv.weight", + f"{hf_weight_prefix}pre_layrnorm.weight": "model.encoder.ln_pre.weight", + f"{hf_weight_prefix}pre_layrnorm.bias": "model.encoder.ln_pre.bias", + f"{hf_weight_prefix}post_layernorm.weight": "model.encoder.ln_post.weight", + f"{hf_weight_prefix}post_layernorm.bias": "model.encoder.ln_post.bias", + } + patterns = [ + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.attn.output_proj.{match.group(2)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.sa_norm.{match.group(2)}", + ), + ( + rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", + lambda match: f"model.encoder.layers.{match.group(1)}.mlp_norm.{match.group(2)}", + ), + ] + for pattern, replacement in patterns: + for key in list(hf_state_dict.keys()): + if re.match(pattern, key): + new_key = re.sub(pattern, replacement, key) + name_mapping[key] = new_key + temp_state_dict = {} + for k, v in hf_state_dict.items(): + new_k = name_mapping.get(k, k) + if "in_proj_weight" in new_k or "in_proj_bias" in new_k: + if new_k not in temp_state_dict: + temp_state_dict[new_k] = {"q": None, "k": None, "v": None} + if "q_proj" in k: + temp_state_dict[new_k]["q"] = v + elif "k_proj" in k: + temp_state_dict[new_k]["k"] = v + elif "v_proj" in k: + temp_state_dict[new_k]["v"] = v + else: + temp_state_dict[new_k] = v + for k, v in temp_state_dict.items(): + if isinstance(v, dict): + translated_state_dict[k] = torch.cat([v["q"], v["k"], v["v"]], dim=0) + else: + translated_state_dict[k] = v + return translated_state_dict + + def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: + key_map = { + r"model.layers.([0-9]+).self_attn.q_proj.": r"model.decoder.layers.\1.attention.wq.", + r"model.layers.([0-9]+).self_attn.k_proj.": r"model.decoder.layers.\1.attention.wk.", + r"model.layers.([0-9]+).self_attn.v_proj.": r"model.decoder.layers.\1.attention.wv.", + r"model.layers.([0-9]+).self_attn.o_proj.": r"model.decoder.layers.\1.attention.wo.", + r"model.layers.([0-9]+).input_layernorm.": r"model.decoder.layers.\1.attention_norm.", + r"model.layers.([0-9]+).mlp.gate_proj.": r"model.decoder.layers.\1.feed_forward.w1.", + r"model.layers.([0-9]+).mlp.down_proj.": r"model.decoder.layers.\1.feed_forward.w2.", + r"model.layers.([0-9]+).mlp.up_proj.": r"model.decoder.layers.\1.feed_forward.w3.", + r"model.layers.([0-9]+).post_attention_layernorm.": r"model.decoder.layers.\1.ffn_norm.", + r"model.norm.": r"model.decoder.norm.", + # r"model.embed_tokens.": r"tok_embeddings.", # load separately + r"lm_head.": r"model.decoder.output.", + } + new_state_dict = {} + def get_new_key(old_key: str) -> str: + for old_pattern, replacement in key_map.items(): + if (new_key := re.sub(old_pattern, replacement, old_key)) != old_key: + return new_key + return old_key + for old_key in hf_state_dict.keys(): + new_key = get_new_key(old_key) + new_state_dict[new_key] = hf_state_dict[old_key] + return new_state_dict + + def _translate_state_dict_for_mm_projector_model(hf_state_dict) -> Dict[str, Any]: + new_state_dict = {} + for old_key in hf_state_dict.keys(): + new_key = "model.mm_projector." + old_key + new_state_dict[new_key] = hf_state_dict[old_key] + return new_state_dict + + def split_checkpoint(llava_ckpt): + language_model_ckpt = {} + multi_modal_ckpt = {} + vision_tower_ckpt = {} + for key, value in llava_ckpt.items(): + if key.startswith("language_model"): + language_model_ckpt[key[len("language_model") + 1:]] = value + elif key.startswith("multi_modal_projector"): + multi_modal_ckpt[key[len("multi_modal_projector") + 1:]] = value + elif key.startswith("vision_tower"): + vision_tower_ckpt[key[len("vision_tower") + 1:]] = value + return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt + language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) + remapped_state_dict = { + "model.tok_embeddings.weight": language_model_ckpt.pop("model.embed_tokens.weight"), + } + remapped_state_dict.update(_translate_state_dict_for_text_model(language_model_ckpt)) + remapped_state_dict.update(_translate_state_dict_for_vision_model(vision_tower_ckpt)) + remapped_state_dict.update(_translate_state_dict_for_mm_projector_model(multi_modal_ckpt)) + return remapped_state_dict + + with torch.device("cuda"): + print("Preparing input") + pre_tokens = torch.tensor([[ 1, 319, 13563, 1546, 263, 12758, 5199, 322, 385, 23116, + 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, + 322, 1248, 568, 6089, 304, 278, 5199, 29915, 29879, 5155, + 29889, 3148, 1001, 29901, 29871]]) + img = prepare_image(336, 336) + post_tokens = torch.tensor([[29871, 13, 462, 9651, 1724, 526, 278, 2712, 306, 881, + 367, 274, 1300, 2738, 1048, 746, 306, 6493, 1244, 29973, + 319, 1799, 9047, 13566, 29901]]) + img = image_preprocess(img=img, target_h=336, target_w=336, image_mean=[0.48145466, 0.4578275, 0.40821073], image_std=[0.26862954, 0.26130258, 0.27577711], rescale_factor=0.00392156862745098) + + print("Done, Now creating model...") + llava_model = Model.from_params("/home/gasoonjia/torchchat/torchchat/model_params/llava-1.5.json") + + llava_model = llava_model.eval() + + print("Done. Now loading checkpoint...") + llava_ckpt = torch.load("/home/gasoonjia/executorch/examples/models/llava/llava_checkpoint.pth", map_location="cuda") + + print("Done. Now checkpoint remapping...") + remapped_state_dict = remap_llava_checkpoint(llava_ckpt) + llava_model.load_state_dict(remapped_state_dict, strict=True) + + print("Done. Now setup caches...") + + llava_model.setup_caches(1, 768) + + print("Done. Now running prefilling inference...") + # being tested, using llama_transformer + context_len, prefill_logits = llava_model(tokens=pre_tokens, encoder_input=img, post_tokens=post_tokens) + print("prefill_logits: ") + print(prefill_logits[0, -1].shape) + print(prefill_logits[0, -1]) + print("context_len: \n", context_len) + # Always generate one token at a time. + new_tokens = [torch.argmax(prefill_logits[0, -1], dim=-1).item()] + print(new_tokens) + print(prefill_logits.shape) + print("Done. Now running generation inference...") + for i in range(10): + logits = llava_model( + torch.tensor([new_tokens[i]]), input_pos=torch.tensor([context_len + i]) + ) + print(f"{i}-th logits: ") + print(logits) + + print(f"{i}-th logits.shape: ") + print(logits.shape) + new_tokens.append(torch.argmax(logits[-1, :]).item()) + + print("Done. The output is:", new_tokens) diff --git a/torchchat/model_config/models.json b/torchchat/model_config/models.json index ca8c5acdf..f437d43ca 100644 --- a/torchchat/model_config/models.json +++ b/torchchat/model_config/models.json @@ -1,4 +1,10 @@ { + "llava-hf/llava-1.5-7b-hf": { + "aliases": ["llava-1.5"], + "distribution_channel": "HuggingFaceSnapshot", + "distribution_path": "llava-hf/llava-1.5-7b-hf", + "transformer_params_key": "llava-1.5" + }, "meta-llama/Llama-2-7b-hf": { "aliases": ["llama2-base", "llama2-7b"], "distribution_channel": "HuggingFaceSnapshot", diff --git a/torchchat/model_params/llava-1.5.json b/torchchat/model_params/llava-1.5.json index 992cc2c69..5974702d5 100644 --- a/torchchat/model_params/llava-1.5.json +++ b/torchchat/model_params/llava-1.5.json @@ -20,6 +20,7 @@ "n_heads": 32, "dim": 4096, "vocab_size": 32064, - "max_seq_length": 768 + "max_seq_length": 768, + "use_hf_rope": true } } From 32d969ea98a17650ad5ddaf2bd0f08ef117b1e0e Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 11:34:33 -0700 Subject: [PATCH 2/8] 2/n llava e2e init --- torchchat/cli/convert_hf_checkpoint.py | 119 +++++++++++---- torchchat/cli/download.py | 36 +---- torchchat/generate.py | 71 ++++++--- torchchat/model.py | 202 ++++++++++++++++++------- torchchat/model_config/model_config.py | 2 +- torchchat/model_params/llava-1.5.json | 1 - torchchat/utils/preprocessors.py | 80 ++++++++++ 7 files changed, 378 insertions(+), 133 deletions(-) create mode 100644 torchchat/utils/preprocessors.py diff --git a/torchchat/cli/convert_hf_checkpoint.py b/torchchat/cli/convert_hf_checkpoint.py index 3357bf7e3..9e8c15ba9 100644 --- a/torchchat/cli/convert_hf_checkpoint.py +++ b/torchchat/cli/convert_hf_checkpoint.py @@ -7,10 +7,13 @@ import os import re import sys +import glob from pathlib import Path -from typing import Optional +from typing import Any, Dict, Optional import torch +import safetensors.torch +import shutil # support running without installing as a package wd = Path(__file__).parent.parent @@ -24,34 +27,34 @@ def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: translated_state_dict = {} hf_weight_prefix = "vision_model." name_mapping = { - f"{hf_weight_prefix}embeddings.class_embedding": "model.encoder.cls_token_embedding.weight", - f"{hf_weight_prefix}embeddings.position_embedding.weight": "model.encoder.token_pos_embedding.positional_embedding", - f"{hf_weight_prefix}embeddings.patch_embedding.weight": "model.encoder.conv.weight", - f"{hf_weight_prefix}pre_layrnorm.weight": "model.encoder.ln_pre.weight", - f"{hf_weight_prefix}pre_layrnorm.bias": "model.encoder.ln_pre.bias", - f"{hf_weight_prefix}post_layernorm.weight": "model.encoder.ln_post.weight", - f"{hf_weight_prefix}post_layernorm.bias": "model.encoder.ln_post.bias", + f"{hf_weight_prefix}embeddings.class_embedding": "encoder.cls_token_embedding.weight", + f"{hf_weight_prefix}embeddings.position_embedding.weight": "encoder.token_pos_embedding.positional_embedding", + f"{hf_weight_prefix}embeddings.patch_embedding.weight": "encoder.conv.weight", + f"{hf_weight_prefix}pre_layrnorm.weight": "encoder.ln_pre.weight", + f"{hf_weight_prefix}pre_layrnorm.bias": "encoder.ln_pre.bias", + f"{hf_weight_prefix}post_layernorm.weight": "encoder.ln_post.weight", + f"{hf_weight_prefix}post_layernorm.bias": "encoder.ln_post.bias", } patterns = [ ( rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", + lambda match: f"encoder.layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", ), ( rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.attn.output_proj.{match.group(2)}", + lambda match: f"encoder.layers.{match.group(1)}.attn.output_proj.{match.group(2)}", ), ( rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", + lambda match: f"encoder.layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", ), ( rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.sa_norm.{match.group(2)}", + lambda match: f"encoder.layers.{match.group(1)}.sa_norm.{match.group(2)}", ), ( rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.mlp_norm.{match.group(2)}", + lambda match: f"encoder.layers.{match.group(1)}.mlp_norm.{match.group(2)}", ), ] for pattern, replacement in patterns: @@ -82,18 +85,18 @@ def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: key_map = { - r"model.layers.([0-9]+).self_attn.q_proj.": r"model.decoder.layers.\1.attention.wq.", - r"model.layers.([0-9]+).self_attn.k_proj.": r"model.decoder.layers.\1.attention.wk.", - r"model.layers.([0-9]+).self_attn.v_proj.": r"model.decoder.layers.\1.attention.wv.", - r"model.layers.([0-9]+).self_attn.o_proj.": r"model.decoder.layers.\1.attention.wo.", - r"model.layers.([0-9]+).input_layernorm.": r"model.decoder.layers.\1.attention_norm.", - r"model.layers.([0-9]+).mlp.gate_proj.": r"model.decoder.layers.\1.feed_forward.w1.", - r"model.layers.([0-9]+).mlp.down_proj.": r"model.decoder.layers.\1.feed_forward.w2.", - r"model.layers.([0-9]+).mlp.up_proj.": r"model.decoder.layers.\1.feed_forward.w3.", - r"model.layers.([0-9]+).post_attention_layernorm.": r"model.decoder.layers.\1.ffn_norm.", - r"model.norm.": r"model.decoder.norm.", + r"model.layers.([0-9]+).self_attn.q_proj.": r"decoder.layers.\1.attention.wq.", + r"model.layers.([0-9]+).self_attn.k_proj.": r"decoder.layers.\1.attention.wk.", + r"model.layers.([0-9]+).self_attn.v_proj.": r"decoder.layers.\1.attention.wv.", + r"model.layers.([0-9]+).self_attn.o_proj.": r"decoder.layers.\1.attention.wo.", + r"model.layers.([0-9]+).input_layernorm.": r"decoder.layers.\1.attention_norm.", + r"model.layers.([0-9]+).mlp.gate_proj.": r"decoder.layers.\1.feed_forward.w1.", + r"model.layers.([0-9]+).mlp.down_proj.": r"decoder.layers.\1.feed_forward.w2.", + r"model.layers.([0-9]+).mlp.up_proj.": r"decoder.layers.\1.feed_forward.w3.", + r"model.layers.([0-9]+).post_attention_layernorm.": r"decoder.layers.\1.ffn_norm.", + r"model.norm.": r"decoder.norm.", # r"model.embed_tokens.": r"tok_embeddings.", # load separately - r"lm_head.": r"model.decoder.output.", + r"lm_head.": r"decoder.output.", } new_state_dict = {} def get_new_key(old_key: str) -> str: @@ -109,7 +112,7 @@ def get_new_key(old_key: str) -> str: def _translate_state_dict_for_mm_projector_model(hf_state_dict) -> Dict[str, Any]: new_state_dict = {} for old_key in hf_state_dict.keys(): - new_key = "model.mm_projector." + old_key + new_key = "mm_projector." + old_key new_state_dict[new_key] = hf_state_dict[old_key] return new_state_dict @@ -127,13 +130,65 @@ def split_checkpoint(llava_ckpt): return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) remapped_state_dict = { - "model.tok_embeddings.weight": language_model_ckpt.pop("model.embed_tokens.weight"), + "tok_embeddings.weight": language_model_ckpt.pop("model.embed_tokens.weight"), } remapped_state_dict.update(_translate_state_dict_for_text_model(language_model_ckpt)) remapped_state_dict.update(_translate_state_dict_for_vision_model(vision_tower_ckpt)) remapped_state_dict.update(_translate_state_dict_for_mm_projector_model(multi_modal_ckpt)) return remapped_state_dict + +@torch.inference_mode +def convert_llava_checkpoint( + *, + model_dir: Optional[Path] = None, +) -> None: + + """ + Process safetensor files from a specific directory structure and save the remapped model. + + Args: + model_dir (str): Base directory containing the model subdirectories. + """ + + def _get_llava_files_with_pattern(pattern): + pattern = os.path.join(model_dir, f"models--llava-hf--llava-1.5-7b-hf/snapshots/*/{pattern}") + return glob.glob(pattern) + + # get all safetensor files in the model directory + safetensor_files = _get_llava_files_with_pattern("*.safetensors") + + if not safetensor_files: + raise ValueError("No safetensor files found.") + + merged_weights = {} + + # Merge safetensor files into a whole + for file in safetensor_files: + # Load weights from the current file + part_weights = safetensors.torch.load_file(file) + + # Iterate over each weight in the current file + for key, value in part_weights.items(): + if key in merged_weights: + # If the key already exists, concatenate tensors + merged_weights[key] = torch.cat((merged_weights[key], value), dim=0) + else: + # If the key does not exist, add it to the dictionary + merged_weights[key] = value + + # Remap the checkpoint and save it as pth + remapped_weights = remap_llava_checkpoint(merged_weights) + model_path = model_dir / "model.pth" + torch.save(remapped_weights, model_path) + + # copy tokenizer + tokenizer_files = _get_llava_files_with_pattern("tokenizer.model") + assert len(tokenizer_files) == 1, "Should get only one tokenizer file, but got {}".format(tokenizer_files) + + tokenizer_path = model_dir / "tokenizer.model" + shutil.copy(tokenizer_files[0], tokenizer_path) + @torch.inference_mode() def convert_text_only_hf_checkpoint( @@ -245,18 +300,18 @@ def permute(w, n_heads): @torch.inference_mode() -def convert_text_only_hf_checkpoint( +def convert_hf_checkpoint( *, model_dir: Optional[Path] = None, model_name: Optional[str] = None, remove_bin_files: bool = False, ): - if model_name == "llava-1.5": - print("Converting LLaVA 1.5 checkpoint.") - print(os.listdir(model_dir)) - exit(0) + print(model_name) + print("***********************") + if "llava" in model_name: + convert_llava_checkpoint(model_dir=model_dir) else: - convert_text_only_hf_checkpoint(model_dir, model_name, remove_bin_files) + convert_text_only_hf_checkpoint(model_dir=model_dir, model_name=model_name, remove_bin_files=remove_bin_files) if __name__ == "__main__": diff --git a/torchchat/cli/download.py b/torchchat/cli/download.py index 6ea28ddd5..eb96e99b4 100644 --- a/torchchat/cli/download.py +++ b/torchchat/cli/download.py @@ -28,34 +28,14 @@ def _download_hf_snapshot( # Download and store the HF model artifacts. print(f"Downloading {model_config.name} from HuggingFace...", file=sys.stderr) try: - - import huggingface_hub - # 定义模型名称和版本 - model_name = "llava-hf/llava-1.5-7b-hf" - # 下载模型checkpoint - repo_id = model_name - revision = "main" # 默认分支 - # 强制重新下载 - snapshot_dir = huggingface_hub.snapshot_download( - repo_id=repo_id, - revision=revision, + snapshot_download( + model_config.distribution_path, cache_dir=artifact_dir, - force_download=True, + local_dir_use_symlinks=False, + token=hf_token, + ignore_patterns=None if "llava" in model_config.name else "*safetensors*", ) - print(f"模型下载完成,保存在 {snapshot_dir} 目录下") - - - # snapshot_download( - # model_config.distribution_path, - # cache_dir=artifact_dir, - # local_dir_use_symlinks=False, - # token=hf_token, - # ignore_patterns="*safetensors*", - # ) - print("*****************") - print(os.listdir(artifact_dir)) - shutil.copytree(artifact_dir, "/home/gasoonjia/download/hahaha") - exit(0) + except HTTPError as e: if e.response.status_code == 401: # Missing HuggingFace CLI login. print( @@ -99,8 +79,8 @@ def download_and_convert( # location once the download and conversion is complete. This # allows recovery in the event that the download or conversion # fails unexpectedly. - # temp_dir = models_dir / "downloads" / model_config.name - temp_dir = Path("/home/gasoonjia") / "downloads" / model_config.name + temp_dir = models_dir / "downloads" / model_config.name + # temp_dir = Path("/home/gasoonjia") / "downloads" / model_config.name if os.path.isdir(temp_dir): shutil.rmtree(temp_dir) diff --git a/torchchat/generate.py b/torchchat/generate.py index 68188f513..44931fdea 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -36,6 +36,7 @@ from torchchat.model import Model, ModelType from torchchat.utils.build_utils import device_sync, set_precision from torchchat.utils.device_info import get_device_info +from torchchat.utils.preprocessors import llava_image_preprocess # torchtune model definition dependencies from torchtune.data import Message @@ -622,6 +623,13 @@ def generate( sequential_prefill=sequential_prefill, **sampling_kwargs, ) + + # For llava, we need to extract next pos id from prefill result + if self.model.config.model_type == ModelType.Llava: + next_token, context_len = next_token + else: + next_token, context_len = next_token, T + if is_speculative: self.prefill( draft_model, @@ -636,7 +644,7 @@ def generate( # max_new_tokens <= 2 means we are effectively not calling decode_n_tokens(). callback(next_token.clone().view(-1), done_generating=max_new_tokens <= 2) - input_pos = torch.tensor([start_pos + T], device=device, dtype=torch.int) + input_pos = torch.tensor([start_pos + context_len], device=device, dtype=torch.int) accept_counts = [0] * ( speculate_k + 1 ) # creates array of [0, 0, 0, ...] that is speculate_k + 1 long @@ -729,31 +737,54 @@ def chat( print("Builder Args:") print(self.builder_args) - exit(0) - if generator_args.image_prompts is not None: print("Image prompts", generator_args.image_prompts) - # Support for just the first image prompt for now images = [Image.open(generator_args.image_prompts[0])] - messages = [ - Message( - role="user", - content=[ - {"type": "image", "content": images[0]}, - {"type": "text", "content": generator_args.prompt}, - ], - eot=True, - ), - Message(role="assistant", content=""), - ] - transform = flamingo_transform(str(self.tokenizer_args.tokenizer_path)) - data = transform({"messages": messages}, inference=True) - batch = padded_collate([data], self.builder_args.device) - batch.pop("mask") - encoded = batch["tokens"] + assert len(images) == 1, "Only one image prompt is supported for now" + + #TODO: updated encoded variable for multi-modality models to include image tokens. + if self.model.config.model_type == ModelType.Flamingo: + messages = [ + Message( + role="user", + content=[ + {"type": "image", "content": images[0]}, + {"type": "text", "content": generator_args.prompt}, + ], + eot=True, + ), + Message(role="assistant", content=""), + ] + transform = flamingo_transform(str(self.tokenizer_args.tokenizer_path)) + data = transform({"messages": messages}, inference=True) + batch = padded_collate([data], self.builder_args.device) + batch.pop("mask") + encoded = batch["tokens"] + elif self.model.config.model_type == ModelType.Llava: + #TODO: double check the tokenizer. + def find_subtensor(tensor, target): + target_len = len(target) + for i in range(len(tensor) - target_len + 1): + if torch.all(tensor[i:i+target_len] == target): + return i + return -1 + + input_ids = self.encode_tokens(generator_args.prompt, bos=True, device=self.builder_args.device) + image_token_indices = self.encode_tokens("", device=self.builder_args.device)[1:] + index = find_subtensor(input_ids, image_token_indices) + + batch = { + "tokens": input_ids[:index].unsqueeze(0), + "encoder_input": llava_image_preprocess(images[0], device=self.builder_args.device), + "post_tokens": input_ids[index + len(image_token_indices) :].unsqueeze(0), + } + print("BATTTTTTTCHCHHHHHHHHH") + print(batch) + encoded = torch.cat([batch["tokens"].view(1, -1), batch["post_tokens"].view(1, -1)], dim=-1).view(-1) + else: encoded = self.encode_tokens( generator_args.prompt, bos=True, device=self.builder_args.device diff --git a/torchchat/model.py b/torchchat/model.py index 052e5f4a3..5b923874c 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -14,7 +14,7 @@ import torchvision -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, List, Optional, Union from collections.abc import Hashable import torch @@ -56,52 +56,6 @@ def identity(**kwargs): return list(kwargs.values())[0] -# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L77 -def hf_precompute_freqs_cis(dim: int, end: int, theta: float, **kwargs): - freqs = 1.0 / ( - theta - ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim) - ) - # pyre-ignore Undefined attribute [16]: `float` has no attribute `device`. - t = torch.arange(end, device=freqs.device, dtype=torch.int64).type_as( - freqs # pyre-ignore - ) - freqs = torch.outer(t, freqs).float() # pyre-ignore - emb = torch.cat((freqs, freqs), dim=-1) - freqs_cos = torch.cos(emb) - freqs_sin = torch.sin(emb) - return torch.stack((freqs_cos, freqs_sin), dim=-1) - -# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L135 -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - - -def hf_apply_rotary_emb(x, freq_cis, unsqueeze_dim=1, **kwargs): - """Applies Rotary Position Embedding to the query and key tensors. - - Args: - q (`torch.Tensor`): The query tensor. - k (`torch.Tensor`): The key tensor. - cos (`torch.Tensor`): The cosine part of the rotary embedding. - sin (`torch.Tensor`): The sine part of the rotary embedding. - unsqueeze_dim (`int`, *optional*, defaults to 1): - The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and - sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note - that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and - k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes - cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have - the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. - Returns: - `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. - """ - cos = freq_cis[..., 0].unsqueeze(unsqueeze_dim) - sin = freq_cis[..., 1].unsqueeze(unsqueeze_dim) - return (x * cos) + (rotate_half(x) * sin) - class MultiModalProjector(nn.Module): def __init__(self, in_channels: int, out_channels: int, act: nn.Module): super().__init__() @@ -204,6 +158,10 @@ def _get_decoder_input( return torch.cat((pre_img_embed, image_embeds), dim=1) post_img_embed = self.tok_embeddings(post_tokens) + print("embeddings sizes:") + print(pre_img_embed.shape) + print(image_embeds.shape) + print(post_img_embed.shape) return torch.cat((pre_img_embed, image_embeds, post_img_embed), dim=1) @@ -462,7 +420,6 @@ def __init__( # print(f"dtype on entry {dtype}") if not dtype: dtype = get_precision() - # print(f"dtype on get_prec {dtype}") cache_shape = (max_batch_size, n_heads, max_seq_length, head_dim) self.register_buffer("k_cache", torch.zeros(cache_shape, dtype=dtype)) self.register_buffer("v_cache", torch.zeros(cache_shape, dtype=dtype)) @@ -731,10 +688,16 @@ def forward(self, x: Tensor, input_pos: Optional[Tensor] = None) -> Tensor: input_pos = input_pos if input_pos is not None else self._input_pos mask = self.causal_mask[None, None, input_pos] freqs_cis = self.freqs_cis[input_pos] + + print("before tok_embedding", x.dtype) + if self.tok_embeddings: x = self.tok_embeddings(x) + + print("after tok_embedding", x.dtype) - for _, layer in self.layers.items(): + for idx, (_, layer) in enumerate(self.layers.items()): + print(f"before entering layer {idx} tok_embedding", x.dtype) x = layer(x, input_pos, freqs_cis, mask) if self.norm: @@ -1011,6 +974,58 @@ def apply_rotary_emb(x: Tensor, freqs_cis: Tensor) -> Tensor: return x_out2.type_as(x) + +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L77 +def hf_precompute_freqs_cis(dim: int, end: int, theta: float, dtype=None, **kwargs): + if not dtype: + dtype = get_precision() + + freqs = 1.0 / ( + theta + ** (torch.arange(0, dim, 2, dtype=torch.int64).float() / dim) + ) + # pyre-ignore Undefined attribute [16]: `float` has no attribute `device`. + t = torch.arange(end, device=freqs.device, dtype=torch.int64).type_as( + freqs # pyre-ignore + ) + freqs = torch.outer(t, freqs).float() # pyre-ignore + emb = torch.cat((freqs, freqs), dim=-1) + freqs_cos = torch.cos(emb) + freqs_sin = torch.sin(emb) + return torch.stack((freqs_cos, freqs_sin), dim=-1).to(dtype=dtype) + +# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L135 +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + + +def hf_apply_rotary_emb(x, freq_cis, unsqueeze_dim=1, **kwargs): + """Applies Rotary Position Embedding to the query and key tensors. + + Args: + q (`torch.Tensor`): The query tensor. + k (`torch.Tensor`): The key tensor. + cos (`torch.Tensor`): The cosine part of the rotary embedding. + sin (`torch.Tensor`): The sine part of the rotary embedding. + unsqueeze_dim (`int`, *optional*, defaults to 1): + The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and + sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note + that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and + k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes + cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have + the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. + Returns: + `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. + """ + cos = freq_cis[..., 0].unsqueeze(unsqueeze_dim) + sin = freq_cis[..., 1].unsqueeze(unsqueeze_dim) + x_out = (x * cos) + (rotate_half(x) * sin) + return x_out.type_as(x) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # ExecuTorch model components # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1047,6 +1062,90 @@ def setup_caches(self, max_batch_size, max_seq_length): pass +from torchvision import transforms as tvT + +def llava_image_preprocess( + # img_address: str, + target_h: int, + target_w: int, + rescale_factor: float, + image_mean: List[float], + image_std: List[float], + ) -> torch.Tensor: + """ + Preprocess an image by resizing it to fit a target height and width, + padding with median RGB value to make a square, scaling, and normalizing. + + Args: + img_address (str): Address of the local image file will be forwarded to the model. + target_h (int): Target height. + target_w (int): Target width. + rescale_factor (float): Rescaling factor. + image_mean (list): Mean values for normalization. + image_std (list): Standard deviation values for normalization. + + Returns: + torch.Tensor: Preprocessed image tensor. + + Raises: + FileNotFoundError: If the image file does not exist. + ValueError: If the target height or width is not positive. + """ + + # # Check if the image file exists + # if not os.path.exists(img_address): + # raise FileNotFoundError("Image file not found") + + # Check if the target height and width are positive + if target_h <= 0 or target_w <= 0: + raise ValueError("Target height and width must be positive") + + # Load the image from the given address + image = Image.open( + requests.get( + "https://llava-vl.github.io/static/images/view.jpg", stream=True + ).raw) + # Convert the image to a tensor + img = tvT.functional.pil_to_tensor(image) + + # Calculate the height and width ratios + ratio_h = img.shape[1] / target_h + ratio_w = img.shape[2] / target_w + + # Resize the image to fit in a target_h x target_w canvas + ratio = max(ratio_h, ratio_w) + output_size = (int(img.shape[1] / ratio), int(img.shape[2] / ratio)) + img = tvT.Resize(size=output_size)(img) + + # Pad the image with median RGB value to make a square + l_pad = (target_w - img.shape[2]) // 2 + t_pad = (target_h - img.shape[1]) // 2 + r_pad = -((target_w - img.shape[2]) // -2) + b_pad = -((target_h - img.shape[1]) // -2) + + torch._check(l_pad >= 0) + torch._check(t_pad >= 0) + torch._check(r_pad >= 0) + torch._check(b_pad >= 0) + + # Pad the image + resized = torch.nn.functional.pad( + img, + (l_pad, r_pad, t_pad, b_pad), + ) + + # Scale the image + scaled = resized * rescale_factor + + # Normalize the image + normed = tvT.Normalize(image_mean, image_std)(scaled) + + return normed.unsqueeze(0) + + + + + if __name__ == "__main__": import re from PIL import Image @@ -1381,11 +1480,12 @@ def split_checkpoint(llava_ckpt): 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, 322, 1248, 568, 6089, 304, 278, 5199, 29915, 29879, 5155, 29889, 3148, 1001, 29901, 29871]]) - img = prepare_image(336, 336) + # img = prepare_image(336, 336) post_tokens = torch.tensor([[29871, 13, 462, 9651, 1724, 526, 278, 2712, 306, 881, 367, 274, 1300, 2738, 1048, 746, 306, 6493, 1244, 29973, 319, 1799, 9047, 13566, 29901]]) - img = image_preprocess(img=img, target_h=336, target_w=336, image_mean=[0.48145466, 0.4578275, 0.40821073], image_std=[0.26862954, 0.26130258, 0.27577711], rescale_factor=0.00392156862745098) + img = llava_image_preprocess(target_h=336, target_w=336, image_mean=[0.48145466, 0.4578275, 0.40821073], image_std=[0.26862954, 0.26130258, 0.27577711], rescale_factor=0.00392156862745098) + print(img) print("Done, Now creating model...") llava_model = Model.from_params("/home/gasoonjia/torchchat/torchchat/model_params/llava-1.5.json") diff --git a/torchchat/model_config/model_config.py b/torchchat/model_config/model_config.py index 584a87a74..079f31629 100644 --- a/torchchat/model_config/model_config.py +++ b/torchchat/model_config/model_config.py @@ -86,6 +86,6 @@ def resolve_model_config(model: str) -> ModelConfig: model = model_aliases[model] if model not in model_configs: - raise ValueError(f"Unknown model '{model}'.") + raise ValueError(f"Unknown model '{model}'. Supported models: {model_configs.keys()}") return model_configs[model] diff --git a/torchchat/model_params/llava-1.5.json b/torchchat/model_params/llava-1.5.json index 5974702d5..c84889452 100644 --- a/torchchat/model_params/llava-1.5.json +++ b/torchchat/model_params/llava-1.5.json @@ -1,6 +1,5 @@ { "model_type": "llava", - "use_tiktoken": true, "encoder": { "tile_size": 336, "patch_size": 14, diff --git a/torchchat/utils/preprocessors.py b/torchchat/utils/preprocessors.py new file mode 100644 index 000000000..abca2a7ea --- /dev/null +++ b/torchchat/utils/preprocessors.py @@ -0,0 +1,80 @@ +import torch +import torchvision as tv +from torchvision import transforms as tvT +from PIL import Image +import os + +from typing import List + + +def llava_image_preprocess( + image: Image, + *, + target_h: int = 336, + target_w: int = 336, + rescale_factor: float = 0.00392156862745098, + image_mean: List[float] = [0.48145466, 0.4578275, 0.40821073], + image_std: List[float] = [0.26862954, 0.26130258, 0.27577711], + device: torch.device = torch.device("cpu"), + dtype: torch.dtype = torch.bfloat16, + ) -> torch.Tensor: + """ + Preprocess an image by resizing it to fit a target height and width, + padding with median RGB value to make a square, scaling, and normalizing. + + Args: + img_address (str): Address of the local image file will be forwarded to the model. + target_h (int): Target height. + target_w (int): Target width. + rescale_factor (float): Rescaling factor. + image_mean (list): Mean values for normalization. + image_std (list): Standard deviation values for normalization. + + Returns: + torch.Tensor: Preprocessed image tensor. + + Raises: + FileNotFoundError: If the image file does not exist. + ValueError: If the target height or width is not positive. + """ + + # Check if the target height and width are positive + if target_h <= 0 or target_w <= 0: + raise ValueError("Target height and width must be positive") + + # Convert the image to a tensor + img = tvT.functional.pil_to_tensor(image) + + # Calculate the height and width ratios + ratio_h = img.shape[1] / target_h + ratio_w = img.shape[2] / target_w + + # Resize the image to fit in a target_h x target_w canvas + ratio = max(ratio_h, ratio_w) + output_size = (int(img.shape[1] / ratio), int(img.shape[2] / ratio)) + img = tvT.Resize(size=output_size)(img) + + # Pad the image with median RGB value to make a square + l_pad = (target_w - img.shape[2]) // 2 + t_pad = (target_h - img.shape[1]) // 2 + r_pad = -((target_w - img.shape[2]) // -2) + b_pad = -((target_h - img.shape[1]) // -2) + + torch._check(l_pad >= 0) + torch._check(t_pad >= 0) + torch._check(r_pad >= 0) + torch._check(b_pad >= 0) + + # Pad the image + resized = torch.nn.functional.pad( + img, + (l_pad, r_pad, t_pad, b_pad), + ) + + # Scale the image + scaled = resized * rescale_factor + + # Normalize the image + normed = tvT.Normalize(image_mean, image_std)(scaled) + + return normed.unsqueeze(0).to(device).to(dtype) From 9e4350d7b98fe57740067172bc657ecafcc46f7c Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 11:47:10 -0700 Subject: [PATCH 3/8] 3/n llava e2e init --- torchchat/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchchat/generate.py b/torchchat/generate.py index 44931fdea..b0aa49cef 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -778,7 +778,7 @@ def find_subtensor(tensor, target): batch = { "tokens": input_ids[:index].unsqueeze(0), - "encoder_input": llava_image_preprocess(images[0], device=self.builder_args.device), + "encoder_input": llava_image_preprocess(images[0], device=self.builder_args.device, dtype=self.builder_args.precision), "post_tokens": input_ids[index + len(image_token_indices) :].unsqueeze(0), } print("BATTTTTTTCHCHHHHHHHHH") From 72d7b96a622968ca6dce12e83ea81352886ab7e6 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 20:26:43 -0700 Subject: [PATCH 4/8] n/n llava e2e --- out.txt | 9523 +++++++++++++++++++++++++++++++++++++++++ torchchat/generate.py | 36 +- torchchat/model.py | 14 +- 3 files changed, 9548 insertions(+), 25 deletions(-) create mode 100644 out.txt diff --git a/out.txt b/out.txt new file mode 100644 index 000000000..c9df43314 --- /dev/null +++ b/out.txt @@ -0,0 +1,9523 @@ +Using device=cuda NVIDIA PG509-210 +Loading model... +Time to load model: 6.96 seconds +----------------------------------------------------------- +Generator Args: +GeneratorArgs(prompt="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: What are the things I should be cautious about when I visit here? ASSISTANT:", encoded_prompt=None, image_prompts=['../view.jpg'], chat_mode=False, gui_mode=False, num_samples=1, max_new_tokens=200, top_k=200, temperature=0.8, compile=False, compile_prefill=False, speculate_k=5, sequential_prefill=False, max_autotune=False, is_torchtune_model=False) +Builder Args: +BuilderArgs(checkpoint_path=PosixPath('/home/gasoonjia/.torchchat/model-cache/llava-hf/llava-1.5-7b-hf/model.pth'), checkpoint_dir=None, dcp_dir=None, params_path=None, params_table='llava-1.5', gguf_path=None, gguf_kwargs=None, dso_path=None, pte_path=None, device='cuda', precision=torch.float32, setup_caches=False, use_distributed=False, is_chat_model=False, prefill_possible=True, dynamic_shapes=False, max_seq_length=None) +Image prompts ['../view.jpg'] +BATTTTTTTCHCHHHHHHHHH +{'tokens': tensor([[ 1, 319, 13563, 1546, 263, 12758, 5199, 322, 385, 23116, + 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, + 322, 1248, 568, 6089, 304, 278, 5199, 29915, 29879, 5155, + 29889, 3148, 1001, 29901]], device='cuda:0', dtype=torch.int32), 'encoder_input': tensor([[[[-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], + [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], + [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], + ..., + [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], + [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], + [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923]], + + [[-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], + [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], + [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], + ..., + [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], + [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], + [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521]], + + [[-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], + [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], + [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], + ..., + [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], + [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], + [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802]]]], + device='cuda:0'), 'post_tokens': tensor([[ 1724, 526, 278, 2712, 306, 881, 367, 274, 1300, 2738, + 1048, 746, 306, 6493, 1244, 29973, 319, 1799, 9047, 13566, + 29901]], device='cuda:0', dtype=torch.int32)} +5777777666666666 879 +embeddings sizes: +torch.Size([1, 34, 4096]) +torch.Size([1, 576, 4096]) +torch.Size([1, 21, 4096]) +input_pos torch.int32 torch.Size([631]) +tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, + 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, + 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, + 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, + 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, + 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, + 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, + 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, + 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, + 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, + 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, + 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, + 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, + 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, + 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, + 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, + 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, + 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, + 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, + 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, + 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, + 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, + 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, + 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, + 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, + 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, + 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, + 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, + 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, + 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, + 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, + 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, + 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, + 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, + 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, + 630], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +(631, tensor([[[ 1.4072, 0.7952, -0.3502, ..., 0.5492, 0.5503, 0.5589], + [-9.5823, -5.0563, -1.2308, ..., -0.5064, -0.5130, -0.5142], + [-5.5127, -6.3697, 8.1133, ..., 0.0801, 0.0749, 0.0935], + ..., + [-6.2617, -3.9745, 5.8128, ..., 0.1045, 0.1087, 0.1012], + [-4.2650, -2.9318, 7.1847, ..., -0.1931, -0.1915, -0.1842], + [-1.8455, -2.3284, 8.4773, ..., 0.0422, 0.0611, 0.0587]]], + device='cuda:0')) +********** +tensor([[1932]], device='cuda:0', dtype=torch.int32) +tensor([631], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([631], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[6493]], device='cuda:0', dtype=torch.int32) +tensor([632], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([632], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: What are the things I should be cautious about when I visit here? ASSISTANT: When visiting********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([633], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([633], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[263]], device='cuda:0', dtype=torch.int32) +tensor([634], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([634], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[325]], device='cuda:0', dtype=torch.int32) +tensor([635], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([635], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[4626]], device='cuda:0', dtype=torch.int32) +tensor([636], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([636], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + a vibrant********** +tensor([[424]], device='cuda:0', dtype=torch.int32) +tensor([637], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([637], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2058]], device='cuda:0', dtype=torch.int32) +tensor([638], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([638], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[411]], device='cuda:0', dtype=torch.int32) +tensor([639], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([639], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2381]], device='cuda:0', dtype=torch.int32) +tensor([640], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([640], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + place with swir********** +tensor([[381]], device='cuda:0', dtype=torch.int32) +tensor([641], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([641], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1847]], device='cuda:0', dtype=torch.int32) +tensor([642], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([642], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[322]], device='cuda:0', dtype=torch.int32) +tensor([643], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([643], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[325]], device='cuda:0', dtype=torch.int32) +tensor([644], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([644], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ling and vibr********** +tensor([[4626]], device='cuda:0', dtype=torch.int32) +tensor([645], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([645], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[424]], device='cuda:0', dtype=torch.int32) +tensor([646], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([646], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[11955]], device='cuda:0', dtype=torch.int32) +tensor([647], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([647], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[763]], device='cuda:0', dtype=torch.int32) +tensor([648], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([648], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ant colors like the********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([649], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([649], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[697]], device='cuda:0', dtype=torch.int32) +tensor([650], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([650], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[3595]], device='cuda:0', dtype=torch.int32) +tensor([651], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([651], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[297]], device='cuda:0', dtype=torch.int32) +tensor([652], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([652], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + one seen in the********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([653], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([653], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1967]], device='cuda:0', dtype=torch.int32) +tensor([654], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([654], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29892]], device='cuda:0', dtype=torch.int32) +tensor([655], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([655], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[727]], device='cuda:0', dtype=torch.int32) +tensor([656], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([656], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + image, there are********** +tensor([[526]], device='cuda:0', dtype=torch.int32) +tensor([657], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([657], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[263]], device='cuda:0', dtype=torch.int32) +tensor([658], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([658], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2846]], device='cuda:0', dtype=torch.int32) +tensor([659], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([659], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2712]], device='cuda:0', dtype=torch.int32) +tensor([660], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([660], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + a few things you********** +tensor([[366]], device='cuda:0', dtype=torch.int32) +tensor([661], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([661], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[881]], device='cuda:0', dtype=torch.int32) +tensor([662], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([662], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[367]], device='cuda:0', dtype=torch.int32) +tensor([663], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([663], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[274]], device='cuda:0', dtype=torch.int32) +tensor([664], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([664], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + should be caut********** +tensor([[1300]], device='cuda:0', dtype=torch.int32) +tensor([665], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([665], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2738]], device='cuda:0', dtype=torch.int32) +tensor([666], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([666], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1048]], device='cuda:0', dtype=torch.int32) +tensor([667], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([667], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29901]], device='cuda:0', dtype=torch.int32) +tensor([668], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([668], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ious about: +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([669], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([669], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([670], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([670], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29896]], device='cuda:0', dtype=torch.int32) +tensor([671], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([671], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([672], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([672], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + +1. Saf********** +tensor([[14795]], device='cuda:0', dtype=torch.int32) +tensor([673], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([673], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[3305]], device='cuda:0', dtype=torch.int32) +tensor([674], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([674], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29901]], device='cuda:0', dtype=torch.int32) +tensor([675], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([675], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1522]], device='cuda:0', dtype=torch.int32) +tensor([676], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([676], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ety: Be aware********** +tensor([[9543]], device='cuda:0', dtype=torch.int32) +tensor([677], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([677], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[310]], device='cuda:0', dtype=torch.int32) +tensor([678], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([678], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[596]], device='cuda:0', dtype=torch.int32) +tensor([679], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([679], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[8388]], device='cuda:0', dtype=torch.int32) +tensor([680], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([680], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + of your surround********** +tensor([[618]], device='cuda:0', dtype=torch.int32) +tensor([681], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([681], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[886]], device='cuda:0', dtype=torch.int32) +tensor([682], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([682], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[322]], device='cuda:0', dtype=torch.int32) +tensor([683], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([683], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[6505]], device='cuda:0', dtype=torch.int32) +tensor([684], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([684], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ings and watch out********** +tensor([[714]], device='cuda:0', dtype=torch.int32) +tensor([685], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([685], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[363]], device='cuda:0', dtype=torch.int32) +tensor([686], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([686], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[738]], device='cuda:0', dtype=torch.int32) +tensor([687], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([687], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[7037]], device='cuda:0', dtype=torch.int32) +tensor([688], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([688], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + for any potential ha********** +tensor([[447]], device='cuda:0', dtype=torch.int32) +tensor([689], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([689], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29920]], device='cuda:0', dtype=torch.int32) +tensor([690], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([690], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[3163]], device='cuda:0', dtype=torch.int32) +tensor([691], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([691], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([692], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([692], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +zards. The********** +tensor([[450]], device='cuda:0', dtype=torch.int32) +tensor([693], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([693], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1967]], device='cuda:0', dtype=torch.int32) +tensor([694], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([694], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5680]], device='cuda:0', dtype=torch.int32) +tensor([695], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([695], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[263]], device='cuda:0', dtype=torch.int32) +tensor([696], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([696], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + image features a color********** +tensor([[2927]], device='cuda:0', dtype=torch.int32) +tensor([697], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([697], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1319]], device='cuda:0', dtype=torch.int32) +tensor([698], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([698], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[4766]], device='cuda:0', dtype=torch.int32) +tensor([699], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([699], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[287]], device='cuda:0', dtype=torch.int32) +tensor([700], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([700], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ful patterned wall********** +tensor([[10090]], device='cuda:0', dtype=torch.int32) +tensor([701], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([701], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29892]], device='cuda:0', dtype=torch.int32) +tensor([702], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([702], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[577]], device='cuda:0', dtype=torch.int32) +tensor([703], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([703], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[7344]], device='cuda:0', dtype=torch.int32) +tensor([704], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([704], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +, so maintaining********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([705], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([705], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[9109]], device='cuda:0', dtype=torch.int32) +tensor([706], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([706], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[22049]], device='cuda:0', dtype=torch.int32) +tensor([707], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([707], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5418]], device='cuda:0', dtype=torch.int32) +tensor([708], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([708], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + safe walking distance and********** +tensor([[322]], device='cuda:0', dtype=torch.int32) +tensor([709], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([709], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5146]], device='cuda:0', dtype=torch.int32) +tensor([710], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([710], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([711], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([711], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[8570]], device='cuda:0', dtype=torch.int32) +tensor([712], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([712], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + paying attention to********** +tensor([[304]], device='cuda:0', dtype=torch.int32) +tensor([713], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([713], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([714], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([714], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[11904]], device='cuda:0', dtype=torch.int32) +tensor([715], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([715], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[7101]], device='cuda:0', dtype=torch.int32) +tensor([716], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([716], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + the floor surface is********** +tensor([[338]], device='cuda:0', dtype=torch.int32) +tensor([717], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([717], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[18853]], device='cuda:0', dtype=torch.int32) +tensor([718], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([718], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([719], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([719], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([720], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([720], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + essential. + +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([721], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([721], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29906]], device='cuda:0', dtype=torch.int32) +tensor([722], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([722], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([723], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([723], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[12790]], device='cuda:0', dtype=torch.int32) +tensor([724], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([724], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +2. Lighting********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([725], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([725], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29901]], device='cuda:0', dtype=torch.int32) +tensor([726], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([726], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[512]], device='cuda:0', dtype=torch.int32) +tensor([727], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([727], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[325]], device='cuda:0', dtype=torch.int32) +tensor([728], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([728], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +: In vibr********** +tensor([[4626]], device='cuda:0', dtype=torch.int32) +tensor([729], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([729], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[424]], device='cuda:0', dtype=torch.int32) +tensor([730], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([730], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[23136]], device='cuda:0', dtype=torch.int32) +tensor([731], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([731], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29892]], device='cuda:0', dtype=torch.int32) +tensor([732], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([732], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ant environments, it********** +tensor([[372]], device='cuda:0', dtype=torch.int32) +tensor([733], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([733], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1795]], device='cuda:0', dtype=torch.int32) +tensor([734], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([734], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[367]], device='cuda:0', dtype=torch.int32) +tensor([735], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([735], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5189]], device='cuda:0', dtype=torch.int32) +tensor([736], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([736], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + might be difficult to********** +tensor([[304]], device='cuda:0', dtype=torch.int32) +tensor([737], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([737], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1074]], device='cuda:0', dtype=torch.int32) +tensor([738], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([738], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([739], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([739], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[11904]], device='cuda:0', dtype=torch.int32) +tensor([740], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([740], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + see the floor clearly********** +tensor([[9436]], device='cuda:0', dtype=torch.int32) +tensor([741], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([741], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2861]], device='cuda:0', dtype=torch.int32) +tensor([742], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([742], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[304]], device='cuda:0', dtype=torch.int32) +tensor([743], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([743], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([744], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([744], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + due to the bright********** +tensor([[11785]], device='cuda:0', dtype=torch.int32) +tensor([745], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([745], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[11955]], device='cuda:0', dtype=torch.int32) +tensor([746], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([746], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([747], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([747], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[22521]], device='cuda:0', dtype=torch.int32) +tensor([748], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([748], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + colors. Ensure********** +tensor([[545]], device='cuda:0', dtype=torch.int32) +tensor([749], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([749], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[393]], device='cuda:0', dtype=torch.int32) +tensor([750], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([750], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([751], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([751], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[4038]], device='cuda:0', dtype=torch.int32) +tensor([752], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([752], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + that the area has********** +tensor([[756]], device='cuda:0', dtype=torch.int32) +tensor([753], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([753], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[19967]], device='cuda:0', dtype=torch.int32) +tensor([754], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([754], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[339]], device='cuda:0', dtype=torch.int32) +tensor([755], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([755], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[403]], device='cuda:0', dtype=torch.int32) +tensor([756], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([756], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + adequate light********** +tensor([[3578]], device='cuda:0', dtype=torch.int32) +tensor([757], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([757], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([758], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([758], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[304]], device='cuda:0', dtype=torch.int32) +tensor([759], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([759], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5557]], device='cuda:0', dtype=torch.int32) +tensor([760], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([760], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ing to prevent acc********** +tensor([[1035]], device='cuda:0', dtype=torch.int32) +tensor([761], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([761], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[16719]], device='cuda:0', dtype=torch.int32) +tensor([762], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([762], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[322]], device='cuda:0', dtype=torch.int32) +tensor([763], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([763], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[9801]], device='cuda:0', dtype=torch.int32) +tensor([764], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([764], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +idents and ensure safety********** +tensor([[15332]], device='cuda:0', dtype=torch.int32) +tensor([765], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([765], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[363]], device='cuda:0', dtype=torch.int32) +tensor([766], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([766], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[14332]], device='cuda:0', dtype=torch.int32) +tensor([767], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([767], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([768], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([768], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + for everyone. +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([769], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([769], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([770], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([770], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29941]], device='cuda:0', dtype=torch.int32) +tensor([771], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([771], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([772], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([772], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + +3. Ex********** +tensor([[1222]], device='cuda:0', dtype=torch.int32) +tensor([773], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([773], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[572]], device='cuda:0', dtype=torch.int32) +tensor([774], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([774], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[12418]], device='cuda:0', dtype=torch.int32) +tensor([775], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([775], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29901]], device='cuda:0', dtype=torch.int32) +tensor([776], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([776], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ploration: If********** +tensor([[960]], device='cuda:0', dtype=torch.int32) +tensor([777], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([777], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([778], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([778], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[4038]], device='cuda:0', dtype=torch.int32) +tensor([779], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([779], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[338]], device='cuda:0', dtype=torch.int32) +tensor([780], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([780], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + the area is a********** +tensor([[263]], device='cuda:0', dtype=torch.int32) +tensor([781], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([781], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5613]], device='cuda:0', dtype=torch.int32) +tensor([782], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([782], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[17570]], device='cuda:0', dtype=torch.int32) +tensor([783], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([783], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29892]], device='cuda:0', dtype=torch.int32) +tensor([784], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([784], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + natural habitat, avoid********** +tensor([[4772]], device='cuda:0', dtype=torch.int32) +tensor([785], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([785], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29543]], device='cuda:0', dtype=torch.int32) +tensor([786], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([786], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[292]], device='cuda:0', dtype=torch.int32) +tensor([787], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([787], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[470]], device='cuda:0', dtype=torch.int32) +tensor([788], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([788], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + disturbing or dam********** +tensor([[5625]], device='cuda:0', dtype=torch.int32) +tensor([789], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([789], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[6751]], device='cuda:0', dtype=torch.int32) +tensor([790], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([790], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([791], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([791], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5177]], device='cuda:0', dtype=torch.int32) +tensor([792], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([792], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +aging the environment.********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([793], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([793], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[7419]], device='cuda:0', dtype=torch.int32) +tensor([794], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([794], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[363]], device='cuda:0', dtype=torch.int32) +tensor([795], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([795], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[738]], device='cuda:0', dtype=torch.int32) +tensor([796], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([796], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + Look for any signs********** +tensor([[18906]], device='cuda:0', dtype=torch.int32) +tensor([797], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([797], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[393]], device='cuda:0', dtype=torch.int32) +tensor([798], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([798], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[12266]], device='cuda:0', dtype=torch.int32) +tensor([799], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([799], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[5764]], device='cuda:0', dtype=torch.int32) +tensor([800], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([800], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + that indicate closed-********** +tensor([[29899]], device='cuda:0', dtype=torch.int32) +tensor([801], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([801], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[2696]], device='cuda:0', dtype=torch.int32) +tensor([802], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([802], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[470]], device='cuda:0', dtype=torch.int32) +tensor([803], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([803], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[22078]], device='cuda:0', dtype=torch.int32) +tensor([804], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([804], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +off or restricted areas********** +tensor([[10161]], device='cuda:0', dtype=torch.int32) +tensor([805], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([805], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29892]], device='cuda:0', dtype=torch.int32) +tensor([806], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([806], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[322]], device='cuda:0', dtype=torch.int32) +tensor([807], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([807], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1101]], device='cuda:0', dtype=torch.int32) +tensor([808], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([808], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +, and follow the********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([809], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([809], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1410]], device='cuda:0', dtype=torch.int32) +tensor([810], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([810], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[10652]], device='cuda:0', dtype=torch.int32) +tensor([811], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([811], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1475]], device='cuda:0', dtype=torch.int32) +tensor([812], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([812], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + guidelines provided********** +tensor([[4944]], device='cuda:0', dtype=torch.int32) +tensor([813], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([813], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[491]], device='cuda:0', dtype=torch.int32) +tensor([814], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([814], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[278]], device='cuda:0', dtype=torch.int32) +tensor([815], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([815], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1887]], device='cuda:0', dtype=torch.int32) +tensor([816], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([816], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + by the local authorities********** +tensor([[21142]], device='cuda:0', dtype=torch.int32) +tensor([817], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([817], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[470]], device='cuda:0', dtype=torch.int32) +tensor([818], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([818], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[8775]], device='cuda:0', dtype=torch.int32) +tensor([819], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([819], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[19264]], device='cuda:0', dtype=torch.int32) +tensor([820], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([820], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + or wildlife exper********** +tensor([[2902]], device='cuda:0', dtype=torch.int32) +tensor([821], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([821], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[1372]], device='cuda:0', dtype=torch.int32) +tensor([822], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([822], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([823], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([823], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([824], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([824], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +ts. + +********** +tensor([[13]], device='cuda:0', dtype=torch.int32) +tensor([825], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([825], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29946]], device='cuda:0', dtype=torch.int32) +tensor([826], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([826], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[29889]], device='cuda:0', dtype=torch.int32) +tensor([827], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([827], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +********** +tensor([[19040]], device='cuda:0', dtype=torch.int32) +tensor([828], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([828], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 +4. Photography2024-09-23:20:10:15,721 INFO [generate.py:1047] +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Generated 199 tokens +Time for inference 1: 21.2711 sec total +Time to first token: 1.2352 sec with parallel prefill. + + Total throughput: 9.4024 tokens/sec, 0.1064 s/token +First token throughput: 0.8096 tokens/sec, 1.2352 s/token + Next token throughput: 9.9322 tokens/sec, 0.1007 s/token +2024-09-23:20:10:15,721 INFO [generate.py:1058] +Bandwidth achieved: 265.65 GB/s +2024-09-23:20:10:15,721 INFO [generate.py:1062] *** This first iteration will include cold start effects for dynamic import, hardware caches. *** +********** +tensor([[2]], device='cuda:0', dtype=torch.int32) +tensor([829], device='cuda:0', dtype=torch.int32) +input_pos torch.int32 torch.Size([1]) +tensor([829], device='cuda:0', dtype=torch.int32) +causal_mask: torch.bool torch.Size([880, 880]) +tensor([[ True, False, False, ..., False, False, False], + [ True, True, False, ..., False, False, False], + [ True, True, True, ..., False, False, False], + ..., + [ True, True, True, ..., True, False, False], + [ True, True, True, ..., True, True, False], + [ True, True, True, ..., True, True, True]], device='cuda:0') +before tok_embedding torch.float32 +after tok_embedding torch.float32 +before entering layer 0 tok_embedding torch.float32 +before entering layer 1 tok_embedding torch.float32 +before entering layer 2 tok_embedding torch.float32 +before entering layer 3 tok_embedding torch.float32 +before entering layer 4 tok_embedding torch.float32 +before entering layer 5 tok_embedding torch.float32 +before entering layer 6 tok_embedding torch.float32 +before entering layer 7 tok_embedding torch.float32 +before entering layer 8 tok_embedding torch.float32 +before entering layer 9 tok_embedding torch.float32 +before entering layer 10 tok_embedding torch.float32 +before entering layer 11 tok_embedding torch.float32 +before entering layer 12 tok_embedding torch.float32 +before entering layer 13 tok_embedding torch.float32 +before entering layer 14 tok_embedding torch.float32 +before entering layer 15 tok_embedding torch.float32 +before entering layer 16 tok_embedding torch.float32 +before entering layer 17 tok_embedding torch.float32 +before entering layer 18 tok_embedding torch.float32 +before entering layer 19 tok_embedding torch.float32 +before entering layer 20 tok_embedding torch.float32 +before entering layer 21 tok_embedding torch.float32 +before entering layer 22 tok_embedding torch.float32 +before entering layer 23 tok_embedding torch.float32 +before entering layer 24 tok_embedding torch.float32 +before entering layer 25 tok_embedding torch.float32 +before entering layer 26 tok_embedding torch.float32 +before entering layer 27 tok_embedding torch.float32 +before entering layer 28 tok_embedding torch.float32 +before entering layer 29 tok_embedding torch.float32 +before entering layer 30 tok_embedding torch.float32 +before entering layer 31 tok_embedding torch.float32 + +======================================== + + + Average tokens/sec (total): 9.40 +Average tokens/sec (first token): 0.81 +Average tokens/sec (next tokens): 9.93 + +Memory used: 35.51 GB diff --git a/torchchat/generate.py b/torchchat/generate.py index b0aa49cef..52f1e45fa 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -358,8 +358,13 @@ def prefill( if batch is not None: # TODO: Verify sequential prefill works with multimodal models - logits = model(**batch)[:, -1] - return tune_sample(logits, 0, 500) + logits = model(**batch) + if model.config.model_type == ModelType.Llava: + context_len, logits = logits[0], logits[1][:, -1] + return context_len, tune_sample(logits, 0, 500) + else: + logits = logits[:, -1] + return tune_sample(logits, 0, 500) elif sequential_prefill: for i in range(width): x_sliced, ip_sliced = x[:, i].view(-1, 1), input_pos[i].view(-1) @@ -369,7 +374,7 @@ def prefill( logits = model(x) else: # input_pos: [B, S] - logits = model(x, input_pos) + logits = model(x, input_pos=input_pos) # print(f"logits {logits.shape}") # print(f"x: {x},\n input_pos: {input_pos}\n") @@ -393,7 +398,7 @@ def decode_one_token( else: logits = model(x) else: - logits = model(x, input_pos) + logits = model(x, input_pos=input_pos) # print(f"x: {x},\n input_pos: {input_pos}\n") return self.sample(logits, need_probs=need_probs, **sampling_kwargs) @@ -624,11 +629,11 @@ def generate( **sampling_kwargs, ) - # For llava, we need to extract next pos id from prefill result - if self.model.config.model_type == ModelType.Llava: - next_token, context_len = next_token + # For llava with image input, we need to extract next pos id from prefill result + if batch and self.model.config.model_type == ModelType.Llava: + context_len, next_token = next_token else: - next_token, context_len = next_token, T + context_len, next_token = T, next_token if is_speculative: self.prefill( @@ -731,11 +736,6 @@ def chat( ): if generator_args.chat_mode: print("Starting Interactive Chat") - - print("Generator Args:") - print(generator_args) - print("Builder Args:") - print(self.builder_args) if generator_args.image_prompts is not None: print("Image prompts", generator_args.image_prompts) @@ -776,14 +776,18 @@ def find_subtensor(tensor, target): image_token_indices = self.encode_tokens("", device=self.builder_args.device)[1:] index = find_subtensor(input_ids, image_token_indices) + if index == -1: + raise ValueError("Image token not found in prompt") + batch = { "tokens": input_ids[:index].unsqueeze(0), "encoder_input": llava_image_preprocess(images[0], device=self.builder_args.device, dtype=self.builder_args.precision), "post_tokens": input_ids[index + len(image_token_indices) :].unsqueeze(0), } - print("BATTTTTTTCHCHHHHHHHHH") - print(batch) - encoded = torch.cat([batch["tokens"].view(1, -1), batch["post_tokens"].view(1, -1)], dim=-1).view(-1) + + # can not get actual encoded image feature before model inference; pseudo one + pseudo_vision_encoded = torch.zeros(1, 624).to(device=self.builder_args.device, dtype=self.builder_args.precision) + encoded = torch.cat([batch["tokens"].view(1, -1), pseudo_vision_encoded, batch["post_tokens"].view(1, -1)], dim=-1).view(-1) else: encoded = self.encode_tokens( diff --git a/torchchat/model.py b/torchchat/model.py index 5b923874c..9e18a9ea0 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -150,7 +150,7 @@ def _get_decoder_input( ) -> Tensor: if encoder_output is None: assert post_tokens is None - return self.tok_embeddings(tokens).unsqueeze(0) + return self.tok_embeddings(tokens) else: pre_img_embed = self.tok_embeddings(tokens) image_embeds = self.mm_projector(encoder_output) @@ -158,10 +158,6 @@ def _get_decoder_input( return torch.cat((pre_img_embed, image_embeds), dim=1) post_img_embed = self.tok_embeddings(post_tokens) - print("embeddings sizes:") - print(pre_img_embed.shape) - print(image_embeds.shape) - print(post_img_embed.shape) return torch.cat((pre_img_embed, image_embeds, post_img_embed), dim=1) @@ -559,6 +555,10 @@ def reset_caches(self): class LlavaModel(Model): + def __init__(self, config: ModelArgs) -> None: + super().__init__(config) + self.text_transformer_args = self.model.decoder.config + def forward( self, tokens: Tensor, @@ -689,15 +689,11 @@ def forward(self, x: Tensor, input_pos: Optional[Tensor] = None) -> Tensor: mask = self.causal_mask[None, None, input_pos] freqs_cis = self.freqs_cis[input_pos] - print("before tok_embedding", x.dtype) - if self.tok_embeddings: x = self.tok_embeddings(x) - print("after tok_embedding", x.dtype) for idx, (_, layer) in enumerate(self.layers.items()): - print(f"before entering layer {idx} tok_embedding", x.dtype) x = layer(x, input_pos, freqs_cis, mask) if self.norm: From dfe37b82a8f269571af72c4468643694d60f51c9 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 20:27:25 -0700 Subject: [PATCH 5/8] remove extra debug files --- llava_out.txt | 278 -- out.txt | 9523 ------------------------------------------------- 2 files changed, 9801 deletions(-) delete mode 100644 llava_out.txt delete mode 100644 out.txt diff --git a/llava_out.txt b/llava_out.txt deleted file mode 100644 index ba363dc79..000000000 --- a/llava_out.txt +++ /dev/null @@ -1,278 +0,0 @@ -Preparing input -Done, Now creating model... -Transformer config: TransformerArgs(block_size=2048, vocab_size=32064, n_layers=32, n_heads=32, dim=4096, hidden_dim=11008, n_local_heads=32, head_dim=128, rope_base=10000, norm_eps=1e-05, multiple_of=256, ffn_dim_multiplier=None, use_tiktoken=False, max_seq_length=768, rope_scaling=None, n_stages=1, stage_idx=0) -Done. Now loading checkpoint... -Done. Now checkpoint remapping... -Done. Now setup caches... -input args for precompute_freqs_cis: -n_elem: 128 -seq_len: 4096 -base: 10000 -dtype: None -rope_scaling: None -tensor([1.0000e+00, 8.6596e-01, 7.4989e-01, 6.4938e-01, 5.6234e-01, 4.8697e-01, - 4.2170e-01, 3.6517e-01, 3.1623e-01, 2.7384e-01, 2.3714e-01, 2.0535e-01, - 1.7783e-01, 1.5399e-01, 1.3335e-01, 1.1548e-01, 1.0000e-01, 8.6596e-02, - 7.4989e-02, 6.4938e-02, 5.6234e-02, 4.8697e-02, 4.2170e-02, 3.6517e-02, - 3.1623e-02, 2.7384e-02, 2.3714e-02, 2.0535e-02, 1.7783e-02, 1.5399e-02, - 1.3335e-02, 1.1548e-02, 1.0000e-02, 8.6596e-03, 7.4989e-03, 6.4938e-03, - 5.6234e-03, 4.8697e-03, 4.2170e-03, 3.6517e-03, 3.1623e-03, 2.7384e-03, - 2.3714e-03, 2.0535e-03, 1.7783e-03, 1.5399e-03, 1.3335e-03, 1.1548e-03, - 1.0000e-03, 8.6596e-04, 7.4989e-04, 6.4938e-04, 5.6234e-04, 4.8697e-04, - 4.2170e-04, 3.6517e-04, 3.1623e-04, 2.7384e-04, 2.3714e-04, 2.0535e-04, - 1.7783e-04, 1.5399e-04, 1.3335e-04, 1.1548e-04], device='cuda:0') -tensor([ 0, 1, 2, ..., 4093, 4094, 4095], device='cuda:0') -Done. Now running prefilling inference... -Hidden state before layer 0 is: -torch.Size([1, 636, 4096]) -tensor([[[ 0.0045, -0.0038, 0.0017, ..., -0.0088, 0.0025, -0.0025], - [-0.0112, -0.0129, -0.0121, ..., 0.0090, 0.0118, -0.0081], - [ 0.0195, -0.0058, 0.0061, ..., 0.0171, -0.0052, -0.0212], - ..., - [-0.0187, -0.0017, 0.0177, ..., 0.0238, 0.0052, 0.0101], - [ 0.0066, -0.0161, 0.0117, ..., -0.0103, 0.0148, 0.0073], - [ 0.0039, 0.0015, 0.0055, ..., -0.0042, 0.0151, 0.0024]]], - device='cuda:0', grad_fn=) -**************************************************************************************************** -q and k, before apply_rotary_emb: -torch.Size([1, 636, 32, 128]) -tensor([[[[ 8.8137e-02, -2.7958e-01, 5.8506e-01, ..., -2.7226e+00, - 1.4709e+00, -2.3085e-01], - [-1.3625e-01, 2.3757e-01, -5.1018e-01, ..., 4.1010e-01, - -5.6383e-01, 4.5404e-01], - [-3.4834e-03, 3.2426e-01, 3.8369e-01, ..., 2.9280e-01, - 2.8449e-01, 2.7460e-01], - ..., - [ 5.8234e-01, -1.2618e+00, -3.6595e-01, ..., 1.3358e+00, - 4.3184e-01, 1.0434e+00], - [ 5.9955e-01, 7.9891e-01, -4.6955e-01, ..., -4.0199e-01, - 4.2534e-01, 3.9606e-01], - [-1.1633e+00, 1.2683e+00, -2.0897e-01, ..., -1.1549e-01, - 5.5778e-01, 1.6868e-01]], - - [[ 1.2024e-01, -1.0154e+00, 1.6093e+00, ..., -5.5972e-01, - 3.1489e-01, -8.1850e-01], - [ 4.2993e-01, 2.0342e-01, -1.0682e+00, ..., -2.5782e-01, - 1.5268e-02, -2.4300e-01], - [-5.3137e-01, -6.0827e-01, -7.6472e-01, ..., -3.1512e-01, - -2.7736e-01, -2.5732e-01], - ..., - [ 2.8136e-01, -5.2945e-01, -1.7826e-01, ..., 1.1955e+00, - 5.4279e-01, 8.5956e-01], - [ 5.3614e-01, 2.0246e+00, -1.2608e+00, ..., -3.7489e-01, - 6.3723e-01, 6.5132e-01], - [-1.8604e+00, -1.2568e+00, -1.6225e+00, ..., -6.7318e-02, - 1.0279e-01, 9.0100e-02]], - - [[ 7.2795e-02, -1.4323e+00, 2.2321e+00, ..., -7.5136e-01, - 5.6175e-01, -9.5831e-01], - [ 1.1465e-01, 1.6517e-01, -4.7936e-01, ..., 4.2424e-01, - -9.5693e-01, 5.2483e-01], - [ 5.8809e-01, -2.5397e-02, -8.4822e-01, ..., 4.9272e-01, - 4.7836e-01, 5.0513e-01], - ..., - [ 4.3381e-01, -3.1210e+00, -1.0508e+00, ..., 1.3402e+00, - 1.0287e+00, 2.3169e+00], - [ 9.3764e-01, 2.3067e+00, -1.5072e+00, ..., -3.8047e-01, - 6.4796e-01, 6.6232e-01], - [-9.0192e-01, -3.0670e-01, -8.5899e-01, ..., -1.1170e-01, - 3.3573e-01, -6.0066e-01]], - - ..., - - [[ 1.8439e-01, -1.0198e+00, 1.9433e+00, ..., -9.0137e-01, - 7.8104e-01, -1.0152e+00], - [ 1.5872e-01, 8.4707e-02, -6.7571e-01, ..., 3.9261e-01, - -6.9446e-01, 4.2872e-01], - [ 3.6119e-01, 3.0833e-01, -3.7900e-01, ..., 8.6110e-02, - 8.1632e-02, 1.0222e-01], - ..., - [-1.6853e-02, -1.1853e+00, -2.1825e-01, ..., 1.2019e+00, - 3.7029e-01, 1.3317e+00], - [ 7.4456e-01, 1.8874e+00, -1.2402e+00, ..., -3.5604e-01, - 6.1179e-01, 6.2754e-01], - [-1.2021e+00, -6.7119e-01, -1.1290e+00, ..., -4.6737e-02, - -2.0528e-02, -1.7886e-01]], - - [[ 3.5354e-01, -8.3204e-01, 1.7218e+00, ..., -7.6154e-01, - 6.9443e-01, -8.8789e-01], - [ 1.0955e-01, 9.3493e-03, -6.4744e-01, ..., 3.6177e-01, - -6.0106e-01, 3.8051e-01], - [ 3.2930e-01, 2.9724e-01, -3.5446e-01, ..., -1.3412e-03, - -8.3974e-03, 7.1401e-03], - ..., - [-1.1190e-01, -7.5404e-01, -1.2496e-01, ..., 1.2401e+00, - 3.3029e-01, 1.1143e+00], - [ 6.3004e-01, 1.7134e+00, -1.1184e+00, ..., -3.2237e-01, - 5.6728e-01, 5.7957e-01], - [-1.2236e+00, -6.8457e-01, -1.1400e+00, ..., -2.8917e-02, - -2.6101e-02, -1.6053e-01]], - - [[ 2.2438e-01, 4.1760e-01, 3.5730e-01, ..., -4.1907e-01, - 1.9779e-01, -3.7496e-01], - [ 3.3656e-01, -5.3128e-02, -1.4364e+00, ..., -2.9568e-01, - 2.5090e-01, -3.4443e-01], - [-5.7520e-01, -2.8830e-01, -1.8663e-01, ..., -3.3193e-01, - -2.9675e-01, -2.8231e-01], - ..., - [ 2.1342e-01, -3.2742e-01, -5.2910e-02, ..., 1.8387e+00, - 4.8990e-01, 8.7483e-01], - [ 4.3541e-01, 1.9233e+00, -1.2351e+00, ..., -3.8134e-01, - 6.0760e-01, 6.1868e-01], - [-2.0587e+00, -1.5213e+00, -1.8633e+00, ..., -9.8056e-02, - 1.6597e-01, 3.9716e-01]]]], device='cuda:0', - grad_fn=) -torch.Size([1, 636, 32, 128]) -tensor([[[[-0.4032, -0.0167, 0.0300, ..., 0.0826, -0.0706, -0.0707], - [ 1.1458, 0.9121, -0.3251, ..., 0.5441, -0.2957, 0.5221], - [ 0.0237, -0.2481, -0.3345, ..., -0.1705, 0.3765, 0.5979], - ..., - [-0.0205, -0.0151, 0.0137, ..., 0.2737, 0.8049, -0.4081], - [ 0.1832, -0.4518, -0.2057, ..., -0.1818, 0.0770, 0.0802], - [-0.3089, 1.2236, 0.1609, ..., 0.6910, 0.1255, 0.2735]], - - [[-0.2899, -0.0028, 0.0520, ..., 0.2636, -0.2013, 0.2681], - [ 0.6155, 0.8224, 0.3304, ..., -0.4969, 0.1219, -0.3481], - [-0.2023, 0.1655, 0.3203, ..., 1.6260, 1.6661, 1.5222], - ..., - [-1.2277, -0.3016, -0.8174, ..., -0.5467, -0.9489, -1.1181], - [-0.4164, -0.4066, -0.4455, ..., -0.3053, 0.0722, 0.0750], - [ 0.0506, -0.1478, -0.3156, ..., -1.2014, 0.4634, -0.1019]], - - [[ 0.3341, -0.5947, 0.6357, ..., 0.0513, 0.2727, 0.1729], - [ 1.0427, 0.6447, -0.7157, ..., -0.0696, 0.3202, -0.1045], - [-0.4113, -0.5721, -0.3090, ..., -0.4902, -0.5272, -0.4597], - ..., - [-0.2218, 0.0770, -0.0610, ..., -0.1540, 0.4397, -0.2679], - [ 0.2970, -0.5468, -1.3979, ..., 0.4245, -0.3060, -0.3003], - [-1.2661, -1.0212, -1.4402, ..., 1.4839, -0.7005, 0.4571]], - - ..., - - [[ 0.4322, -0.5403, 0.3777, ..., 0.1544, 0.0359, 0.1695], - [ 0.8611, 0.6608, -0.4672, ..., 0.4453, 0.0383, 0.3679], - [ 0.1159, -0.2886, -0.3874, ..., -0.8130, -0.8705, -0.8258], - ..., - [-0.5046, -0.0915, -0.3287, ..., -0.2244, -0.0374, -0.5486], - [ 0.3189, -0.4997, -0.9601, ..., 0.3986, -0.3088, -0.3109], - [-0.6580, -0.5876, -1.0235, ..., 0.7111, -0.6454, 0.2257]], - - [[-0.0356, 0.1517, -0.2355, ..., 0.3998, -0.2553, 0.4541], - [ 0.7241, 0.6216, -0.2981, ..., 0.3790, 0.0313, 0.3070], - [ 0.1365, -0.2315, -0.2407, ..., -0.7944, -0.8674, -0.8289], - ..., - [-0.5110, -0.1377, -0.3427, ..., -0.1021, -0.0801, -0.5258], - [ 0.2288, -0.2831, -0.8728, ..., 0.2669, -0.1687, -0.1729], - [-0.3816, -0.4067, -0.7615, ..., 0.4735, -0.5601, 0.1255]], - - [[-0.1380, -0.6229, 0.3423, ..., 0.1316, -0.1960, 0.2663], - [ 0.6976, 1.1722, 0.2912, ..., 0.2839, -0.1349, 0.3306], - [ 0.0694, 0.2533, 0.1255, ..., 1.4579, 1.5409, 1.4390], - ..., - [-1.4628, -0.3196, -1.0152, ..., -0.4717, -1.1734, -1.2208], - [-0.5177, -0.4918, -0.5488, ..., -0.5367, 0.2164, 0.2186], - [ 0.4723, 0.3053, 0.1405, ..., -1.6717, 0.3979, -0.3566]]]], - device='cuda:0', grad_fn=) -q and k, after apply_rotary_emb: -torch.Size([1, 636, 32, 128]) -tensor([[[[ 0.0881, -0.2796, 0.5851, ..., -2.7226, 1.4709, -0.2308], - [-0.1363, 0.2376, -0.5102, ..., 0.4101, -0.5638, 0.4540], - [-0.0035, 0.3243, 0.3837, ..., 0.2928, 0.2845, 0.2746], - ..., - [ 0.5823, -1.2618, -0.3660, ..., 1.3358, 0.4318, 1.0434], - [ 0.5995, 0.7989, -0.4695, ..., -0.4020, 0.4253, 0.3961], - [-1.1633, 1.2683, -0.2090, ..., -0.1155, 0.5578, 0.1687]], - - [[ 0.9194, -0.4475, 0.5493, ..., -0.5596, 0.3150, -0.8185], - [ 0.0611, 0.4717, -0.4997, ..., -0.2578, 0.0153, -0.2430], - [ 0.2247, -0.7758, -0.5275, ..., -0.3152, -0.2773, -0.2574], - ..., - [ 0.5975, -0.0493, -0.0846, ..., 1.1953, 0.5427, 0.8596], - [-1.4139, 1.5450, 0.5793, ..., -0.3748, 0.6372, 0.6514], - [ 0.0524, -2.2445, -0.1922, ..., -0.0674, 0.1028, 0.0901]], - - [[ 1.2721, 0.6622, -1.6639, ..., -0.7511, 0.5620, -0.9582], - [-0.1979, 0.0355, 0.1847, ..., 0.4243, -0.9571, 0.5246], - [-0.2216, 0.5453, 1.0939, ..., 0.4928, 0.4782, 0.5052], - ..., - [ 2.6574, 1.6933, -0.3208, ..., 1.3397, 1.0281, 2.3172], - [-2.4877, -0.1074, 2.4119, ..., -0.3803, 0.6478, 0.6625], - [ 0.6542, -0.6925, 0.6528, ..., -0.1118, 0.3359, -0.6006]], - - ..., - - [[-1.0250, -0.1528, -0.9315, ..., -0.8048, 0.8531, -0.9555], - [ 0.0798, -0.1613, 0.1087, ..., 0.3938, -0.7239, 0.3769], - [ 0.2970, -0.3706, 0.7458, ..., 0.0809, 0.0739, 0.1079], - ..., - [-1.1842, 0.0535, -0.0299, ..., 1.0856, 0.2720, 1.3552], - [ 1.8635, -0.8026, 1.8737, ..., -0.2931, 0.5643, 0.6705], - [-0.6337, 1.2223, 0.5544, ..., -0.0724, -0.0074, -0.1799]], - - [[-0.1795, -0.8860, -1.8381, ..., -0.6720, 0.7575, -0.8347], - [ 0.0956, -0.0543, 0.5367, ..., 0.3598, -0.6273, 0.3355], - [ 0.4397, 0.0586, 0.7529, ..., -0.0108, -0.0089, 0.0065], - ..., - [-0.5191, -0.5583, 0.0824, ..., 1.1312, 0.2479, 1.1355], - [ 1.4893, 1.0558, 2.0221, ..., -0.2650, 0.5234, 0.6195], - [-1.3962, 0.1283, 1.2648, ..., -0.0525, -0.0143, -0.1620]], - - [[ 0.0449, 0.4719, -0.3696, ..., -0.3603, 0.2247, -0.3595], - [ 0.3308, 0.0816, 1.4085, ..., -0.2712, 0.2755, -0.3251], - [-0.4184, -0.4888, 0.2117, ..., -0.3717, -0.2753, -0.3033], - ..., - [ 0.3237, -0.2190, 0.0355, ..., 1.7209, 0.4245, 0.9084], - [-0.3446, 1.9417, 1.0332, ..., -0.3204, 0.5606, 0.6615], - [-1.3076, -2.2007, 1.7241, ..., -0.1305, 0.1364, 0.4083]]]], - device='cuda:0', grad_fn=) -torch.Size([1, 636, 32, 128]) -tensor([[[[-0.4032, -0.0167, 0.0300, ..., 0.0826, -0.0706, -0.0707], - [ 1.1458, 0.9121, -0.3251, ..., 0.5441, -0.2957, 0.5221], - [ 0.0237, -0.2481, -0.3345, ..., -0.1705, 0.3765, 0.5979], - ..., - [-0.0205, -0.0151, 0.0137, ..., 0.2737, 0.8049, -0.4081], - [ 0.1832, -0.4518, -0.2057, ..., -0.1818, 0.0770, 0.0802], - [-0.3089, 1.2236, 0.1609, ..., 0.6910, 0.1255, 0.2735]], - - [[-0.1543, -0.2455, 0.4159, ..., 0.2636, -0.2013, 0.2681], - [-0.3594, 0.9623, 0.6977, ..., -0.4968, 0.1220, -0.3481], - [-0.2486, -0.0808, 0.1940, ..., 1.6261, 1.6660, 1.5224], - ..., - [-0.4096, -1.1961, -1.5964, ..., -0.5465, -0.9488, -1.1182], - [ 0.1171, -0.5701, -0.2101, ..., -0.3053, 0.0722, 0.0750], - [ 0.1517, -0.0373, -0.1310, ..., -1.2013, 0.4634, -0.1018]], - - [[ 0.4018, 0.5513, -0.0035, ..., 0.0514, 0.2727, 0.1729], - [-1.0201, 0.6798, 0.7916, ..., -0.0699, 0.3202, -0.1044], - [ 0.6914, -0.1359, -0.4204, ..., -0.4903, -0.5271, -0.4598], - ..., - [ 0.0223, -0.2337, 0.3496, ..., -0.1539, 0.4398, -0.2678], - [ 0.3736, 0.4976, 0.1018, ..., 0.4244, -0.3059, -0.3004], - [ 1.4554, -0.7262, 1.1798, ..., 1.4838, -0.7006, 0.4569]], - - ..., - - [[-0.5534, -0.4153, 0.1699, ..., 0.1490, 0.0235, 0.1717], - [ 0.6339, -0.8811, 0.5064, ..., 0.3630, 0.0113, 0.3697], - [-0.2921, -0.1069, -0.0323, ..., -0.8665, -0.8079, -0.8872], - ..., - [-0.0759, 0.5072, -0.4380, ..., -0.1906, 0.0027, -0.5499], - [-0.5093, -0.3033, -0.3336, ..., 0.3695, -0.2852, -0.3327], - [-0.5670, 0.6759, 0.3940, ..., 0.6923, -0.6601, 0.1780]], - - [[ 0.0565, 0.1452, 0.6558, ..., 0.3668, -0.2878, 0.4342], - [ 0.9488, 0.1026, 0.5222, ..., 0.3105, 0.0088, 0.3085], - [-0.0185, -0.2681, 0.1706, ..., -0.8421, -0.8044, -0.8901], - ..., - [-0.4992, 0.1758, -0.0446, ..., -0.0725, -0.0414, -0.5302], - [ 0.0284, -0.3629, 0.5567, ..., 0.2499, -0.1556, -0.1848], - [-0.5448, -0.1193, 0.7166, ..., 0.4652, -0.5677, 0.0842]], - - [[ 0.1144, -0.6277, -0.3179, ..., 0.1037, -0.2150, 0.2512], - [ 0.1883, 1.3510, -0.3788, ..., 0.3876, -0.1587, 0.3198], - [-0.0343, 0.2604, -0.1608, ..., 1.5246, 1.4313, 1.5480], - ..., - [-1.2244, -0.8619, 1.1993, ..., -0.3920, -1.0808, -1.3035], - [-0.2864, -0.6541, 0.5345, ..., -0.5218, 0.1998, 0.2339], - [ 0.3169, 0.4646, -0.0869, ..., -1.6035, 0.4230, -0.3265]]]], - device='cuda:0', grad_fn=) diff --git a/out.txt b/out.txt deleted file mode 100644 index c9df43314..000000000 --- a/out.txt +++ /dev/null @@ -1,9523 +0,0 @@ -Using device=cuda NVIDIA PG509-210 -Loading model... -Time to load model: 6.96 seconds ------------------------------------------------------------ -Generator Args: -GeneratorArgs(prompt="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: What are the things I should be cautious about when I visit here? ASSISTANT:", encoded_prompt=None, image_prompts=['../view.jpg'], chat_mode=False, gui_mode=False, num_samples=1, max_new_tokens=200, top_k=200, temperature=0.8, compile=False, compile_prefill=False, speculate_k=5, sequential_prefill=False, max_autotune=False, is_torchtune_model=False) -Builder Args: -BuilderArgs(checkpoint_path=PosixPath('/home/gasoonjia/.torchchat/model-cache/llava-hf/llava-1.5-7b-hf/model.pth'), checkpoint_dir=None, dcp_dir=None, params_path=None, params_table='llava-1.5', gguf_path=None, gguf_kwargs=None, dso_path=None, pte_path=None, device='cuda', precision=torch.float32, setup_caches=False, use_distributed=False, is_chat_model=False, prefill_possible=True, dynamic_shapes=False, max_seq_length=None) -Image prompts ['../view.jpg'] -BATTTTTTTCHCHHHHHHHHH -{'tokens': tensor([[ 1, 319, 13563, 1546, 263, 12758, 5199, 322, 385, 23116, - 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, - 322, 1248, 568, 6089, 304, 278, 5199, 29915, 29879, 5155, - 29889, 3148, 1001, 29901]], device='cuda:0', dtype=torch.int32), 'encoder_input': tensor([[[[-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], - [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], - [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], - ..., - [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], - [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923], - [-1.7923, -1.7923, -1.7923, ..., -1.7923, -1.7923, -1.7923]], - - [[-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], - [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], - [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], - ..., - [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], - [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521], - [-1.7521, -1.7521, -1.7521, ..., -1.7521, -1.7521, -1.7521]], - - [[-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], - [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], - [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], - ..., - [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], - [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802], - [-1.4802, -1.4802, -1.4802, ..., -1.4802, -1.4802, -1.4802]]]], - device='cuda:0'), 'post_tokens': tensor([[ 1724, 526, 278, 2712, 306, 881, 367, 274, 1300, 2738, - 1048, 746, 306, 6493, 1244, 29973, 319, 1799, 9047, 13566, - 29901]], device='cuda:0', dtype=torch.int32)} -5777777666666666 879 -embeddings sizes: -torch.Size([1, 34, 4096]) -torch.Size([1, 576, 4096]) -torch.Size([1, 21, 4096]) -input_pos torch.int32 torch.Size([631]) -tensor([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, - 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, - 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, - 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, - 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, - 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, - 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, - 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, - 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, - 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, - 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, - 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, - 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, - 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, - 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, - 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, - 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, - 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, - 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, - 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, - 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, - 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, - 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, - 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, - 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, - 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, - 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, - 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, - 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, - 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, - 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, - 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, - 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, - 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, - 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, - 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, - 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, - 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, - 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, - 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, - 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, - 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, - 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, - 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, - 630], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -(631, tensor([[[ 1.4072, 0.7952, -0.3502, ..., 0.5492, 0.5503, 0.5589], - [-9.5823, -5.0563, -1.2308, ..., -0.5064, -0.5130, -0.5142], - [-5.5127, -6.3697, 8.1133, ..., 0.0801, 0.0749, 0.0935], - ..., - [-6.2617, -3.9745, 5.8128, ..., 0.1045, 0.1087, 0.1012], - [-4.2650, -2.9318, 7.1847, ..., -0.1931, -0.1915, -0.1842], - [-1.8455, -2.3284, 8.4773, ..., 0.0422, 0.0611, 0.0587]]], - device='cuda:0')) -********** -tensor([[1932]], device='cuda:0', dtype=torch.int32) -tensor([631], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([631], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[6493]], device='cuda:0', dtype=torch.int32) -tensor([632], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([632], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions. USER: What are the things I should be cautious about when I visit here? ASSISTANT: When visiting********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([633], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([633], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[263]], device='cuda:0', dtype=torch.int32) -tensor([634], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([634], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[325]], device='cuda:0', dtype=torch.int32) -tensor([635], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([635], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[4626]], device='cuda:0', dtype=torch.int32) -tensor([636], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([636], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - a vibrant********** -tensor([[424]], device='cuda:0', dtype=torch.int32) -tensor([637], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([637], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2058]], device='cuda:0', dtype=torch.int32) -tensor([638], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([638], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[411]], device='cuda:0', dtype=torch.int32) -tensor([639], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([639], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2381]], device='cuda:0', dtype=torch.int32) -tensor([640], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([640], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - place with swir********** -tensor([[381]], device='cuda:0', dtype=torch.int32) -tensor([641], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([641], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1847]], device='cuda:0', dtype=torch.int32) -tensor([642], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([642], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[322]], device='cuda:0', dtype=torch.int32) -tensor([643], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([643], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[325]], device='cuda:0', dtype=torch.int32) -tensor([644], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([644], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ling and vibr********** -tensor([[4626]], device='cuda:0', dtype=torch.int32) -tensor([645], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([645], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[424]], device='cuda:0', dtype=torch.int32) -tensor([646], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([646], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[11955]], device='cuda:0', dtype=torch.int32) -tensor([647], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([647], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[763]], device='cuda:0', dtype=torch.int32) -tensor([648], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([648], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ant colors like the********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([649], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([649], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[697]], device='cuda:0', dtype=torch.int32) -tensor([650], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([650], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[3595]], device='cuda:0', dtype=torch.int32) -tensor([651], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([651], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[297]], device='cuda:0', dtype=torch.int32) -tensor([652], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([652], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - one seen in the********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([653], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([653], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1967]], device='cuda:0', dtype=torch.int32) -tensor([654], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([654], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29892]], device='cuda:0', dtype=torch.int32) -tensor([655], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([655], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[727]], device='cuda:0', dtype=torch.int32) -tensor([656], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([656], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - image, there are********** -tensor([[526]], device='cuda:0', dtype=torch.int32) -tensor([657], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([657], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[263]], device='cuda:0', dtype=torch.int32) -tensor([658], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([658], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2846]], device='cuda:0', dtype=torch.int32) -tensor([659], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([659], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2712]], device='cuda:0', dtype=torch.int32) -tensor([660], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([660], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - a few things you********** -tensor([[366]], device='cuda:0', dtype=torch.int32) -tensor([661], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([661], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[881]], device='cuda:0', dtype=torch.int32) -tensor([662], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([662], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[367]], device='cuda:0', dtype=torch.int32) -tensor([663], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([663], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[274]], device='cuda:0', dtype=torch.int32) -tensor([664], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([664], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - should be caut********** -tensor([[1300]], device='cuda:0', dtype=torch.int32) -tensor([665], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([665], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2738]], device='cuda:0', dtype=torch.int32) -tensor([666], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([666], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1048]], device='cuda:0', dtype=torch.int32) -tensor([667], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([667], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29901]], device='cuda:0', dtype=torch.int32) -tensor([668], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([668], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ious about: -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([669], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([669], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([670], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([670], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29896]], device='cuda:0', dtype=torch.int32) -tensor([671], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([671], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([672], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([672], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - -1. Saf********** -tensor([[14795]], device='cuda:0', dtype=torch.int32) -tensor([673], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([673], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[3305]], device='cuda:0', dtype=torch.int32) -tensor([674], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([674], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29901]], device='cuda:0', dtype=torch.int32) -tensor([675], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([675], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1522]], device='cuda:0', dtype=torch.int32) -tensor([676], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([676], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ety: Be aware********** -tensor([[9543]], device='cuda:0', dtype=torch.int32) -tensor([677], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([677], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[310]], device='cuda:0', dtype=torch.int32) -tensor([678], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([678], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[596]], device='cuda:0', dtype=torch.int32) -tensor([679], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([679], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[8388]], device='cuda:0', dtype=torch.int32) -tensor([680], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([680], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - of your surround********** -tensor([[618]], device='cuda:0', dtype=torch.int32) -tensor([681], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([681], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[886]], device='cuda:0', dtype=torch.int32) -tensor([682], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([682], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[322]], device='cuda:0', dtype=torch.int32) -tensor([683], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([683], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[6505]], device='cuda:0', dtype=torch.int32) -tensor([684], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([684], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ings and watch out********** -tensor([[714]], device='cuda:0', dtype=torch.int32) -tensor([685], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([685], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[363]], device='cuda:0', dtype=torch.int32) -tensor([686], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([686], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[738]], device='cuda:0', dtype=torch.int32) -tensor([687], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([687], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[7037]], device='cuda:0', dtype=torch.int32) -tensor([688], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([688], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - for any potential ha********** -tensor([[447]], device='cuda:0', dtype=torch.int32) -tensor([689], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([689], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29920]], device='cuda:0', dtype=torch.int32) -tensor([690], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([690], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[3163]], device='cuda:0', dtype=torch.int32) -tensor([691], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([691], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([692], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([692], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -zards. The********** -tensor([[450]], device='cuda:0', dtype=torch.int32) -tensor([693], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([693], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1967]], device='cuda:0', dtype=torch.int32) -tensor([694], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([694], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5680]], device='cuda:0', dtype=torch.int32) -tensor([695], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([695], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[263]], device='cuda:0', dtype=torch.int32) -tensor([696], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([696], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - image features a color********** -tensor([[2927]], device='cuda:0', dtype=torch.int32) -tensor([697], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([697], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1319]], device='cuda:0', dtype=torch.int32) -tensor([698], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([698], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[4766]], device='cuda:0', dtype=torch.int32) -tensor([699], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([699], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[287]], device='cuda:0', dtype=torch.int32) -tensor([700], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([700], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ful patterned wall********** -tensor([[10090]], device='cuda:0', dtype=torch.int32) -tensor([701], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([701], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29892]], device='cuda:0', dtype=torch.int32) -tensor([702], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([702], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[577]], device='cuda:0', dtype=torch.int32) -tensor([703], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([703], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[7344]], device='cuda:0', dtype=torch.int32) -tensor([704], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([704], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -, so maintaining********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([705], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([705], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[9109]], device='cuda:0', dtype=torch.int32) -tensor([706], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([706], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[22049]], device='cuda:0', dtype=torch.int32) -tensor([707], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([707], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5418]], device='cuda:0', dtype=torch.int32) -tensor([708], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([708], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - safe walking distance and********** -tensor([[322]], device='cuda:0', dtype=torch.int32) -tensor([709], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([709], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5146]], device='cuda:0', dtype=torch.int32) -tensor([710], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([710], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([711], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([711], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[8570]], device='cuda:0', dtype=torch.int32) -tensor([712], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([712], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - paying attention to********** -tensor([[304]], device='cuda:0', dtype=torch.int32) -tensor([713], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([713], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([714], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([714], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[11904]], device='cuda:0', dtype=torch.int32) -tensor([715], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([715], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[7101]], device='cuda:0', dtype=torch.int32) -tensor([716], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([716], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - the floor surface is********** -tensor([[338]], device='cuda:0', dtype=torch.int32) -tensor([717], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([717], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[18853]], device='cuda:0', dtype=torch.int32) -tensor([718], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([718], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([719], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([719], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([720], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([720], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - essential. - -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([721], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([721], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29906]], device='cuda:0', dtype=torch.int32) -tensor([722], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([722], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([723], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([723], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[12790]], device='cuda:0', dtype=torch.int32) -tensor([724], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([724], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -2. Lighting********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([725], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([725], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29901]], device='cuda:0', dtype=torch.int32) -tensor([726], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([726], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[512]], device='cuda:0', dtype=torch.int32) -tensor([727], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([727], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[325]], device='cuda:0', dtype=torch.int32) -tensor([728], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([728], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -: In vibr********** -tensor([[4626]], device='cuda:0', dtype=torch.int32) -tensor([729], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([729], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[424]], device='cuda:0', dtype=torch.int32) -tensor([730], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([730], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[23136]], device='cuda:0', dtype=torch.int32) -tensor([731], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([731], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29892]], device='cuda:0', dtype=torch.int32) -tensor([732], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([732], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ant environments, it********** -tensor([[372]], device='cuda:0', dtype=torch.int32) -tensor([733], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([733], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1795]], device='cuda:0', dtype=torch.int32) -tensor([734], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([734], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[367]], device='cuda:0', dtype=torch.int32) -tensor([735], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([735], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5189]], device='cuda:0', dtype=torch.int32) -tensor([736], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([736], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - might be difficult to********** -tensor([[304]], device='cuda:0', dtype=torch.int32) -tensor([737], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([737], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1074]], device='cuda:0', dtype=torch.int32) -tensor([738], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([738], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([739], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([739], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[11904]], device='cuda:0', dtype=torch.int32) -tensor([740], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([740], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - see the floor clearly********** -tensor([[9436]], device='cuda:0', dtype=torch.int32) -tensor([741], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([741], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2861]], device='cuda:0', dtype=torch.int32) -tensor([742], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([742], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[304]], device='cuda:0', dtype=torch.int32) -tensor([743], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([743], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([744], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([744], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - due to the bright********** -tensor([[11785]], device='cuda:0', dtype=torch.int32) -tensor([745], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([745], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[11955]], device='cuda:0', dtype=torch.int32) -tensor([746], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([746], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([747], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([747], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[22521]], device='cuda:0', dtype=torch.int32) -tensor([748], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([748], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - colors. Ensure********** -tensor([[545]], device='cuda:0', dtype=torch.int32) -tensor([749], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([749], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[393]], device='cuda:0', dtype=torch.int32) -tensor([750], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([750], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([751], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([751], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[4038]], device='cuda:0', dtype=torch.int32) -tensor([752], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([752], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - that the area has********** -tensor([[756]], device='cuda:0', dtype=torch.int32) -tensor([753], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([753], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[19967]], device='cuda:0', dtype=torch.int32) -tensor([754], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([754], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[339]], device='cuda:0', dtype=torch.int32) -tensor([755], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([755], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[403]], device='cuda:0', dtype=torch.int32) -tensor([756], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([756], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - adequate light********** -tensor([[3578]], device='cuda:0', dtype=torch.int32) -tensor([757], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([757], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([758], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([758], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[304]], device='cuda:0', dtype=torch.int32) -tensor([759], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([759], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5557]], device='cuda:0', dtype=torch.int32) -tensor([760], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([760], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ing to prevent acc********** -tensor([[1035]], device='cuda:0', dtype=torch.int32) -tensor([761], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([761], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[16719]], device='cuda:0', dtype=torch.int32) -tensor([762], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([762], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[322]], device='cuda:0', dtype=torch.int32) -tensor([763], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([763], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[9801]], device='cuda:0', dtype=torch.int32) -tensor([764], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([764], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -idents and ensure safety********** -tensor([[15332]], device='cuda:0', dtype=torch.int32) -tensor([765], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([765], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[363]], device='cuda:0', dtype=torch.int32) -tensor([766], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([766], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[14332]], device='cuda:0', dtype=torch.int32) -tensor([767], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([767], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([768], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([768], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - for everyone. -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([769], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([769], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([770], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([770], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29941]], device='cuda:0', dtype=torch.int32) -tensor([771], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([771], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([772], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([772], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - -3. Ex********** -tensor([[1222]], device='cuda:0', dtype=torch.int32) -tensor([773], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([773], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[572]], device='cuda:0', dtype=torch.int32) -tensor([774], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([774], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[12418]], device='cuda:0', dtype=torch.int32) -tensor([775], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([775], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29901]], device='cuda:0', dtype=torch.int32) -tensor([776], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([776], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ploration: If********** -tensor([[960]], device='cuda:0', dtype=torch.int32) -tensor([777], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([777], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([778], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([778], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[4038]], device='cuda:0', dtype=torch.int32) -tensor([779], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([779], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[338]], device='cuda:0', dtype=torch.int32) -tensor([780], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([780], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - the area is a********** -tensor([[263]], device='cuda:0', dtype=torch.int32) -tensor([781], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([781], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5613]], device='cuda:0', dtype=torch.int32) -tensor([782], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([782], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[17570]], device='cuda:0', dtype=torch.int32) -tensor([783], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([783], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29892]], device='cuda:0', dtype=torch.int32) -tensor([784], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([784], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - natural habitat, avoid********** -tensor([[4772]], device='cuda:0', dtype=torch.int32) -tensor([785], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([785], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29543]], device='cuda:0', dtype=torch.int32) -tensor([786], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([786], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[292]], device='cuda:0', dtype=torch.int32) -tensor([787], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([787], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[470]], device='cuda:0', dtype=torch.int32) -tensor([788], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([788], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - disturbing or dam********** -tensor([[5625]], device='cuda:0', dtype=torch.int32) -tensor([789], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([789], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[6751]], device='cuda:0', dtype=torch.int32) -tensor([790], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([790], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([791], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([791], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5177]], device='cuda:0', dtype=torch.int32) -tensor([792], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([792], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -aging the environment.********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([793], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([793], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[7419]], device='cuda:0', dtype=torch.int32) -tensor([794], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([794], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[363]], device='cuda:0', dtype=torch.int32) -tensor([795], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([795], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[738]], device='cuda:0', dtype=torch.int32) -tensor([796], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([796], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - Look for any signs********** -tensor([[18906]], device='cuda:0', dtype=torch.int32) -tensor([797], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([797], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[393]], device='cuda:0', dtype=torch.int32) -tensor([798], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([798], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[12266]], device='cuda:0', dtype=torch.int32) -tensor([799], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([799], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[5764]], device='cuda:0', dtype=torch.int32) -tensor([800], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([800], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - that indicate closed-********** -tensor([[29899]], device='cuda:0', dtype=torch.int32) -tensor([801], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([801], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[2696]], device='cuda:0', dtype=torch.int32) -tensor([802], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([802], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[470]], device='cuda:0', dtype=torch.int32) -tensor([803], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([803], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[22078]], device='cuda:0', dtype=torch.int32) -tensor([804], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([804], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -off or restricted areas********** -tensor([[10161]], device='cuda:0', dtype=torch.int32) -tensor([805], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([805], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29892]], device='cuda:0', dtype=torch.int32) -tensor([806], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([806], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[322]], device='cuda:0', dtype=torch.int32) -tensor([807], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([807], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1101]], device='cuda:0', dtype=torch.int32) -tensor([808], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([808], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -, and follow the********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([809], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([809], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1410]], device='cuda:0', dtype=torch.int32) -tensor([810], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([810], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[10652]], device='cuda:0', dtype=torch.int32) -tensor([811], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([811], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1475]], device='cuda:0', dtype=torch.int32) -tensor([812], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([812], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - guidelines provided********** -tensor([[4944]], device='cuda:0', dtype=torch.int32) -tensor([813], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([813], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[491]], device='cuda:0', dtype=torch.int32) -tensor([814], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([814], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[278]], device='cuda:0', dtype=torch.int32) -tensor([815], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([815], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1887]], device='cuda:0', dtype=torch.int32) -tensor([816], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([816], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - by the local authorities********** -tensor([[21142]], device='cuda:0', dtype=torch.int32) -tensor([817], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([817], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[470]], device='cuda:0', dtype=torch.int32) -tensor([818], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([818], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[8775]], device='cuda:0', dtype=torch.int32) -tensor([819], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([819], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[19264]], device='cuda:0', dtype=torch.int32) -tensor([820], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([820], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - or wildlife exper********** -tensor([[2902]], device='cuda:0', dtype=torch.int32) -tensor([821], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([821], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[1372]], device='cuda:0', dtype=torch.int32) -tensor([822], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([822], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([823], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([823], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([824], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([824], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -ts. - -********** -tensor([[13]], device='cuda:0', dtype=torch.int32) -tensor([825], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([825], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29946]], device='cuda:0', dtype=torch.int32) -tensor([826], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([826], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[29889]], device='cuda:0', dtype=torch.int32) -tensor([827], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([827], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -********** -tensor([[19040]], device='cuda:0', dtype=torch.int32) -tensor([828], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([828], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 -4. Photography2024-09-23:20:10:15,721 INFO [generate.py:1047] -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Generated 199 tokens -Time for inference 1: 21.2711 sec total -Time to first token: 1.2352 sec with parallel prefill. - - Total throughput: 9.4024 tokens/sec, 0.1064 s/token -First token throughput: 0.8096 tokens/sec, 1.2352 s/token - Next token throughput: 9.9322 tokens/sec, 0.1007 s/token -2024-09-23:20:10:15,721 INFO [generate.py:1058] -Bandwidth achieved: 265.65 GB/s -2024-09-23:20:10:15,721 INFO [generate.py:1062] *** This first iteration will include cold start effects for dynamic import, hardware caches. *** -********** -tensor([[2]], device='cuda:0', dtype=torch.int32) -tensor([829], device='cuda:0', dtype=torch.int32) -input_pos torch.int32 torch.Size([1]) -tensor([829], device='cuda:0', dtype=torch.int32) -causal_mask: torch.bool torch.Size([880, 880]) -tensor([[ True, False, False, ..., False, False, False], - [ True, True, False, ..., False, False, False], - [ True, True, True, ..., False, False, False], - ..., - [ True, True, True, ..., True, False, False], - [ True, True, True, ..., True, True, False], - [ True, True, True, ..., True, True, True]], device='cuda:0') -before tok_embedding torch.float32 -after tok_embedding torch.float32 -before entering layer 0 tok_embedding torch.float32 -before entering layer 1 tok_embedding torch.float32 -before entering layer 2 tok_embedding torch.float32 -before entering layer 3 tok_embedding torch.float32 -before entering layer 4 tok_embedding torch.float32 -before entering layer 5 tok_embedding torch.float32 -before entering layer 6 tok_embedding torch.float32 -before entering layer 7 tok_embedding torch.float32 -before entering layer 8 tok_embedding torch.float32 -before entering layer 9 tok_embedding torch.float32 -before entering layer 10 tok_embedding torch.float32 -before entering layer 11 tok_embedding torch.float32 -before entering layer 12 tok_embedding torch.float32 -before entering layer 13 tok_embedding torch.float32 -before entering layer 14 tok_embedding torch.float32 -before entering layer 15 tok_embedding torch.float32 -before entering layer 16 tok_embedding torch.float32 -before entering layer 17 tok_embedding torch.float32 -before entering layer 18 tok_embedding torch.float32 -before entering layer 19 tok_embedding torch.float32 -before entering layer 20 tok_embedding torch.float32 -before entering layer 21 tok_embedding torch.float32 -before entering layer 22 tok_embedding torch.float32 -before entering layer 23 tok_embedding torch.float32 -before entering layer 24 tok_embedding torch.float32 -before entering layer 25 tok_embedding torch.float32 -before entering layer 26 tok_embedding torch.float32 -before entering layer 27 tok_embedding torch.float32 -before entering layer 28 tok_embedding torch.float32 -before entering layer 29 tok_embedding torch.float32 -before entering layer 30 tok_embedding torch.float32 -before entering layer 31 tok_embedding torch.float32 - -======================================== - - - Average tokens/sec (total): 9.40 -Average tokens/sec (first token): 0.81 -Average tokens/sec (next tokens): 9.93 - -Memory used: 35.51 GB From 8ecc2fab7d28ab079cefbee6a99d7f07df83dd90 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 20:34:01 -0700 Subject: [PATCH 6/8] remove extra debug code --- freq_compare.py | 126 ------------------------------------------------ 1 file changed, 126 deletions(-) delete mode 100644 freq_compare.py diff --git a/freq_compare.py b/freq_compare.py deleted file mode 100644 index 13ecbdd2f..000000000 --- a/freq_compare.py +++ /dev/null @@ -1,126 +0,0 @@ -import torch -from typing import Any, Dict, Optional, Tuple -from torchchat.utils.build_utils import find_multiple, get_precision - -# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L77 -def hf_precompute_freqs_cis(dim: int, end: int, theta: float): - freqs = 1.0 / ( - theta - ** (torch.arange(0, dim, 2, device="cpu", dtype=torch.int64).float() / dim) - ) - # pyre-ignore Undefined attribute [16]: `float` has no attribute `device`. - t = torch.arange(end, device=freqs.device, dtype=torch.int64).type_as( - freqs # pyre-ignore - ) - freqs = torch.outer(t, freqs).float() # pyre-ignore - emb = torch.cat((freqs, freqs), dim=-1) - freqs_cos = torch.cos(emb) - freqs_sin = torch.sin(emb) - return freqs_cos, freqs_sin - - -def precompute_freqs_cis( - n_elem: int, - seq_len: int, - base: int = 10000, - dtype=None, - rope_scaling: Optional[Dict[str, Any]] = None, -): - if not dtype: - dtype = get_precision() - freqs = 1.0 / ( - base ** (torch.arange(0, n_elem, 2)[: (n_elem // 2)].float() / n_elem) - ) - t = torch.arange(seq_len, device=freqs.device) - if rope_scaling is not None: - freqs = apply_scaling(freqs, rope_scaling) - freqs = torch.outer(t, freqs) - freqs_cis = torch.polar(torch.ones_like(freqs), freqs) - cache = torch.stack([freqs_cis.real, freqs_cis.imag], dim=-1) - return cache.to(dtype=dtype) - -# Based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/modeling_llama.py#L135 -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - - -def hf_apply_rotary_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1): - """Applies Rotary Position Embedding to the query and key tensors. - - Args: - q (`torch.Tensor`): The query tensor. - k (`torch.Tensor`): The key tensor. - cos (`torch.Tensor`): The cosine part of the rotary embedding. - sin (`torch.Tensor`): The sine part of the rotary embedding. - position_ids (`torch.Tensor`, *optional*): - Deprecated and unused. - unsqueeze_dim (`int`, *optional*, defaults to 1): - The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and - sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note - that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and - k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes - cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have - the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2. - Returns: - `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding. - """ - cos = cos.unsqueeze(unsqueeze_dim) - sin = sin.unsqueeze(unsqueeze_dim) - q_embed = (q * cos) + (rotate_half(q) * sin) - k_embed = (k * cos) + (rotate_half(k) * sin) - return q_embed, k_embed - -def apply_rotary_emb(x, freqs_cis): - xshaped = x.float().reshape(*x.shape[:-1], -1, 2) - freqs_cis = freqs_cis.view(1, xshaped.size(1), 1, xshaped.size(3), 2) - x_out2 = torch.stack( - [ - xshaped[..., 0] * freqs_cis[..., 0] - xshaped[..., 1] * freqs_cis[..., 1], - xshaped[..., 1] * freqs_cis[..., 0] + xshaped[..., 0] * freqs_cis[..., 1], - ], - -1, - ) - - x_out2 = x_out2.flatten(3) - return x_out2.type_as(x) - - -# 比较函数 -def compare_methods(): - torch.manual_seed(0) - x = torch.randn(1, 636, 32, 128) - - # 设置参数 - n_elem = 128 - seq_len = 1536 - base = 10000 - dtype = None - rope_scaling = None - - all_freq_cis = precompute_freqs_cis(n_elem, seq_len, base, dtype, rope_scaling) - input_pos = torch.arange( - x.shape[1], - device=x.device, - dtype=torch.int, - ) - freq_cis = all_freq_cis[input_pos] - x_out1 = apply_rotary_emb(x, freq_cis) - - - dim = 128 - end = 1536 - theta = 10000.0 - freqs_cos, freqs_sin = hf_precompute_freqs_cis(dim, end, theta) - fc, fs = freqs_cos[:x.shape[1]], freqs_sin[:x.shape[1]] - x_out2, _ = hf_apply_rotary_emb(x, x, fc, fs) - - print(x_out1) - print("************************") - print(x_out2) - - -if __name__ == "__main__": - compare_methods() From a70d7b51b54f1b0015ee9bc0e8508636f7129e55 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 20:36:38 -0700 Subject: [PATCH 7/8] remove extra print --- torchchat/cli/convert_hf_checkpoint.py | 2 -- torchchat/cli/download.py | 6 ------ 2 files changed, 8 deletions(-) diff --git a/torchchat/cli/convert_hf_checkpoint.py b/torchchat/cli/convert_hf_checkpoint.py index c15b3ce82..7001eb1e1 100644 --- a/torchchat/cli/convert_hf_checkpoint.py +++ b/torchchat/cli/convert_hf_checkpoint.py @@ -309,8 +309,6 @@ def convert_hf_checkpoint( model_name: Optional[str] = None, remove_bin_files: bool = False, ): - print(model_name) - print("***********************") if "llava" in model_name: convert_llava_checkpoint(model_dir=model_dir) else: diff --git a/torchchat/cli/download.py b/torchchat/cli/download.py index eb96e99b4..69cfbf21c 100644 --- a/torchchat/cli/download.py +++ b/torchchat/cli/download.py @@ -80,16 +80,10 @@ def download_and_convert( # allows recovery in the event that the download or conversion # fails unexpectedly. temp_dir = models_dir / "downloads" / model_config.name - # temp_dir = Path("/home/gasoonjia") / "downloads" / model_config.name - if os.path.isdir(temp_dir): shutil.rmtree(temp_dir) os.makedirs(temp_dir, exist_ok=True) - print("**************************************************") - print("**************************************************") - print("temp dir: ", temp_dir) - try: if ( model_config.distribution_channel From 937e7ed49ec308941f5ef060c327989000b9cbd7 Mon Sep 17 00:00:00 2001 From: gasoonjia Date: Mon, 23 Sep 2024 20:50:38 -0700 Subject: [PATCH 8/8] remove input pos keyword in generate.py --- torchchat/cli/download.py | 2 +- torchchat/generate.py | 4 +- torchchat/model.py | 470 +------------------------------------- 3 files changed, 4 insertions(+), 472 deletions(-) diff --git a/torchchat/cli/download.py b/torchchat/cli/download.py index 69cfbf21c..8d7ab79c3 100644 --- a/torchchat/cli/download.py +++ b/torchchat/cli/download.py @@ -30,7 +30,7 @@ def _download_hf_snapshot( try: snapshot_download( model_config.distribution_path, - cache_dir=artifact_dir, + local_dir=artifact_dir, local_dir_use_symlinks=False, token=hf_token, ignore_patterns=None if "llava" in model_config.name else "*safetensors*", diff --git a/torchchat/generate.py b/torchchat/generate.py index 52f1e45fa..be1cab606 100644 --- a/torchchat/generate.py +++ b/torchchat/generate.py @@ -374,7 +374,7 @@ def prefill( logits = model(x) else: # input_pos: [B, S] - logits = model(x, input_pos=input_pos) + logits = model(x, input_pos) # print(f"logits {logits.shape}") # print(f"x: {x},\n input_pos: {input_pos}\n") @@ -398,7 +398,7 @@ def decode_one_token( else: logits = model(x) else: - logits = model(x, input_pos=input_pos) + logits = model(x, input_pos) # print(f"x: {x},\n input_pos: {input_pos}\n") return self.sample(logits, need_probs=need_probs, **sampling_kwargs) diff --git a/torchchat/model.py b/torchchat/model.py index b23084d90..e14b0d2b8 100644 --- a/torchchat/model.py +++ b/torchchat/model.py @@ -562,10 +562,9 @@ def __init__(self, config: ModelArgs) -> None: def forward( self, tokens: Tensor, - *, + input_pos: Optional[Tensor] = None, encoder_input: Optional[Dict[str, Tensor]] = None, post_tokens: Optional[Tensor] = None, - input_pos: Optional[Tensor] = None, ) -> Tensor: return self.model(tokens, encoder_input=encoder_input, post_tokens=post_tokens, input_pos=input_pos) @@ -1032,470 +1031,3 @@ def setup_caches(self, max_batch_size, max_seq_length): except: pass - - -from torchvision import transforms as tvT - -def llava_image_preprocess( - # img_address: str, - target_h: int, - target_w: int, - rescale_factor: float, - image_mean: List[float], - image_std: List[float], - ) -> torch.Tensor: - """ - Preprocess an image by resizing it to fit a target height and width, - padding with median RGB value to make a square, scaling, and normalizing. - - Args: - img_address (str): Address of the local image file will be forwarded to the model. - target_h (int): Target height. - target_w (int): Target width. - rescale_factor (float): Rescaling factor. - image_mean (list): Mean values for normalization. - image_std (list): Standard deviation values for normalization. - - Returns: - torch.Tensor: Preprocessed image tensor. - - Raises: - FileNotFoundError: If the image file does not exist. - ValueError: If the target height or width is not positive. - """ - - # # Check if the image file exists - # if not os.path.exists(img_address): - # raise FileNotFoundError("Image file not found") - - # Check if the target height and width are positive - if target_h <= 0 or target_w <= 0: - raise ValueError("Target height and width must be positive") - - # Load the image from the given address - image = Image.open( - requests.get( - "https://llava-vl.github.io/static/images/view.jpg", stream=True - ).raw) - # Convert the image to a tensor - img = tvT.functional.pil_to_tensor(image) - - # Calculate the height and width ratios - ratio_h = img.shape[1] / target_h - ratio_w = img.shape[2] / target_w - - # Resize the image to fit in a target_h x target_w canvas - ratio = max(ratio_h, ratio_w) - output_size = (int(img.shape[1] / ratio), int(img.shape[2] / ratio)) - img = tvT.Resize(size=output_size)(img) - - # Pad the image with median RGB value to make a square - l_pad = (target_w - img.shape[2]) // 2 - t_pad = (target_h - img.shape[1]) // 2 - r_pad = -((target_w - img.shape[2]) // -2) - b_pad = -((target_h - img.shape[1]) // -2) - - torch._check(l_pad >= 0) - torch._check(t_pad >= 0) - torch._check(r_pad >= 0) - torch._check(b_pad >= 0) - - # Pad the image - resized = torch.nn.functional.pad( - img, - (l_pad, r_pad, t_pad, b_pad), - ) - - # Scale the image - scaled = resized * rescale_factor - - # Normalize the image - normed = tvT.Normalize(image_mean, image_std)(scaled) - - return normed.unsqueeze(0) - - - - - -if __name__ == "__main__": - import re - from PIL import Image - import requests - - def prepare_image(target_h: int, target_w: int) -> torch.Tensor: - """Read image into a tensor and resize the image so that it fits in - a target_h x target_w canvas. - - Args: - image (Image): An Image object. - target_h (int): Target height. - target_w (int): Target width. - - Returns: - torch.Tensor: resized image tensor. - """ - image = Image.open( - requests.get( - "https://llava-vl.github.io/static/images/view.jpg", stream=True - ).raw) - - img = torchvision.transforms.functional.pil_to_tensor(image) - # height ratio - ratio_h = img.shape[1] / target_h - # width ratio - ratio_w = img.shape[2] / target_w - # resize the image so that it fits in a target_h x target_w canvas - ratio = max(ratio_h, ratio_w) - output_size = (int(img.shape[1] / ratio), int(img.shape[2] / ratio)) - img = torchvision.transforms.Resize(size=output_size)(img) - return img - - - def image_preprocess(img: torch.Tensor, target_h: int, target_w: int, rescale_factor, image_mean, image_std) -> torch.Tensor: - # pad the image with median rgb value, to make a square - l_pad = (target_w - img.shape[2]) // 2 - t_pad = (target_h - img.shape[1]) // 2 - # ceil division - r_pad = -((target_w - img.shape[2]) // -2) - b_pad = -((target_h - img.shape[1]) // -2) - - torch._check(l_pad >= 0) - torch._check(t_pad >= 0) - torch._check(r_pad >= 0) - torch._check(b_pad >= 0) - - # This is different from the original implementation, due to export limitations. - resized = torch.nn.functional.pad( - img, - (l_pad, r_pad, t_pad, b_pad), - ) - - scaled = resized * rescale_factor - from torchvision.transforms.v2 import functional as tvF - normed = tvF.normalize( - scaled, image_mean, image_std - ) - return normed.unsqueeze(0) - - - # def checkpoint_remap(llava_model, llava_ckpt): - # def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: - # translated_state_dict = {} - - # # Define the mapping from old names to new names - # hf_weight_prefix = "vision_model." - # name_mapping = { - # f"{hf_weight_prefix}embeddings.class_embedding": "cls_token_embedding.weight", - # f"{hf_weight_prefix}embeddings.position_embedding.weight": "token_pos_embedding.positional_embedding", - # f"{hf_weight_prefix}embeddings.patch_embedding.weight": "conv.weight", - # f"{hf_weight_prefix}pre_layrnorm.weight": "ln_pre.weight", - # f"{hf_weight_prefix}pre_layrnorm.bias": "ln_pre.bias", - # f"{hf_weight_prefix}post_layernorm.weight": "ln_post.weight", - # f"{hf_weight_prefix}post_layernorm.bias": "ln_post.bias", - # } - - # # Use regular expressions to define the mapping for each layer - # patterns = [ - # ( - # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", - # lambda match: f"layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", - # ), - # ( - # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", - # lambda match: f"layers.{match.group(1)}.attn.output_proj.{match.group(2)}", - # ), - # ( - # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", - # lambda match: f"layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", - # ), - # ( - # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", - # lambda match: f"layers.{match.group(1)}.sa_norm.{match.group(2)}", - # ), - # ( - # rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", - # lambda match: f"layers.{match.group(1)}.mlp_norm.{match.group(2)}", - # ), - # ] - - # # Apply the patterns to update the name mapping - # for pattern, replacement in patterns: - # for key in list(hf_state_dict.keys()): - # if re.match(pattern, key): - # new_key = re.sub(pattern, replacement, key) - # name_mapping[key] = new_key - - # # Process the combined self-attention weights and biases - # temp_state_dict = {} - # for k, v in hf_state_dict.items(): - # new_k = name_mapping[k] - # if "in_proj_weight" in new_k or "in_proj_bias" in new_k: - # if new_k not in temp_state_dict: - # temp_state_dict[new_k] = {"q": None, "k": None, "v": None} - # if "q_proj" in k: - # temp_state_dict[new_k]["q"] = v - # elif "k_proj" in k: - # temp_state_dict[new_k]["k"] = v - # elif "v_proj" in k: - # temp_state_dict[new_k]["v"] = v - # else: - # temp_state_dict[new_k] = v - - # # Final processing of the combined self-attention weights and biases - # for k, v in temp_state_dict.items(): - # if isinstance(v, dict): - # translated_state_dict[k] = torch.cat([v["q"], v["k"], v["v"]], dim=0) - # else: - # translated_state_dict[k] = v - - # return translated_state_dict - - # new_state_dict = {} - # for k, v in state_dict.items(): - # if k.startswith("model.model."): - # new_state_dict[k.replace("model.model.", "")] = v - # elif k.startswith("model."): - # new_state_dict[k.replace("model.", "")] = v - # else: - # new_state_dict[k] = v - # return new_state_dict - - # def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: - # key_map = { - # # fmt: off - # r"model.layers.([0-9]+).self_attn.q_proj.": r"layers.\1.attention.wq.", - # r"model.layers.([0-9]+).self_attn.k_proj.": r"layers.\1.attention.wk.", - # r"model.layers.([0-9]+).self_attn.v_proj.": r"layers.\1.attention.wv.", - # r"model.layers.([0-9]+).self_attn.o_proj.": r"layers.\1.attention.wo.", - # r"model.layers.([0-9]+).input_layernorm.": r"layers.\1.attention_norm.", - # r"model.layers.([0-9]+).mlp.gate_proj.": r"layers.\1.feed_forward.w1.", - # r"model.layers.([0-9]+).mlp.down_proj.": r"layers.\1.feed_forward.w2.", - # r"model.layers.([0-9]+).mlp.up_proj.": r"layers.\1.feed_forward.w3.", - # r"model.layers.([0-9]+).post_attention_layernorm.": r"layers.\1.ffn_norm.", - # r"model.norm.": r"norm.", - # # r"model.embed_tokens.": r"tok_embeddings.", # load separately - # r"lm_head.": r"output.", - # # fmt: on - # } - - # new_state_dict = {} - - # def get_new_key(old_key: str) -> str: - # for old_pattern, replacement in key_map.items(): - # if (new_key := re.sub(old_pattern, replacement, old_key)) != old_key: - # return new_key - - # return old_key - - # # Convert module keys from hf transformer to Llama transformer. - # for old_key in hf_state_dict.keys(): - # new_key = get_new_key(old_key) - - # new_state_dict[new_key] = hf_state_dict[old_key] - - # return new_state_dict - - # def split_checkpoint(llava_ckpt): - # from collections import OrderedDict - # language_model_ckpt = OrderedDict() - # multi_modal_ckpt = OrderedDict() - # vision_tower_ckpt = OrderedDict() - # for key, value in llava_ckpt.items(): - # if key.startswith("language_model"): - # language_model_ckpt[key[len("language_model") + 1:]] = value - # elif key.startswith("multi_modal_projector"): - # multi_modal_ckpt[key[len("multi_modal_projector") + 1:]] = value - # elif key.startswith("vision_tower"): - # vision_tower_ckpt[key[len("vision_tower") + 1:]] = value - # return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt - - # llava_model = llava_model.model - - # language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) - - # llava_model.tok_embeddings.load_state_dict({"weight": language_model_ckpt.pop("model.embed_tokens.weight")}) - - # llava_model.encoder.load_state_dict(state_dict=_translate_state_dict_for_vision_model(vision_tower_ckpt), - # strict=True, - # assign=True, - # ) - - # llava_model.decoder.load_state_dict(state_dict=_translate_state_dict_for_text_model(language_model_ckpt), - # strict=True, - # assign=True, - # ) - - # llava_model.mm_projector.load_state_dict(state_dict=multi_modal_ckpt, - # strict=True, - # assign=True, - # ) - - def remap_llava_checkpoint(llava_ckpt): - def _translate_state_dict_for_vision_model(hf_state_dict) -> Dict[str, Any]: - translated_state_dict = {} - hf_weight_prefix = "vision_model." - name_mapping = { - f"{hf_weight_prefix}embeddings.class_embedding": "model.encoder.cls_token_embedding.weight", - f"{hf_weight_prefix}embeddings.position_embedding.weight": "model.encoder.token_pos_embedding.positional_embedding", - f"{hf_weight_prefix}embeddings.patch_embedding.weight": "model.encoder.conv.weight", - f"{hf_weight_prefix}pre_layrnorm.weight": "model.encoder.ln_pre.weight", - f"{hf_weight_prefix}pre_layrnorm.bias": "model.encoder.ln_pre.bias", - f"{hf_weight_prefix}post_layernorm.weight": "model.encoder.ln_post.weight", - f"{hf_weight_prefix}post_layernorm.bias": "model.encoder.ln_post.bias", - } - patterns = [ - ( - rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.(k|q|v)_proj\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.attn.{match.group(2)}_proj.{match.group(3)}", - ), - ( - rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.self_attn\.out_proj\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.attn.output_proj.{match.group(2)}", - ), - ( - rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.mlp\.fc(1|2)\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.mlp.w{match.group(2)}.{match.group(3)}", - ), - ( - rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm1\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.sa_norm.{match.group(2)}", - ), - ( - rf"{hf_weight_prefix}encoder\.layers\.([0-9]+)\.layer_norm2\.(weight|bias)", - lambda match: f"model.encoder.layers.{match.group(1)}.mlp_norm.{match.group(2)}", - ), - ] - for pattern, replacement in patterns: - for key in list(hf_state_dict.keys()): - if re.match(pattern, key): - new_key = re.sub(pattern, replacement, key) - name_mapping[key] = new_key - temp_state_dict = {} - for k, v in hf_state_dict.items(): - new_k = name_mapping.get(k, k) - if "in_proj_weight" in new_k or "in_proj_bias" in new_k: - if new_k not in temp_state_dict: - temp_state_dict[new_k] = {"q": None, "k": None, "v": None} - if "q_proj" in k: - temp_state_dict[new_k]["q"] = v - elif "k_proj" in k: - temp_state_dict[new_k]["k"] = v - elif "v_proj" in k: - temp_state_dict[new_k]["v"] = v - else: - temp_state_dict[new_k] = v - for k, v in temp_state_dict.items(): - if isinstance(v, dict): - translated_state_dict[k] = torch.cat([v["q"], v["k"], v["v"]], dim=0) - else: - translated_state_dict[k] = v - return translated_state_dict - - def _translate_state_dict_for_text_model(hf_state_dict) -> Dict[str, Any]: - key_map = { - r"model.layers.([0-9]+).self_attn.q_proj.": r"model.decoder.layers.\1.attention.wq.", - r"model.layers.([0-9]+).self_attn.k_proj.": r"model.decoder.layers.\1.attention.wk.", - r"model.layers.([0-9]+).self_attn.v_proj.": r"model.decoder.layers.\1.attention.wv.", - r"model.layers.([0-9]+).self_attn.o_proj.": r"model.decoder.layers.\1.attention.wo.", - r"model.layers.([0-9]+).input_layernorm.": r"model.decoder.layers.\1.attention_norm.", - r"model.layers.([0-9]+).mlp.gate_proj.": r"model.decoder.layers.\1.feed_forward.w1.", - r"model.layers.([0-9]+).mlp.down_proj.": r"model.decoder.layers.\1.feed_forward.w2.", - r"model.layers.([0-9]+).mlp.up_proj.": r"model.decoder.layers.\1.feed_forward.w3.", - r"model.layers.([0-9]+).post_attention_layernorm.": r"model.decoder.layers.\1.ffn_norm.", - r"model.norm.": r"model.decoder.norm.", - # r"model.embed_tokens.": r"tok_embeddings.", # load separately - r"lm_head.": r"model.decoder.output.", - } - new_state_dict = {} - def get_new_key(old_key: str) -> str: - for old_pattern, replacement in key_map.items(): - if (new_key := re.sub(old_pattern, replacement, old_key)) != old_key: - return new_key - return old_key - for old_key in hf_state_dict.keys(): - new_key = get_new_key(old_key) - new_state_dict[new_key] = hf_state_dict[old_key] - return new_state_dict - - def _translate_state_dict_for_mm_projector_model(hf_state_dict) -> Dict[str, Any]: - new_state_dict = {} - for old_key in hf_state_dict.keys(): - new_key = "model.mm_projector." + old_key - new_state_dict[new_key] = hf_state_dict[old_key] - return new_state_dict - - def split_checkpoint(llava_ckpt): - language_model_ckpt = {} - multi_modal_ckpt = {} - vision_tower_ckpt = {} - for key, value in llava_ckpt.items(): - if key.startswith("language_model"): - language_model_ckpt[key[len("language_model") + 1:]] = value - elif key.startswith("multi_modal_projector"): - multi_modal_ckpt[key[len("multi_modal_projector") + 1:]] = value - elif key.startswith("vision_tower"): - vision_tower_ckpt[key[len("vision_tower") + 1:]] = value - return language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt - language_model_ckpt, multi_modal_ckpt, vision_tower_ckpt = split_checkpoint(llava_ckpt) - remapped_state_dict = { - "model.tok_embeddings.weight": language_model_ckpt.pop("model.embed_tokens.weight"), - } - remapped_state_dict.update(_translate_state_dict_for_text_model(language_model_ckpt)) - remapped_state_dict.update(_translate_state_dict_for_vision_model(vision_tower_ckpt)) - remapped_state_dict.update(_translate_state_dict_for_mm_projector_model(multi_modal_ckpt)) - return remapped_state_dict - - with torch.device("cuda"): - print("Preparing input") - pre_tokens = torch.tensor([[ 1, 319, 13563, 1546, 263, 12758, 5199, 322, 385, 23116, - 21082, 20255, 29889, 450, 20255, 4076, 8444, 29892, 13173, 29892, - 322, 1248, 568, 6089, 304, 278, 5199, 29915, 29879, 5155, - 29889, 3148, 1001, 29901, 29871]]) - # img = prepare_image(336, 336) - post_tokens = torch.tensor([[29871, 13, 462, 9651, 1724, 526, 278, 2712, 306, 881, - 367, 274, 1300, 2738, 1048, 746, 306, 6493, 1244, 29973, - 319, 1799, 9047, 13566, 29901]]) - img = llava_image_preprocess(target_h=336, target_w=336, image_mean=[0.48145466, 0.4578275, 0.40821073], image_std=[0.26862954, 0.26130258, 0.27577711], rescale_factor=0.00392156862745098) - print(img) - - print("Done, Now creating model...") - llava_model = Model.from_params("/home/gasoonjia/torchchat/torchchat/model_params/llava-1.5.json") - - llava_model = llava_model.eval() - - print("Done. Now loading checkpoint...") - llava_ckpt = torch.load("/home/gasoonjia/executorch/examples/models/llava/llava_checkpoint.pth", map_location="cuda") - - print("Done. Now checkpoint remapping...") - remapped_state_dict = remap_llava_checkpoint(llava_ckpt) - llava_model.load_state_dict(remapped_state_dict, strict=True) - - print("Done. Now setup caches...") - - llava_model.setup_caches(1, 768) - - print("Done. Now running prefilling inference...") - # being tested, using llama_transformer - context_len, prefill_logits = llava_model(tokens=pre_tokens, encoder_input=img, post_tokens=post_tokens) - print("prefill_logits: ") - print(prefill_logits[0, -1].shape) - print(prefill_logits[0, -1]) - print("context_len: \n", context_len) - # Always generate one token at a time. - new_tokens = [torch.argmax(prefill_logits[0, -1], dim=-1).item()] - print(new_tokens) - print(prefill_logits.shape) - print("Done. Now running generation inference...") - for i in range(10): - logits = llava_model( - torch.tensor([new_tokens[i]]), input_pos=torch.tensor([context_len + i]) - ) - print(f"{i}-th logits: ") - print(logits) - - print(f"{i}-th logits.shape: ") - print(logits.shape) - new_tokens.append(torch.argmax(logits[-1, :]).item()) - - print("Done. The output is:", new_tokens)