
Commit 196d52d

quic-akuruvil (Ann Kuruvilla) authored and committed
Reference values are updated
Signed-off-by: Ann Kuruvilla <[email protected]>
1 parent 924bf4f · commit 196d52d

2 files changed: +168 -168 lines changed


tests/finetune/reference_data.py

Lines changed: 167 additions & 167 deletions
@@ -13,206 +13,206 @@
     "llama_3.2_1B_config_alpaca_single_device": {
         "description": "Baseline for Llama on Alpaca single-device",
         "train_step_losses": [
-            1.5112206935882568,
-            1.2211230993270874,
-            1.9942185878753662,
-            2.093623161315918,
-            0.9168124198913574,
-            1.2125635147094727,
-            0.3648962676525116,
-            1.6231939792633057,
-            0.8259601593017578,
-            0.7741442918777466,
-            1.7359141111373901,
-            2.118462085723877,
-            2.061161994934082,
-            0.8256913423538208,
-            0.8088029623031616,
-            1.761340618133545,
-            1.6828027963638306,
-            1.3538823127746582,
-            2.0672550201416016,
-            3.1532647609710693,
+            1.5110896825790405,
+            1.2206485271453857,
+            1.9950776100158691,
+            2.091615676879883,
+            0.9182446599006653,
+            1.1993569135665894,
+            0.36413607001304626,
+            1.6241482496261597,
+            0.8270177245140076,
+            0.7749958634376526,
+            1.73696768283844,
+            2.120077610015869,
+            2.061460256576538,
+            0.8267984390258789,
+            0.8105809688568115,
+            1.7627557516098022,
+            1.6819559335708618,
+            1.3528242111206055,
+            2.0654125213623047,
+            3.156151294708252,
         ],
         "eval_step_losses": [
-            1.462059736251831,
-            0.24527676403522491,
-            1.046107292175293,
-            1.6403586864471436,
-            1.395291805267334,
-            2.8664817810058594,
-            1.035412311553955,
-            1.8670039176940918,
-            3.8079662322998047,
-            0.6516809463500977,
+            1.4607517719268799,
+            0.24302150309085846,
+            1.0471211671829224,
+            1.642044186592102,
+            1.3949533700942993,
+            2.8850066661834717,
+            1.0366586446762085,
+            1.8661959171295166,
+            3.81632924079895,
+            0.6577113270759583,
         ],
         "train_step_metrics": [
-            4.532259941101074,
-            3.390994071960449,
-            7.34645938873291,
-            8.114261627197266,
-            2.5013046264648438,
-            3.3620924949645996,
-            1.4403645992279053,
-            5.069255828857422,
-            2.2840728759765625,
-            2.1687355041503906,
-            5.674112319946289,
-            8.318334579467773,
-            7.855090141296387,
-            2.283458948135376,
-            2.2452187538146973,
-            5.820234775543213,
-            5.380615711212158,
-            3.872429847717285,
-            7.903097629547119,
-            23.412376403808594,
+            4.531666278839111,
+            3.389385223388672,
+            7.352773189544678,
+            8.09798812866211,
+            2.504889488220215,
+            3.3179824352264404,
+            1.43927001953125,
+            5.074095249176025,
+            2.286489486694336,
+            2.1705832481384277,
+            5.680093288421631,
+            8.33178424835205,
+            7.857433319091797,
+            2.2859883308410645,
+            2.2492144107818604,
+            5.828476905822754,
+            5.376060962677002,
+            3.8683345317840576,
+            7.8885498046875,
+            23.480052947998047,
         ],
         "eval_step_metrics": [  # steps 0-9
-            4.31483793258667,
-            1.2779749631881714,
-            2.8465487957000732,
-            5.157018661499023,
-            4.036152362823486,
-            17.575077056884766,
-            2.816267251968384,
-            6.468885898590088,
-            45.05870819091797,
-            1.9187631607055664,
+            4.309197902679443,
+            1.27509605884552,
+            2.8494362831115723,
+            5.1657185554504395,
+            4.034786224365234,
+            17.9036865234375,
+            2.819779396057129,
+            6.463661193847656,
+            45.437110900878906,
+            1.9303690195083618,
         ],
     },
     # Scenario 2: Single-device llama 3.2-1B training on GSM8k dataset.
     "llama_3.2_1B_config_gsm8k_single_device": {
         "description": "Baseline for Llama on GSM8k single-device",
         "train_step_losses": [
-            2.250276803970337,
-            2.3231687545776367,
-            1.9379945993423462,
-            1.5981022119522095,
-            1.9867562055587769,
-            1.4573354721069336,
-            1.8969658613204956,
-            1.2177824974060059,
-            1.6489791870117188,
-            1.5380687713623047,
-            1.4025083780288696,
-            1.5301083326339722,
-            1.6858205795288086,
-            1.383747935295105,
-            1.7968919277191162,
-            1.4075607061386108,
-            1.6447738409042358,
-            1.2807793617248535,
-            0.8450672030448914,
-            1.5795941352844238,
+            2.250361204147339,
+            2.3252110481262207,
+            1.9360781908035278,
+            1.5984115600585938,
+            1.9874038696289062,
+            1.4579044580459595,
+            1.8975679874420166,
+            1.2175723314285278,
+            1.6473736763000488,
+            1.537960410118103,
+            1.4019465446472168,
+            1.5310447216033936,
+            1.6878201961517334,
+            1.3849903345108032,
+            1.7976438999176025,
+            1.4060133695602417,
+            1.646375060081482,
+            1.2835280895233154,
+            0.8465587496757507,
+            1.5783095359802246,
         ],
         "eval_step_losses": [
-            1.7081595659255981,
-            1.719305157661438,
-            1.153528094291687,
-            2.0051634311676025,
-            1.3372926712036133,
-            1.3009852170944214,
-            1.2207027673721313,
-            1.3452664613723755,
-            1.329830288887024,
-            1.307450532913208,
+            1.707140326499939,
+            1.7226355075836182,
+            1.1531383991241455,
+            2.0035903453826904,
+            1.3362350463867188,
+            1.3013248443603516,
+            1.2195535898208618,
+            1.3454742431640625,
+            1.3299248218536377,
+            1.3073854446411133,
         ],
         "train_step_metrics": [
-            9.490362167358398,
-            10.207969665527344,
-            6.944809913635254,
-            4.943641662597656,
-            7.291841506958008,
-            4.294501304626465,
-            6.6656389236450195,
-            3.3796849250793457,
-            5.201667308807373,
-            4.655590534210205,
-            4.065384864807129,
-            4.618677139282227,
-            5.396877765655518,
-            3.989826202392578,
-            6.030873775482178,
-            4.0859761238098145,
-            5.179838180541992,
-            3.5994436740875244,
-            2.328134298324585,
-            4.852985858917236,
+            9.49116325378418,
+            10.228837966918945,
+            6.93151330947876,
+            4.945170879364014,
+            7.296566009521484,
+            4.296945571899414,
+            6.66965389251709,
+            3.378974676132202,
+            5.193322658538818,
+            4.655086040496826,
+            4.063101291656494,
+            4.623003959655762,
+            5.407680034637451,
+            3.994786262512207,
+            6.0354108810424805,
+            4.0796589851379395,
+            5.188138961791992,
+            3.60935115814209,
+            2.3316092491149902,
+            4.846755504608154,
         ],
         "eval_step_metrics": [  # steps 0-9
-            5.518795013427734,
-            5.580649375915527,
-            3.1693549156188965,
-            7.42730712890625,
-            3.8087174892425537,
-            3.672913074493408,
-            3.38956880569458,
-            3.8392088413238525,
-            3.7804012298583984,
-            3.6967368125915527,
+            5.5131731033325195,
+            5.599266052246094,
+            3.1681201457977295,
+            7.415632247924805,
+            3.8046915531158447,
+            3.674160957336426,
+            3.3856759071350098,
+            3.8400065898895264,
+            3.7807586193084717,
+            3.69649600982666,
         ],
     },
     # Scenario 3: Single-device google-bert/bert-base-uncased training on IMDB dataset.
     "bert_base_uncased_config_imdb_single_device": {
         "description": "Baseline for google-bert/bert-base-uncased on IMDB single-device",
         "train_step_losses": [
-            0.357421875,
-            0.546875,
-            0.98486328125,
-            0.35302734375,
-            1.23828125,
-            0.60791015625,
-            0.44384765625,
-            0.791015625,
-            0.7861328125,
-            0.51318359375,
-            0.50244140625,
-            0.90087890625,
-            0.8818359375,
-            0.86279296875,
-            0.6396484375,
-            0.49267578125,
-            0.97119140625,
-            0.7451171875,
-            0.798828125,
-            0.7080078125,
+            0.390625,
+            0.51220703125,
+            0.9208984375,
+            0.4052734375,
+            1.1640625,
+            0.6533203125,
+            0.5087890625,
+            0.76171875,
+            0.63525390625,
+            0.50146484375,
+            0.5439453125,
+            0.947265625,
+            0.89013671875,
+            0.80419921875,
+            0.6533203125,
+            0.4580078125,
+            0.92041015625,
+            0.7412109375,
+            0.7197265625,
+            0.62158203125,
         ],
         "eval_step_losses": [
-            0.634765625,
-            0.8173828125,
+            0.6044921875,
+            0.798828125,
             0.9072265625,
-            0.7177734375,
-            0.59423828125,
-            0.69921875,
-            0.7109375,
-            0.7216796875,
-            0.6064453125,
-            0.7041015625,
+            0.70361328125,
+            0.59912109375,
+            0.66357421875,
+            0.6962890625,
+            0.75390625,
+            0.61328125,
+            0.6806640625,
         ],
         "train_step_metrics": [
             1.0,
             1.0,
             0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.5,
-            0.449951171875,
-            0.4091796875,
+            0.49999988079071045,
+            0.49999988079071045,
+            0.5,
+            0.5000002384185791,
+            0.5000002384185791,
+            0.6250002384185791,
+            0.6249998807907104,
+            0.625,
+            0.6000000238418579,
+            0.5833332538604736,
+            0.5714285373687744,
+            0.5714285373687744,
+            0.5714285373687744,
+            0.5625,
+            0.555555522441864,
+            0.5055557489395142,
+            0.5101010203361511,
         ],
-        "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0],
+        "eval_step_metrics": [1.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0],
     },
     # Scenario 4: Distributed google-bert/bert-base-uncased training (world_size=2)
     "bert_base_uncased_config_imdb_distributed_ws2": {

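For context on how entries like these are typically consumed, here is a minimal usage sketch (not part of this commit): it looks up one scenario and walks its four baseline lists. The top-level dict name REFERENCE_DATA is an assumption, since the diff does not show the module's top-level symbol.

# Usage sketch only; REFERENCE_DATA is a hypothetical symbol name.
from tests.finetune.reference_data import REFERENCE_DATA

scenario = REFERENCE_DATA["llama_3.2_1B_config_alpaca_single_device"]
print(scenario["description"])
for name in ("train_step_losses", "eval_step_losses", "train_step_metrics", "eval_step_metrics"):
    baseline = scenario[name]
    # Measured values would come from a real fine-tuning run; this only
    # shows the shape of the reference data (20 train steps, 10 eval steps).
    print(f"{name}: {len(baseline)} reference steps, first = {baseline[0]}")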
tests/finetune/test_finetune.py

Lines changed: 1 addition & 1 deletion
@@ -140,7 +140,7 @@ def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_world_size)
 ]
 
 
-@pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
+# @pytest.mark.skip()  # remove when it's clear why diff val_step_loss values are observed in diff runs on existing code (even without PR #478 changes)
 @pytest.mark.cli
 @pytest.mark.on_qaic
 @pytest.mark.finetune
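The hunk header above names assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_world_size), but its body is not part of this diff. The following is only a hedged sketch of what such a helper plausibly does, comparing each step's value against the baseline within an absolute tolerance; every implementation detail below is an assumption.

import math

def assert_list_close(ref_list, actual_list, atol, name, scenario_key, current_world_size):
    # Sketch only: the real helper in test_finetune.py may differ.
    assert len(ref_list) == len(actual_list), (
        f"{scenario_key}/{name}: expected {len(ref_list)} steps, got {len(actual_list)} "
        f"(world_size={current_world_size})"
    )
    for step, (ref, actual) in enumerate(zip(ref_list, actual_list)):
        # rel_tol=0.0 makes the comparison purely absolute, matching the atol parameter.
        assert math.isclose(ref, actual, rel_tol=0.0, abs_tol=atol), (
            f"{scenario_key}/{name} step {step}: {actual} is not within "
            f"atol={atol} of reference {ref} (world_size={current_world_size})"
        )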
