forked from pengzhiliang/MAE-pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pretrain_base_0.75_400e.txt
400 lines (400 loc) · 103 KB
/
pretrain_base_0.75_400e.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
{"train_lr": 2.99062424873788e-05, "train_min_lr": 2.99062424873788e-05, "train_loss": 0.9935376493690106, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03353406208453891, "epoch": 0, "n_parameters": 93325440}
{"train_lr": 8.991105056494908e-05, "train_min_lr": 8.991105056494908e-05, "train_loss": 0.9758817947302492, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.08507240483035836, "epoch": 1, "n_parameters": 93325440}
{"train_lr": 0.0001499158586425194, "train_min_lr": 0.0001499158586425194, "train_loss": 0.9611001945793246, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.128755036335534, "epoch": 2, "n_parameters": 93325440}
{"train_lr": 0.00020992066672008975, "train_min_lr": 0.00020992066672008975, "train_loss": 0.9310973301434364, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.17579967240826824, "epoch": 3, "n_parameters": 93325440}
{"train_lr": 0.00026992547479766013, "train_min_lr": 0.00026992547479766013, "train_loss": 0.887464275273184, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.2299580722092054, "epoch": 4, "n_parameters": 93325440}
{"train_lr": 0.0003299302828752303, "train_min_lr": 0.0003299302828752303, "train_loss": 0.8575904849821177, "train_loss_scale": 65536.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.22862222800270104, "epoch": 5, "n_parameters": 93325440}
{"train_lr": 0.00038993509095280063, "train_min_lr": 0.00038993509095280063, "train_loss": 0.8232777749355404, "train_loss_scale": 104395.48717948717, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.21724188721810395, "epoch": 6, "n_parameters": 93325440}
{"train_lr": 0.00044993989903037104, "train_min_lr": 0.00044993989903037104, "train_loss": 0.7865594529952759, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.19654494937127218, "epoch": 7, "n_parameters": 93325440}
{"train_lr": 0.0005099447071079412, "train_min_lr": 0.0005099447071079412, "train_loss": 0.7636158630156364, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.171916592746782, "epoch": 8, "n_parameters": 93325440}
{"train_lr": 0.0005699495151855116, "train_min_lr": 0.0005699495151855116, "train_loss": 0.7478578644924057, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.1429553751188975, "epoch": 9, "n_parameters": 93325440}
{"train_lr": 0.0006299543232630818, "train_min_lr": 0.0006299543232630818, "train_loss": 0.7356351461404791, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.12458268381082095, "epoch": 10, "n_parameters": 93325440}
{"train_lr": 0.0006899591313406522, "train_min_lr": 0.0006899591313406522, "train_loss": 0.726113576346483, "train_loss_scale": 131072.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.11398386040654702, "epoch": 11, "n_parameters": 93325440}
{"train_lr": 0.0007499639394182228, "train_min_lr": 0.0007499639394182228, "train_loss": 0.71920872768626, "train_loss_scale": 155017.84615384616, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.10540476584663758, "epoch": 12, "n_parameters": 93325440}
{"train_lr": 0.0008099687474957929, "train_min_lr": 0.0008099687474957929, "train_loss": 0.7139992883715492, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.10431485875055958, "epoch": 13, "n_parameters": 93325440}
{"train_lr": 0.0008699735555733632, "train_min_lr": 0.0008699735555733632, "train_loss": 0.708360363323337, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0887956558010326, "epoch": 14, "n_parameters": 93325440}
{"train_lr": 0.0009299783636509334, "train_min_lr": 0.0009299783636509334, "train_loss": 0.7042842504735558, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.08402582650813155, "epoch": 15, "n_parameters": 93325440}
{"train_lr": 0.0009899831717285039, "train_min_lr": 0.0009899831717285039, "train_loss": 0.7004435619769188, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07971460644442302, "epoch": 16, "n_parameters": 93325440}
{"train_lr": 0.001049987979806074, "train_min_lr": 0.001049987979806074, "train_loss": 0.6999204354838301, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.08760032328800896, "epoch": 17, "n_parameters": 93325440}
{"train_lr": 0.0011099927878836444, "train_min_lr": 0.0011099927878836444, "train_loss": 0.6953817655881628, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07566600679778136, "epoch": 18, "n_parameters": 93325440}
{"train_lr": 0.0011699975959612145, "train_min_lr": 0.0011699975959612145, "train_loss": 0.6931444115411395, "train_loss_scale": 464633.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07172701840933698, "epoch": 19, "n_parameters": 93325440}
{"train_lr": 0.0012300024040387849, "train_min_lr": 0.0012300024040387849, "train_loss": 0.6906391145565953, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06653258443260804, "epoch": 20, "n_parameters": 93325440}
{"train_lr": 0.0012900072121163552, "train_min_lr": 0.0012900072121163552, "train_loss": 0.6887923278965247, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06384101945859118, "epoch": 21, "n_parameters": 93325440}
{"train_lr": 0.0013500120201939254, "train_min_lr": 0.0013500120201939254, "train_loss": 0.6873886124350321, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06326920523618658, "epoch": 22, "n_parameters": 93325440}
{"train_lr": 0.0014100168282714964, "train_min_lr": 0.0014100168282714964, "train_loss": 0.6858006822518431, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06038653005201083, "epoch": 23, "n_parameters": 93325440}
{"train_lr": 0.0014700216363490658, "train_min_lr": 0.0014700216363490658, "train_loss": 0.6844061892479658, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.057754088969280325, "epoch": 24, "n_parameters": 93325440}
{"train_lr": 0.0015300264444266366, "train_min_lr": 0.0015300264444266366, "train_loss": 0.6839535279342761, "train_loss_scale": 714174.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05955945764883207, "epoch": 25, "n_parameters": 93325440}
{"train_lr": 0.0015900312525042061, "train_min_lr": 0.0015900312525042061, "train_loss": 0.6827620689351207, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05615876699821689, "epoch": 26, "n_parameters": 93325440}
{"train_lr": 0.0016500360605817771, "train_min_lr": 0.0016500360605817771, "train_loss": 0.6814118546123306, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05241773727660378, "epoch": 27, "n_parameters": 93325440}
{"train_lr": 0.0017100408686593481, "train_min_lr": 0.0017100408686593481, "train_loss": 0.6803960090455337, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.050834771377058365, "epoch": 28, "n_parameters": 93325440}
{"train_lr": 0.0017700456767369176, "train_min_lr": 0.0017700456767369176, "train_loss": 0.6797578791156411, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05221204828614226, "epoch": 29, "n_parameters": 93325440}
{"train_lr": 0.0018300504848144882, "train_min_lr": 0.0018300504848144882, "train_loss": 0.678908175502259, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04822072399278673, "epoch": 30, "n_parameters": 93325440}
{"train_lr": 0.001890055292892058, "train_min_lr": 0.001890055292892058, "train_loss": 0.67856838229375, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.048526402544946626, "epoch": 31, "n_parameters": 93325440}
{"train_lr": 0.0019500601009696296, "train_min_lr": 0.0019500601009696296, "train_loss": 0.6778434767411687, "train_loss_scale": 2046739.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04580618743187724, "epoch": 32, "n_parameters": 93325440}
{"train_lr": 0.0020100649090471993, "train_min_lr": 0.0020100649090471993, "train_loss": 0.6768887781370909, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.044690437405967176, "epoch": 33, "n_parameters": 93325440}
{"train_lr": 0.002070069717124769, "train_min_lr": 0.002070069717124769, "train_loss": 0.6764640975743532, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.044290755033636324, "epoch": 34, "n_parameters": 93325440}
{"train_lr": 0.0021300745252023395, "train_min_lr": 0.0021300745252023395, "train_loss": 0.6758714542748072, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04358653886578022, "epoch": 35, "n_parameters": 93325440}
{"train_lr": 0.0021900793332799103, "train_min_lr": 0.0021900793332799103, "train_loss": 0.6754509876601589, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.041925776057327405, "epoch": 36, "n_parameters": 93325440}
{"train_lr": 0.00225008414135748, "train_min_lr": 0.00225008414135748, "train_loss": 0.6753806370095565, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04360225025373392, "epoch": 37, "n_parameters": 93325440}
{"train_lr": 0.002310088949435051, "train_min_lr": 0.002310088949435051, "train_loss": 0.6767300866448727, "train_loss_scale": 283149.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 38, "n_parameters": 93325440}
{"train_lr": 0.0023700937575126205, "train_min_lr": 0.0023700937575126205, "train_loss": 0.6744202525617603, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039843473296899065, "epoch": 39, "n_parameters": 93325440}
{"train_lr": 0.002399984905490592, "train_min_lr": 0.002399984905490592, "train_loss": 0.6737221590697001, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03926232078661903, "epoch": 40, "n_parameters": 93325440}
{"train_lr": 0.0023998940486030145, "train_min_lr": 0.0023998940486030145, "train_loss": 0.6729661250869051, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03798527103992036, "epoch": 41, "n_parameters": 93325440}
{"train_lr": 0.0023997121959074114, "train_min_lr": 0.0023997121959074114, "train_loss": 0.6726540239671102, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03898819892977675, "epoch": 42, "n_parameters": 93325440}
{"train_lr": 0.0023994393612525775, "train_min_lr": 0.0023994393612525775, "train_loss": 0.6718972477202232, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03660729594934636, "epoch": 43, "n_parameters": 93325440}
{"train_lr": 0.002399075565415922, "train_min_lr": 0.002399075565415922, "train_loss": 0.6716325357556343, "train_loss_scale": 412540.71794871794, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03810084838038071, "epoch": 44, "n_parameters": 93325440}
{"train_lr": 0.0023986208361019097, "train_min_lr": 0.0023986208361019097, "train_loss": 0.6708741470311697, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035760302478686355, "epoch": 45, "n_parameters": 93325440}
{"train_lr": 0.002398075207939935, "train_min_lr": 0.002398075207939935, "train_loss": 0.6701816931032599, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0361831144680484, "epoch": 46, "n_parameters": 93325440}
{"train_lr": 0.002397438722481704, "train_min_lr": 0.002397438722481704, "train_loss": 0.6697720118010273, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03561555056904371, "epoch": 47, "n_parameters": 93325440}
{"train_lr": 0.002396711428198033, "train_min_lr": 0.002396711428198033, "train_loss": 0.6699321212438054, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03857750490379448, "epoch": 48, "n_parameters": 93325440}
{"train_lr": 0.00239589338047521, "train_min_lr": 0.00239589338047521, "train_loss": 0.6692431434654654, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03674515096757274, "epoch": 49, "n_parameters": 93325440}
{"train_lr": 0.0023949846416107326, "train_min_lr": 0.0023949846416107326, "train_loss": 0.6711435524078134, "train_loss_scale": 609988.9230769231, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039313955614582084, "epoch": 50, "n_parameters": 93325440}
{"train_lr": 0.0023939852808085834, "train_min_lr": 0.0023939852808085834, "train_loss": 0.6686080549724209, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0351801153857452, "epoch": 51, "n_parameters": 93325440}
{"train_lr": 0.0023928953741739565, "train_min_lr": 0.0023928953741739565, "train_loss": 0.6680617731733199, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03496479747506479, "epoch": 52, "n_parameters": 93325440}
{"train_lr": 0.002391715004707465, "train_min_lr": 0.002391715004707465, "train_loss": 0.6678077465591904, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0346150501177479, "epoch": 53, "n_parameters": 93325440}
{"train_lr": 0.002390444262298807, "train_min_lr": 0.002390444262298807, "train_loss": 0.667374048788005, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03409873149118935, "epoch": 54, "n_parameters": 93325440}
{"train_lr": 0.002389083243719943, "train_min_lr": 0.002389083243719943, "train_loss": 0.6669572112986293, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.034755515513750605, "epoch": 55, "n_parameters": 93325440}
{"train_lr": 0.002387632052617705, "train_min_lr": 0.002387632052617705, "train_loss": 0.6681992361942927, "train_loss_scale": 809957.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 56, "n_parameters": 93325440}
{"train_lr": 0.0023860907995059146, "train_min_lr": 0.0023860907995059146, "train_loss": 0.6667240882913271, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03386468641483822, "epoch": 57, "n_parameters": 93325440}
{"train_lr": 0.002384459601756962, "train_min_lr": 0.002384459601756962, "train_loss": 0.6663572651405747, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033974862639577344, "epoch": 58, "n_parameters": 93325440}
{"train_lr": 0.0023827385835928716, "train_min_lr": 0.0023827385835928716, "train_loss": 0.6660926373054584, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03487831858010628, "epoch": 59, "n_parameters": 93325440}
{"train_lr": 0.002380927876075842, "train_min_lr": 0.002380927876075842, "train_loss": 0.665804479462214, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0342934140159438, "epoch": 60, "n_parameters": 93325440}
{"train_lr": 0.0023790276170982585, "train_min_lr": 0.0023790276170982585, "train_loss": 0.6654091311188844, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03340641431247768, "epoch": 61, "n_parameters": 93325440}
{"train_lr": 0.002377037951372201, "train_min_lr": 0.002377037951372201, "train_loss": 0.6653385141339058, "train_loss_scale": 547813.7435897436, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03390334472537805, "epoch": 62, "n_parameters": 93325440}
{"train_lr": 0.0023749590304184146, "train_min_lr": 0.0023749590304184146, "train_loss": 0.6650327132441677, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03470497362267894, "epoch": 63, "n_parameters": 93325440}
{"train_lr": 0.002372791012554783, "train_min_lr": 0.002372791012554783, "train_loss": 0.6656214212521147, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03538080747239292, "epoch": 64, "n_parameters": 93325440}
{"train_lr": 0.0023705340628842582, "train_min_lr": 0.0023705340628842582, "train_loss": 0.664771322089319, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03493891082083186, "epoch": 65, "n_parameters": 93325440}
{"train_lr": 0.002368188353282295, "train_min_lr": 0.002368188353282295, "train_loss": 0.6646730641428477, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03361503693919916, "epoch": 66, "n_parameters": 93325440}
{"train_lr": 0.0023657540623837642, "train_min_lr": 0.0023657540623837642, "train_loss": 0.6643176994119318, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033847464117197655, "epoch": 67, "n_parameters": 93325440}
{"train_lr": 0.00236323137556934, "train_min_lr": 0.00236323137556934, "train_loss": 0.6641596989133037, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03371662331912189, "epoch": 68, "n_parameters": 93325440}
{"train_lr": 0.0023606204849513923, "train_min_lr": 0.0023606204849513923, "train_loss": 0.6640265851926345, "train_loss_scale": 1714018.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03474894096740545, "epoch": 69, "n_parameters": 93325440}
{"train_lr": 0.002357921589359349, "train_min_lr": 0.002357921589359349, "train_loss": 0.6637311494933107, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033441266182284705, "epoch": 70, "n_parameters": 93325440}
{"train_lr": 0.002355134894324556, "train_min_lr": 0.002355134894324556, "train_loss": 0.6635217848353279, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03384943720765221, "epoch": 71, "n_parameters": 93325440}
{"train_lr": 0.0023522606120646365, "train_min_lr": 0.0023522606120646365, "train_loss": 0.6633951249890603, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033622669599329434, "epoch": 72, "n_parameters": 93325440}
{"train_lr": 0.002349298961467303, "train_min_lr": 0.002349298961467303, "train_loss": 0.6633510416707932, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03442244214984851, "epoch": 73, "n_parameters": 93325440}
{"train_lr": 0.0023462501680737214, "train_min_lr": 0.0023462501680737214, "train_loss": 0.663141653347665, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033474423409176945, "epoch": 74, "n_parameters": 93325440}
{"train_lr": 0.0023431144640613144, "train_min_lr": 0.0023431144640613144, "train_loss": 0.6629860976185554, "train_loss_scale": 2567666.871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03471351237012408, "epoch": 75, "n_parameters": 93325440}
{"train_lr": 0.0023398920882260776, "train_min_lr": 0.0023398920882260776, "train_loss": 0.6628032483351536, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03359772727633707, "epoch": 76, "n_parameters": 93325440}
{"train_lr": 0.002336583285964409, "train_min_lr": 0.002336583285964409, "train_loss": 0.6635485236079265, "train_loss_scale": 4080036.1025641025, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 77, "n_parameters": 93325440}
{"train_lr": 0.0023331883092544115, "train_min_lr": 0.0023331883092544115, "train_loss": 0.6628299321597203, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033620487104575984, "epoch": 78, "n_parameters": 93325440}
{"train_lr": 0.0023297074166367046, "train_min_lr": 0.0023297074166367046, "train_loss": 0.6625400330537022, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033616363954467654, "epoch": 79, "n_parameters": 93325440}
{"train_lr": 0.0023261408731947413, "train_min_lr": 0.0023261408731947413, "train_loss": 0.6622193893656517, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03423108916896849, "epoch": 80, "n_parameters": 93325440}
{"train_lr": 0.002322488950534608, "train_min_lr": 0.002322488950534608, "train_loss": 0.6621070915164474, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.033666627577100046, "epoch": 81, "n_parameters": 93325440}
{"train_lr": 0.0023187519267643623, "train_min_lr": 0.0023187519267643623, "train_loss": 0.6622220281845866, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03502245578102958, "epoch": 82, "n_parameters": 93325440}
{"train_lr": 0.0023149300864728226, "train_min_lr": 0.0023149300864728226, "train_loss": 0.6620114981316221, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03415211925330835, "epoch": 83, "n_parameters": 93325440}
{"train_lr": 0.00231102372070793, "train_min_lr": 0.00231102372070793, "train_loss": 0.6617905595459235, "train_loss_scale": 3448201.846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03398606629492954, "epoch": 84, "n_parameters": 93325440}
{"train_lr": 0.002307033126954561, "train_min_lr": 0.002307033126954561, "train_loss": 0.6629372512062008, "train_loss_scale": 1459436.3076923077, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 85, "n_parameters": 93325440}
{"train_lr": 0.002302958609111882, "train_min_lr": 0.002302958609111882, "train_loss": 0.661601463977534, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03439310951850926, "epoch": 86, "n_parameters": 93325440}
{"train_lr": 0.002298800477470194, "train_min_lr": 0.002298800477470194, "train_loss": 0.6614590679319241, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03440534176591497, "epoch": 87, "n_parameters": 93325440}
{"train_lr": 0.0022945590486873305, "train_min_lr": 0.0022945590486873305, "train_loss": 0.6613834299242649, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.034254728303028226, "epoch": 88, "n_parameters": 93325440}
{"train_lr": 0.0022902346457645086, "train_min_lr": 0.0022902346457645086, "train_loss": 0.6611541778279039, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03509240785542016, "epoch": 89, "n_parameters": 93325440}
{"train_lr": 0.002285827598021753, "train_min_lr": 0.002285827598021753, "train_loss": 0.6612528438608234, "train_loss_scale": 262144.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03502046571781811, "epoch": 90, "n_parameters": 93325440}
{"train_lr": 0.0022813382410728175, "train_min_lr": 0.0022813382410728175, "train_loss": 0.6609599692269396, "train_loss_scale": 333561.4358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03530590945424942, "epoch": 91, "n_parameters": 93325440}
{"train_lr": 0.0022767669167996093, "train_min_lr": 0.0022767669167996093, "train_loss": 0.6609104336597599, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.034537219042436056, "epoch": 92, "n_parameters": 93325440}
{"train_lr": 0.002272113973326174, "train_min_lr": 0.002272113973326174, "train_loss": 0.6607935074955608, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035604342979450636, "epoch": 93, "n_parameters": 93325440}
{"train_lr": 0.00226737976499217, "train_min_lr": 0.00226737976499217, "train_loss": 0.6625717931355422, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0398545018158471, "epoch": 94, "n_parameters": 93325440}
{"train_lr": 0.0022625646523258907, "train_min_lr": 0.0022625646523258907, "train_loss": 0.6607921089594945, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.034550254352581806, "epoch": 95, "n_parameters": 93325440}
{"train_lr": 0.002257669002016808, "train_min_lr": 0.002257669002016808, "train_loss": 0.6605105223372961, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.034636476095049426, "epoch": 96, "n_parameters": 93325440}
{"train_lr": 0.0022526931868876465, "train_min_lr": 0.0022526931868876465, "train_loss": 0.6604859339407622, "train_loss_scale": 524288.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035125622901922234, "epoch": 97, "n_parameters": 93325440}
{"train_lr": 0.0022476375858659957, "train_min_lr": 0.0022476375858659957, "train_loss": 0.6602891938378795, "train_loss_scale": 976318.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03531917932634361, "epoch": 98, "n_parameters": 93325440}
{"train_lr": 0.002242502583955447, "train_min_lr": 0.002242502583955447, "train_loss": 0.6603036113083363, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03573522149012066, "epoch": 99, "n_parameters": 93325440}
{"train_lr": 0.0022372885722062746, "train_min_lr": 0.0022372885722062746, "train_loss": 0.6603299641790681, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03621007654314431, "epoch": 100, "n_parameters": 93325440}
{"train_lr": 0.00223199594768566, "train_min_lr": 0.00223199594768566, "train_loss": 0.6600742791659939, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03562110195008035, "epoch": 101, "n_parameters": 93325440}
{"train_lr": 0.002226625113447457, "train_min_lr": 0.002226625113447457, "train_loss": 0.659991750254845, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03583730082027614, "epoch": 102, "n_parameters": 93325440}
{"train_lr": 0.0022211764785014763, "train_min_lr": 0.0022211764785014763, "train_loss": 0.6598965032503773, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035701336876417585, "epoch": 103, "n_parameters": 93325440}
{"train_lr": 0.002215650457782375, "train_min_lr": 0.002215650457782375, "train_loss": 0.6597276878996919, "train_loss_scale": 1522451.6923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03592537800208307, "epoch": 104, "n_parameters": 93325440}
{"train_lr": 0.0022100474721180197, "train_min_lr": 0.0022100474721180197, "train_loss": 0.6596770419094425, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0358598633036495, "epoch": 105, "n_parameters": 93325440}
{"train_lr": 0.0022043679481974616, "train_min_lr": 0.0022043679481974616, "train_loss": 0.659525745572188, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035852226828678675, "epoch": 106, "n_parameters": 93325440}
{"train_lr": 0.0021986123185384417, "train_min_lr": 0.0021986123185384417, "train_loss": 0.6594595561902492, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03639518651060569, "epoch": 107, "n_parameters": 93325440}
{"train_lr": 0.002192781021454436, "train_min_lr": 0.002192781021454436, "train_loss": 0.6592210801079487, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.035937764580385424, "epoch": 108, "n_parameters": 93325440}
{"train_lr": 0.0021868745010212983, "train_min_lr": 0.0021868745010212983, "train_loss": 0.6593411502261193, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03644818510525884, "epoch": 109, "n_parameters": 93325440}
{"train_lr": 0.0021808932070434225, "train_min_lr": 0.0021808932070434225, "train_loss": 0.659102826904601, "train_loss_scale": 2184533.3333333335, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03611473233486789, "epoch": 110, "n_parameters": 93325440}
{"train_lr": 0.002174837595019509, "train_min_lr": 0.002174837595019509, "train_loss": 0.6592099234843866, "train_loss_scale": 3112119.794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 111, "n_parameters": 93325440}
{"train_lr": 0.0021687081261078578, "train_min_lr": 0.0021687081261078578, "train_loss": 0.6590672825486996, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03640902054328949, "epoch": 112, "n_parameters": 93325440}
{"train_lr": 0.0021625052670912522, "train_min_lr": 0.0021625052670912522, "train_loss": 0.6590020086807318, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.036369339479372285, "epoch": 113, "n_parameters": 93325440}
{"train_lr": 0.0021562294903414267, "train_min_lr": 0.0021562294903414267, "train_loss": 0.6589464796945835, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03698857306526639, "epoch": 114, "n_parameters": 93325440}
{"train_lr": 0.0021498812737830776, "train_min_lr": 0.0021498812737830776, "train_loss": 0.6587196541233704, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03618161466856224, "epoch": 115, "n_parameters": 93325440}
{"train_lr": 0.0021434611008574723, "train_min_lr": 0.0021434611008574723, "train_loss": 0.6586414177257282, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03689980891963037, "epoch": 116, "n_parameters": 93325440}
{"train_lr": 0.002136969460485639, "train_min_lr": 0.002136969460485639, "train_loss": 0.658656991349581, "train_loss_scale": 2318966.153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03741015896248894, "epoch": 117, "n_parameters": 93325440}
{"train_lr": 0.002130406847031118, "train_min_lr": 0.002130406847031118, "train_loss": 0.6585542277600138, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03709424373049002, "epoch": 118, "n_parameters": 93325440}
{"train_lr": 0.002123773760262341, "train_min_lr": 0.002123773760262341, "train_loss": 0.6583075237054473, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03686075314927178, "epoch": 119, "n_parameters": 93325440}
{"train_lr": 0.002117070705314543, "train_min_lr": 0.002117070705314543, "train_loss": 0.6585102182072706, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.038124814844475344, "epoch": 120, "n_parameters": 93325440}
{"train_lr": 0.0021102981926513073, "train_min_lr": 0.0021102981926513073, "train_loss": 0.6583685486887892, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03707056720621693, "epoch": 121, "n_parameters": 93325440}
{"train_lr": 0.0021034567380257023, "train_min_lr": 0.0021034567380257023, "train_loss": 0.658206457654253, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.036788777669127554, "epoch": 122, "n_parameters": 93325440}
{"train_lr": 0.0020965468624409753, "train_min_lr": 0.0020965468624409753, "train_loss": 0.6580094202923087, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.037147117468218006, "epoch": 123, "n_parameters": 93325440}
{"train_lr": 0.002089569092110911, "train_min_lr": 0.002089569092110911, "train_loss": 0.6592672796537861, "train_loss_scale": 3428036.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 124, "n_parameters": 93325440}
{"train_lr": 0.0020825239584197327, "train_min_lr": 0.0020825239584197327, "train_loss": 0.658130434437249, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03721978203154718, "epoch": 125, "n_parameters": 93325440}
{"train_lr": 0.0020754119978816502, "train_min_lr": 0.0020754119978816502, "train_loss": 0.6580190308009967, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.037171452276360914, "epoch": 126, "n_parameters": 93325440}
{"train_lr": 0.0020682337520999913, "train_min_lr": 0.0020682337520999913, "train_loss": 0.657770600862419, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03729113473747976, "epoch": 127, "n_parameters": 93325440}
{"train_lr": 0.0020609897677259627, "train_min_lr": 0.0020609897677259627, "train_loss": 0.6576646924591981, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03765304574074271, "epoch": 128, "n_parameters": 93325440}
{"train_lr": 0.002053680596417025, "train_min_lr": 0.002053680596417025, "train_loss": 0.6576654529199004, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03781039642098431, "epoch": 129, "n_parameters": 93325440}
{"train_lr": 0.00204630679479487, "train_min_lr": 0.00204630679479487, "train_loss": 0.6575054912469708, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03823968046941818, "epoch": 130, "n_parameters": 93325440}
{"train_lr": 0.002038868924403038, "train_min_lr": 0.002038868924403038, "train_loss": 0.6574385501921941, "train_loss_scale": 2258471.3846153845, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 131, "n_parameters": 93325440}
{"train_lr": 0.0020313675516641576, "train_min_lr": 0.0020313675516641576, "train_loss": 0.6573616523200121, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03814637377404441, "epoch": 132, "n_parameters": 93325440}
{"train_lr": 0.0020238032478368064, "train_min_lr": 0.0020238032478368064, "train_loss": 0.6572599850642757, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03755328234117956, "epoch": 133, "n_parameters": 93325440}
{"train_lr": 0.002016176588972008, "train_min_lr": 0.002016176588972008, "train_loss": 0.657276330444102, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03865379040750364, "epoch": 134, "n_parameters": 93325440}
{"train_lr": 0.002008488155869361, "train_min_lr": 0.002008488155869361, "train_loss": 0.6572356811270882, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.038243835081513494, "epoch": 135, "n_parameters": 93325440}
{"train_lr": 0.002000738534032814, "train_min_lr": 0.002000738534032814, "train_loss": 0.6577621484414126, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04061774171602268, "epoch": 136, "n_parameters": 93325440}
{"train_lr": 0.0019929283136260727, "train_min_lr": 0.0019929283136260727, "train_loss": 0.6570336658698626, "train_loss_scale": 3145728.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0380012393833544, "epoch": 137, "n_parameters": 93325440}
{"train_lr": 0.001985058089427659, "train_min_lr": 0.001985058089427659, "train_loss": 0.6569213079622923, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.037724959305845775, "epoch": 138, "n_parameters": 93325440}
{"train_lr": 0.0019771284607856218, "train_min_lr": 0.0019771284607856218, "train_loss": 0.6569687100366141, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03889632499060379, "epoch": 139, "n_parameters": 93325440}
{"train_lr": 0.0019691400315718726, "train_min_lr": 0.0019691400315718726, "train_loss": 0.6568288122041103, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03823023961665921, "epoch": 140, "n_parameters": 93325440}
{"train_lr": 0.001961093410136237, "train_min_lr": 0.001961093410136237, "train_loss": 0.6566447338137107, "train_loss_scale": 2285357.9487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 141, "n_parameters": 93325440}
{"train_lr": 0.0019529892092600813, "train_min_lr": 0.0019529892092600813, "train_loss": 0.6577811507412639, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0412411448927835, "epoch": 142, "n_parameters": 93325440}
{"train_lr": 0.0019448280461096836, "train_min_lr": 0.0019448280461096836, "train_loss": 0.6566688927750175, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03831841904096878, "epoch": 143, "n_parameters": 93325440}
{"train_lr": 0.0019366105421892137, "train_min_lr": 0.0019366105421892137, "train_loss": 0.6564733442874291, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03837170583219864, "epoch": 144, "n_parameters": 93325440}
{"train_lr": 0.0019283373232934099, "train_min_lr": 0.0019283373232934099, "train_loss": 0.6564794568201671, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03877275445474646, "epoch": 145, "n_parameters": 93325440}
{"train_lr": 0.0019200090194599236, "train_min_lr": 0.0019200090194599236, "train_loss": 0.6564271587591904, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04010523812702069, "epoch": 146, "n_parameters": 93325440}
{"train_lr": 0.0019116262649213377, "train_min_lr": 0.0019116262649213377, "train_loss": 0.656340552183489, "train_loss_scale": 3145728.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0387989131327814, "epoch": 147, "n_parameters": 93325440}
{"train_lr": 0.0019031896980568602, "train_min_lr": 0.0019031896980568602, "train_loss": 0.6562293704169301, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039052358601624385, "epoch": 148, "n_parameters": 93325440}
{"train_lr": 0.001894699961343726, "train_min_lr": 0.001894699961343726, "train_loss": 0.6562115392910365, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039240701452422984, "epoch": 149, "n_parameters": 93325440}
{"train_lr": 0.0018861577013082516, "train_min_lr": 0.0018861577013082516, "train_loss": 0.6560243863421373, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03884188269073956, "epoch": 150, "n_parameters": 93325440}
{"train_lr": 0.0018775635684766133, "train_min_lr": 0.0018775635684766133, "train_loss": 0.6559707283830414, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039168536728725604, "epoch": 151, "n_parameters": 93325440}
{"train_lr": 0.0018689182173253027, "train_min_lr": 0.0018689182173253027, "train_loss": 0.6558644921303942, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03925456973509146, "epoch": 152, "n_parameters": 93325440}
{"train_lr": 0.0018602223062312783, "train_min_lr": 0.0018602223062312783, "train_loss": 0.6558279459340832, "train_loss_scale": 4207747.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 153, "n_parameters": 93325440}
{"train_lr": 0.0018514764974218371, "train_min_lr": 0.0018514764974218371, "train_loss": 0.6558603528313912, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039963523779685296, "epoch": 154, "n_parameters": 93325440}
{"train_lr": 0.0018426814569241794, "train_min_lr": 0.0018426814569241794, "train_loss": 0.6557053431916313, "train_loss_scale": 3313769.0256410255, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 155, "n_parameters": 93325440}
{"train_lr": 0.0018338378545146971, "train_min_lr": 0.0018338378545146971, "train_loss": 0.6557001805362793, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04054142555030875, "epoch": 156, "n_parameters": 93325440}
{"train_lr": 0.0018249463636679463, "train_min_lr": 0.0018249463636679463, "train_loss": 0.6555671931411593, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039896765908895016, "epoch": 157, "n_parameters": 93325440}
{"train_lr": 0.0018160076615053812, "train_min_lr": 0.0018160076615053812, "train_loss": 0.6556670764843241, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.040883598544706516, "epoch": 158, "n_parameters": 93325440}
{"train_lr": 0.0018070224287437813, "train_min_lr": 0.0018070224287437813, "train_loss": 0.6553049489664726, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0398384396177836, "epoch": 159, "n_parameters": 93325440}
{"train_lr": 0.0017979913496434085, "train_min_lr": 0.0017979913496434085, "train_loss": 0.6553408457205082, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04008960960886608, "epoch": 160, "n_parameters": 93325440}
{"train_lr": 0.0017889151119559006, "train_min_lr": 0.0017889151119559006, "train_loss": 0.6552601037785794, "train_loss_scale": 2117316.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.03976045259967064, "epoch": 161, "n_parameters": 93325440}
{"train_lr": 0.0017797944068718974, "train_min_lr": 0.0017797944068718974, "train_loss": 0.6551581882895567, "train_loss_scale": 3340655.5897435895, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 162, "n_parameters": 93325440}
{"train_lr": 0.0017706299289684047, "train_min_lr": 0.0017706299289684047, "train_loss": 0.6550808818772053, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04039769527168037, "epoch": 163, "n_parameters": 93325440}
{"train_lr": 0.0017614223761558967, "train_min_lr": 0.0017614223761558967, "train_loss": 0.654921959584149, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.039989605074366316, "epoch": 164, "n_parameters": 93325440}
{"train_lr": 0.001752172449625165, "train_min_lr": 0.001752172449625165, "train_loss": 0.6550507604693755, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.041154535164913304, "epoch": 165, "n_parameters": 93325440}
{"train_lr": 0.0017428808537939323, "train_min_lr": 0.0017428808537939323, "train_loss": 0.6547575006022667, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04056112305261195, "epoch": 166, "n_parameters": 93325440}
{"train_lr": 0.0017335482962531922, "train_min_lr": 0.0017335482962531922, "train_loss": 0.6546947882057, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04049342578181472, "epoch": 167, "n_parameters": 93325440}
{"train_lr": 0.0017241754877133318, "train_min_lr": 0.0017241754877133318, "train_loss": 0.6546552269838941, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04057711954467381, "epoch": 168, "n_parameters": 93325440}
{"train_lr": 0.0017147631419500143, "train_min_lr": 0.0017147631419500143, "train_loss": 0.6545772810156146, "train_loss_scale": 4187582.358974359, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04082152419962371, "epoch": 169, "n_parameters": 93325440}
{"train_lr": 0.0017053119757498118, "train_min_lr": 0.0017053119757498118, "train_loss": 0.6545756479534202, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.042030869087634176, "epoch": 170, "n_parameters": 93325440}
{"train_lr": 0.001695822708855617, "train_min_lr": 0.001695822708855617, "train_loss": 0.6545097664332925, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04112377291760193, "epoch": 171, "n_parameters": 93325440}
{"train_lr": 0.001686296063911845, "train_min_lr": 0.001686296063911845, "train_loss": 0.6543824052772461, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.040817254019948915, "epoch": 172, "n_parameters": 93325440}
{"train_lr": 0.0016767327664093945, "train_min_lr": 0.0016767327664093945, "train_loss": 0.6542876160536439, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04131795748327978, "epoch": 173, "n_parameters": 93325440}
{"train_lr": 0.0016671335446303921, "train_min_lr": 0.0016671335446303921, "train_loss": 0.6544181934725015, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04192646426291993, "epoch": 174, "n_parameters": 93325440}
{"train_lr": 0.0016574991295927436, "train_min_lr": 0.0016574991295927436, "train_loss": 0.6541326292432271, "train_loss_scale": 4207747.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 175, "n_parameters": 93325440}
{"train_lr": 0.001647830254994458, "train_min_lr": 0.001647830254994458, "train_loss": 0.654219655558849, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04151277960493015, "epoch": 176, "n_parameters": 93325440}
{"train_lr": 0.0016381276571577643, "train_min_lr": 0.0016381276571577643, "train_loss": 0.6540735674639925, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04252699216326269, "epoch": 177, "n_parameters": 93325440}
{"train_lr": 0.0016283920749730564, "train_min_lr": 0.0016283920749730564, "train_loss": 0.6539614871383096, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.042018215010802336, "epoch": 178, "n_parameters": 93325440}
{"train_lr": 0.0016186242498426112, "train_min_lr": 0.0016186242498426112, "train_loss": 0.6538805883043469, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04170390099692994, "epoch": 179, "n_parameters": 93325440}
{"train_lr": 0.0016088249256241284, "train_min_lr": 0.0016088249256241284, "train_loss": 0.6538529533128707, "train_loss_scale": 2144203.487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 180, "n_parameters": 93325440}
{"train_lr": 0.0015989948485740878, "train_min_lr": 0.0015989948485740878, "train_loss": 0.6536841267146744, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04197185315812627, "epoch": 181, "n_parameters": 93325440}
{"train_lr": 0.0015891347672909151, "train_min_lr": 0.0015891347672909151, "train_loss": 0.6536750256394347, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.042221493111589015, "epoch": 182, "n_parameters": 93325440}
{"train_lr": 0.001579245432657976, "train_min_lr": 0.001579245432657976, "train_loss": 0.6545264580979561, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04499619110869483, "epoch": 183, "n_parameters": 93325440}
{"train_lr": 0.0015693275977863898, "train_min_lr": 0.0015693275977863898, "train_loss": 0.6535338635007159, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04249066217109943, "epoch": 184, "n_parameters": 93325440}
{"train_lr": 0.00155938201795768, "train_min_lr": 0.00155938201795768, "train_loss": 0.6535800552855318, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.043255524387439855, "epoch": 185, "n_parameters": 93325440}
{"train_lr": 0.0015494094505662558, "train_min_lr": 0.0015494094505662558, "train_loss": 0.6533823190256953, "train_loss_scale": 2271914.6666666665, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 186, "n_parameters": 93325440}
{"train_lr": 0.001539410655061736, "train_min_lr": 0.001539410655061736, "train_loss": 0.6532779107921016, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0433023767068218, "epoch": 187, "n_parameters": 93325440}
{"train_lr": 0.0015293863928911096, "train_min_lr": 0.0015293863928911096, "train_loss": 0.6531557398251234, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04270600716774471, "epoch": 188, "n_parameters": 93325440}
{"train_lr": 0.001519337427440752, "train_min_lr": 0.001519337427440752, "train_loss": 0.6530751259042284, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04218650396125248, "epoch": 189, "n_parameters": 93325440}
{"train_lr": 0.00150926452397829, "train_min_lr": 0.00150926452397829, "train_loss": 0.6529797476310378, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04319860042932515, "epoch": 190, "n_parameters": 93325440}
{"train_lr": 0.0014991684495943168, "train_min_lr": 0.0014991684495943168, "train_loss": 0.6529432415054777, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04291143520281483, "epoch": 191, "n_parameters": 93325440}
{"train_lr": 0.0014890499731439859, "train_min_lr": 0.0014890499731439859, "train_loss": 0.652965060244195, "train_loss_scale": 2251749.7435897435, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04314781410786777, "epoch": 192, "n_parameters": 93325440}
{"train_lr": 0.001478909865188459, "train_min_lr": 0.001478909865188459, "train_loss": 0.6527594692575244, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04327949065452394, "epoch": 193, "n_parameters": 93325440}
{"train_lr": 0.0014687488979362113, "train_min_lr": 0.0014687488979362113, "train_loss": 0.6527444154071884, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04356447886675596, "epoch": 194, "n_parameters": 93325440}
{"train_lr": 0.001458567845184241, "train_min_lr": 0.001458567845184241, "train_loss": 0.6526207220621216, "train_loss_scale": 2318966.153846154, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 195, "n_parameters": 93325440}
{"train_lr": 0.001448367482259133, "train_min_lr": 0.001448367482259133, "train_loss": 0.6524855274563799, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04391956604563464, "epoch": 196, "n_parameters": 93325440}
{"train_lr": 0.001438148585958014, "train_min_lr": 0.001438148585958014, "train_loss": 0.6524408043911442, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0437394251796202, "epoch": 197, "n_parameters": 93325440}
{"train_lr": 0.0014279119344894028, "train_min_lr": 0.0014279119344894028, "train_loss": 0.6523803126496764, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04416952813521792, "epoch": 198, "n_parameters": 93325440}
{"train_lr": 0.0014176583074139429, "train_min_lr": 0.0014176583074139429, "train_loss": 0.6521773194559873, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04388629415263541, "epoch": 199, "n_parameters": 93325440}
{"train_lr": 0.0014073884855850315, "train_min_lr": 0.0014073884855850315, "train_loss": 0.6522814940231351, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04478665374410458, "epoch": 200, "n_parameters": 93325440}
{"train_lr": 0.0013971032510893652, "train_min_lr": 0.0013971032510893652, "train_loss": 0.6521291283604044, "train_loss_scale": 3112119.794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.044348080087309845, "epoch": 201, "n_parameters": 93325440}
{"train_lr": 0.0013868033871873699, "train_min_lr": 0.0013868033871873699, "train_loss": 0.6527649165155032, "train_loss_scale": 3454923.487179487, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 202, "n_parameters": 93325440}
{"train_lr": 0.0013764896782535606, "train_min_lr": 0.0013764896782535606, "train_loss": 0.6520171289642652, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.044557038258808926, "epoch": 203, "n_parameters": 93325440}
{"train_lr": 0.0013661629097168019, "train_min_lr": 0.0013661629097168019, "train_loss": 0.6519864696340684, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04660083410831598, "epoch": 204, "n_parameters": 93325440}
{"train_lr": 0.0013558238680005015, "train_min_lr": 0.0013558238680005015, "train_loss": 0.651847201733826, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04455802842186621, "epoch": 205, "n_parameters": 93325440}
{"train_lr": 0.0013454733404627138, "train_min_lr": 0.0013454733404627138, "train_loss": 0.6517544956400226, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04539844350149043, "epoch": 206, "n_parameters": 93325440}
{"train_lr": 0.0013351121153361868, "train_min_lr": 0.0013351121153361868, "train_loss": 0.6516532257008247, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.045580623366941624, "epoch": 207, "n_parameters": 93325440}
{"train_lr": 0.0013247409816683246, "train_min_lr": 0.0013247409816683246, "train_loss": 0.6515663202183369, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0457235167388064, "epoch": 208, "n_parameters": 93325440}
{"train_lr": 0.001314360729261115, "train_min_lr": 0.001314360729261115, "train_loss": 0.6514970767908753, "train_loss_scale": 4073314.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04559553323838955, "epoch": 209, "n_parameters": 93325440}
{"train_lr": 0.0013039721486109636, "train_min_lr": 0.0013039721486109636, "train_loss": 0.6521160529018977, "train_loss_scale": 1626637.1282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 210, "n_parameters": 93325440}
{"train_lr": 0.0012935760308485087, "train_min_lr": 0.0012935760308485087, "train_loss": 0.6513881812301966, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.045461809143232994, "epoch": 211, "n_parameters": 93325440}
{"train_lr": 0.0012831731676783689, "train_min_lr": 0.0012831731676783689, "train_loss": 0.6512881333772571, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04622284271037923, "epoch": 212, "n_parameters": 93325440}
{"train_lr": 0.001272764351318853, "train_min_lr": 0.001272764351318853, "train_loss": 0.6511788309241334, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04637779676331542, "epoch": 213, "n_parameters": 93325440}
{"train_lr": 0.0012623503744416213, "train_min_lr": 0.0012623503744416213, "train_loss": 0.6510865527372329, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04615227956300936, "epoch": 214, "n_parameters": 93325440}
{"train_lr": 0.0012519320301113358, "train_min_lr": 0.0012519320301113358, "train_loss": 0.6510655480699662, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.047730134501575656, "epoch": 215, "n_parameters": 93325440}
{"train_lr": 0.0012415101117252529, "train_min_lr": 0.0012415101117252529, "train_loss": 0.6508944522684965, "train_loss_scale": 1451874.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.046214736675700314, "epoch": 216, "n_parameters": 93325440}
{"train_lr": 0.0012310854129528052, "train_min_lr": 0.0012310854129528052, "train_loss": 0.6508428633977206, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04702541534191905, "epoch": 217, "n_parameters": 93325440}
{"train_lr": 0.0012206587276751709, "train_min_lr": 0.0012206587276751709, "train_loss": 0.6506849839232671, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04694693328406757, "epoch": 218, "n_parameters": 93325440}
{"train_lr": 0.0012102308499247975, "train_min_lr": 0.0012102308499247975, "train_loss": 0.6506286191586883, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04696661863141717, "epoch": 219, "n_parameters": 93325440}
{"train_lr": 0.0011998025738249494, "train_min_lr": 0.0011998025738249494, "train_loss": 0.6505410681502559, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04692891295641088, "epoch": 220, "n_parameters": 93325440}
{"train_lr": 0.0011893746935292267, "train_min_lr": 0.0011893746935292267, "train_loss": 0.6505277044354723, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04882969319796524, "epoch": 221, "n_parameters": 93325440}
{"train_lr": 0.0011789480031610881, "train_min_lr": 0.0011789480031610881, "train_loss": 0.6503539036672848, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04787985252168698, "epoch": 222, "n_parameters": 93325440}
{"train_lr": 0.001168523296753375, "train_min_lr": 0.001168523296753375, "train_loss": 0.6502815783262635, "train_loss_scale": 4140530.871794872, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04777871564818689, "epoch": 223, "n_parameters": 93325440}
{"train_lr": 0.0011581013681878376, "train_min_lr": 0.0011581013681878376, "train_loss": 0.6502043705624647, "train_loss_scale": 3179336.205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 224, "n_parameters": 93325440}
{"train_lr": 0.0011476830111346887, "train_min_lr": 0.0011476830111346887, "train_loss": 0.6501268409670163, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04823222842951042, "epoch": 225, "n_parameters": 93325440}
{"train_lr": 0.001137269018992153, "train_min_lr": 0.001137269018992153, "train_loss": 0.6499739350894322, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0483317183306775, "epoch": 226, "n_parameters": 93325440}
{"train_lr": 0.0011268601848260537, "train_min_lr": 0.0011268601848260537, "train_loss": 0.6509864589390464, "train_loss_scale": 1451874.4615384615, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 227, "n_parameters": 93325440}
{"train_lr": 0.0011164573013094073, "train_min_lr": 0.0011164573013094073, "train_loss": 0.6499788061930583, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04817305395427423, "epoch": 228, "n_parameters": 93325440}
{"train_lr": 0.001106061160662077, "train_min_lr": 0.001106061160662077, "train_loss": 0.6497716619274937, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04839964024722576, "epoch": 229, "n_parameters": 93325440}
{"train_lr": 0.0010956725545904166, "train_min_lr": 0.0010956725545904166, "train_loss": 0.6496301181136798, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04950273199341236, "epoch": 230, "n_parameters": 93325440}
{"train_lr": 0.0010852922742270053, "train_min_lr": 0.0010852922742270053, "train_loss": 0.6495860489801719, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.049837018661678605, "epoch": 231, "n_parameters": 93325440}
{"train_lr": 0.0010749211100703794, "train_min_lr": 0.0010749211100703794, "train_loss": 0.6494765863395654, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.049607541806136184, "epoch": 232, "n_parameters": 93325440}
{"train_lr": 0.0010645598519248383, "train_min_lr": 0.0010645598519248383, "train_loss": 0.6494915124076681, "train_loss_scale": 1263668.5128205128, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04930285058724575, "epoch": 233, "n_parameters": 93325440}
{"train_lr": 0.0010542092888403117, "train_min_lr": 0.0010542092888403117, "train_loss": 0.6492499822798448, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05036587239458011, "epoch": 234, "n_parameters": 93325440}
{"train_lr": 0.0010438702090522496, "train_min_lr": 0.0010438702090522496, "train_loss": 0.6491830785018511, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.04942306615889837, "epoch": 235, "n_parameters": 93325440}
{"train_lr": 0.001033543399921608, "train_min_lr": 0.001033543399921608, "train_loss": 0.6491319514476718, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05040391029503483, "epoch": 236, "n_parameters": 93325440}
{"train_lr": 0.001023229647874884, "train_min_lr": 0.001023229647874884, "train_loss": 0.6491064585697575, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05058605785672672, "epoch": 237, "n_parameters": 93325440}
{"train_lr": 0.0010129297383442272, "train_min_lr": 0.0010129297383442272, "train_loss": 0.6488493490868654, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05047347367359086, "epoch": 238, "n_parameters": 93325440}
{"train_lr": 0.0010026444557076238, "train_min_lr": 0.0010026444557076238, "train_loss": 0.6489043886988208, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05205608325270124, "epoch": 239, "n_parameters": 93325440}
{"train_lr": 0.000992374583229171, "train_min_lr": 0.000992374583229171, "train_loss": 0.6487640984451923, "train_loss_scale": 3764118.9743589745, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0509271570123159, "epoch": 240, "n_parameters": 93325440}
{"train_lr": 0.0009821209029994167, "train_min_lr": 0.0009821209029994167, "train_loss": 0.6486013716516587, "train_loss_scale": 2520615.3846153845, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 241, "n_parameters": 93325440}
{"train_lr": 0.0009718841958758109, "train_min_lr": 0.0009718841958758109, "train_loss": 0.6486407348360771, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05130887303787928, "epoch": 242, "n_parameters": 93325440}
{"train_lr": 0.0009616652414232358, "train_min_lr": 0.0009616652414232358, "train_loss": 0.6486426391758215, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05432937994527702, "epoch": 243, "n_parameters": 93325440}
{"train_lr": 0.0009514648178546331, "train_min_lr": 0.0009514648178546331, "train_loss": 0.648351379407522, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.051419835310811415, "epoch": 244, "n_parameters": 93325440}
{"train_lr": 0.0009412837019717529, "train_min_lr": 0.0009412837019717529, "train_loss": 0.6482436602982955, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05215922709649954, "epoch": 245, "n_parameters": 93325440}
{"train_lr": 0.0009311226691059865, "train_min_lr": 0.0009311226691059865, "train_loss": 0.6480796076834966, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05242994439621002, "epoch": 246, "n_parameters": 93325440}
{"train_lr": 0.0009209824930593261, "train_min_lr": 0.0009209824930593261, "train_loss": 0.6481469419952004, "train_loss_scale": 2910470.564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.053209778387099504, "epoch": 247, "n_parameters": 93325440}
{"train_lr": 0.0009108639460454382, "train_min_lr": 0.0009108639460454382, "train_loss": 0.6480937603956614, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05251093531170717, "epoch": 248, "n_parameters": 93325440}
{"train_lr": 0.0009007677986308538, "train_min_lr": 0.0009007677986308538, "train_loss": 0.6479733674667585, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05371020100294398, "epoch": 249, "n_parameters": 93325440}
{"train_lr": 0.0008906948196762859, "train_min_lr": 0.0008906948196762859, "train_loss": 0.6478110108858882, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05340170722383146, "epoch": 250, "n_parameters": 93325440}
{"train_lr": 0.000880645776278082, "train_min_lr": 0.000880645776278082, "train_loss": 0.6477152522510061, "train_loss_scale": 4012819.6923076925, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 251, "n_parameters": 93325440}
{"train_lr": 0.000870621433709802, "train_min_lr": 0.000870621433709802, "train_loss": 0.647687696135388, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.053430883751179166, "epoch": 252, "n_parameters": 93325440}
{"train_lr": 0.0008606225553639452, "train_min_lr": 0.0008606225553639452, "train_loss": 0.6475503460193673, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05456276907800482, "epoch": 253, "n_parameters": 93325440}
{"train_lr": 0.0008506499026938082, "train_min_lr": 0.0008506499026938082, "train_loss": 0.647404620041832, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.053933491894545466, "epoch": 254, "n_parameters": 93325440}
{"train_lr": 0.0008407042351555041, "train_min_lr": 0.0008407042351555041, "train_loss": 0.6473850671631786, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.054598415115227304, "epoch": 255, "n_parameters": 93325440}
{"train_lr": 0.0008307863101501201, "train_min_lr": 0.0008307863101501201, "train_loss": 0.6478753392942823, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05803065253899266, "epoch": 256, "n_parameters": 93325440}
{"train_lr": 0.0008208968829660467, "train_min_lr": 0.0008208968829660467, "train_loss": 0.6472325039645418, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05549604690466554, "epoch": 257, "n_parameters": 93325440}
{"train_lr": 0.0008110367067214505, "train_min_lr": 0.0008110367067214505, "train_loss": 0.6470743234579762, "train_loss_scale": 3515418.2564102565, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05561789496539113, "epoch": 258, "n_parameters": 93325440}
{"train_lr": 0.0008012065323069282, "train_min_lr": 0.0008012065323069282, "train_loss": 0.6470537870549239, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05580556814152843, "epoch": 259, "n_parameters": 93325440}
{"train_lr": 0.0007914071083283216, "train_min_lr": 0.0007914071083283216, "train_loss": 0.6468538586050272, "train_loss_scale": 3071789.9487179485, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 260, "n_parameters": 93325440}
{"train_lr": 0.0007816391810497043, "train_min_lr": 0.0007816391810497043, "train_loss": 0.646771350899377, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0561039160268429, "epoch": 261, "n_parameters": 93325440}
{"train_lr": 0.0007719034943365599, "train_min_lr": 0.0007719034943365599, "train_loss": 0.6466650695611651, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05663711445119519, "epoch": 262, "n_parameters": 93325440}
{"train_lr": 0.0007622007895991216, "train_min_lr": 0.0007622007895991216, "train_loss": 0.646612029761458, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05639886655486547, "epoch": 263, "n_parameters": 93325440}
{"train_lr": 0.0007525318057359233, "train_min_lr": 0.0007525318057359233, "train_loss": 0.6464776289529908, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05716049529086703, "epoch": 264, "n_parameters": 93325440}
{"train_lr": 0.0007428972790775184, "train_min_lr": 0.0007428972790775184, "train_loss": 0.6463966291063489, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05726632617939359, "epoch": 265, "n_parameters": 93325440}
{"train_lr": 0.0007332979433304174, "train_min_lr": 0.0007332979433304174, "train_loss": 0.6463383005406612, "train_loss_scale": 2359296.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05750849833473181, "epoch": 266, "n_parameters": 93325440}
{"train_lr": 0.0007237345295211991, "train_min_lr": 0.0007237345295211991, "train_loss": 0.6462797559797764, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0577031647762618, "epoch": 267, "n_parameters": 93325440}
{"train_lr": 0.0007142077659408527, "train_min_lr": 0.0007142077659408527, "train_loss": 0.6460776025524888, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05781100867077326, "epoch": 268, "n_parameters": 93325440}
{"train_lr": 0.0007047183780893101, "train_min_lr": 0.0007047183780893101, "train_loss": 0.6460817293622173, "train_loss_scale": 3522139.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 269, "n_parameters": 93325440}
{"train_lr": 0.0006952670886201941, "train_min_lr": 0.0006952670886201941, "train_loss": 0.6458973227164302, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05810472993657757, "epoch": 270, "n_parameters": 93325440}
{"train_lr": 0.0006858546172857918, "train_min_lr": 0.0006858546172857918, "train_loss": 0.6459075553008379, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05934882303699851, "epoch": 271, "n_parameters": 93325440}
{"train_lr": 0.0006764816808822353, "train_min_lr": 0.0006764816808822353, "train_loss": 0.645777548233477, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.058964002722253404, "epoch": 272, "n_parameters": 93325440}
{"train_lr": 0.0006671489931949224, "train_min_lr": 0.0006671489931949224, "train_loss": 0.6455980250850702, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0593459837926695, "epoch": 273, "n_parameters": 93325440}
{"train_lr": 0.000657857264944153, "train_min_lr": 0.000657857264944153, "train_loss": 0.6455329716062317, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.05996235124528026, "epoch": 274, "n_parameters": 93325440}
{"train_lr": 0.0006486072037310055, "train_min_lr": 0.0006486072037310055, "train_loss": 0.6459870672760866, "train_loss_scale": 1539255.7948717948, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 275, "n_parameters": 93325440}
{"train_lr": 0.0006393995139834575, "train_min_lr": 0.0006393995139834575, "train_loss": 0.6454331303875034, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06105376998535716, "epoch": 276, "n_parameters": 93325440}
{"train_lr": 0.0006302348969027304, "train_min_lr": 0.0006302348969027304, "train_loss": 0.6450987507660801, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06070478687970302, "epoch": 277, "n_parameters": 93325440}
{"train_lr": 0.0006211140504098989, "train_min_lr": 0.0006211140504098989, "train_loss": 0.6451377625314471, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06072737142825738, "epoch": 278, "n_parameters": 93325440}
{"train_lr": 0.0006120376690927338, "train_min_lr": 0.0006120376690927338, "train_loss": 0.6450422000951874, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.061695305057443105, "epoch": 279, "n_parameters": 93325440}
{"train_lr": 0.0006030064441528148, "train_min_lr": 0.0006030064441528148, "train_loss": 0.6448882166296244, "train_loss_scale": 1048576.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.061000630307273984, "epoch": 280, "n_parameters": 93325440}
{"train_lr": 0.0005940210633528858, "train_min_lr": 0.0005940210633528858, "train_loss": 0.6448262258408926, "train_loss_scale": 1176287.1794871795, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06153951602008862, "epoch": 281, "n_parameters": 93325440}
{"train_lr": 0.0005850822109644842, "train_min_lr": 0.0005850822109644842, "train_loss": 0.6445927659574991, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.062148432581661604, "epoch": 282, "n_parameters": 93325440}
{"train_lr": 0.0005761905677158267, "train_min_lr": 0.0005761905677158267, "train_loss": 0.6445580886629148, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06258190625036757, "epoch": 283, "n_parameters": 93325440}
{"train_lr": 0.0005673468107399736, "train_min_lr": 0.0005673468107399736, "train_loss": 0.6444872012361884, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06241093659534668, "epoch": 284, "n_parameters": 93325440}
{"train_lr": 0.0005585516135232553, "train_min_lr": 0.0005585516135232553, "train_loss": 0.6443635298607823, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06303226940620404, "epoch": 285, "n_parameters": 93325440}
{"train_lr": 0.0005498056458539954, "train_min_lr": 0.0005498056458539954, "train_loss": 0.6442393993911071, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06352718579224668, "epoch": 286, "n_parameters": 93325440}
{"train_lr": 0.0005411095737714909, "train_min_lr": 0.0005411095737714909, "train_loss": 0.6442402946309019, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06359702980336852, "epoch": 287, "n_parameters": 93325440}
{"train_lr": 0.0005324640595153003, "train_min_lr": 0.0005324640595153003, "train_loss": 0.6440057540073608, "train_loss_scale": 3589356.3076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06370922606677198, "epoch": 288, "n_parameters": 93325440}
{"train_lr": 0.0005238697614748063, "train_min_lr": 0.0005238697614748063, "train_loss": 0.643923002987718, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06471771006591809, "epoch": 289, "n_parameters": 93325440}
{"train_lr": 0.0005153273341390795, "train_min_lr": 0.0005153273341390795, "train_loss": 0.6438843237761503, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06490508722475706, "epoch": 290, "n_parameters": 93325440}
{"train_lr": 0.0005068374280470331, "train_min_lr": 0.0005068374280470331, "train_loss": 0.6437251456320668, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06503680388992414, "epoch": 291, "n_parameters": 93325440}
{"train_lr": 0.0004984006897378886, "train_min_lr": 0.0004984006897378886, "train_loss": 0.6435524815311416, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06528743010205336, "epoch": 292, "n_parameters": 93325440}
{"train_lr": 0.0004900177617019307, "train_min_lr": 0.0004900177617019307, "train_loss": 0.6435383766507491, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06551545427347986, "epoch": 293, "n_parameters": 93325440}
{"train_lr": 0.00048168928233158545, "train_min_lr": 0.00048168928233158545, "train_loss": 0.6435012809024789, "train_loss_scale": 4207747.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 294, "n_parameters": 93325440}
{"train_lr": 0.00047341588587280147, "train_min_lr": 0.00047341588587280147, "train_loss": 0.6432838347764351, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06607394509065227, "epoch": 295, "n_parameters": 93325440}
{"train_lr": 0.00046519820237675105, "train_min_lr": 0.00046519820237675105, "train_loss": 0.6431936899152322, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06693988484449875, "epoch": 296, "n_parameters": 93325440}
{"train_lr": 0.0004570368576518498, "train_min_lr": 0.0004570368576518498, "train_loss": 0.6430653700222954, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06650815351316944, "epoch": 297, "n_parameters": 93325440}
{"train_lr": 0.00044893247321609476, "train_min_lr": 0.00044893247321609476, "train_loss": 0.6430479057180958, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06812319378010355, "epoch": 298, "n_parameters": 93325440}
{"train_lr": 0.0004408856662497389, "train_min_lr": 0.0004408856662497389, "train_loss": 0.6429014222648664, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06764771367829198, "epoch": 299, "n_parameters": 93325440}
{"train_lr": 0.00043289704954828676, "train_min_lr": 0.00043289704954828676, "train_loss": 0.6429314889157047, "train_loss_scale": 3522139.8974358975, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 300, "n_parameters": 93325440}
{"train_lr": 0.0004249672314758303, "train_min_lr": 0.0004249672314758303, "train_loss": 0.642660735700375, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06784943320478003, "epoch": 301, "n_parameters": 93325440}
{"train_lr": 0.0004170968159187159, "train_min_lr": 0.0004170968159187159, "train_loss": 0.6425696528779391, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06832408513396214, "epoch": 302, "n_parameters": 93325440}
{"train_lr": 0.0004092864022395612, "train_min_lr": 0.0004092864022395612, "train_loss": 0.6424364460488925, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06891751661896706, "epoch": 303, "n_parameters": 93325440}
{"train_lr": 0.00040153658523160577, "train_min_lr": 0.00040153658523160577, "train_loss": 0.6424073156876824, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06886650525176755, "epoch": 304, "n_parameters": 93325440}
{"train_lr": 0.0003938479550734206, "train_min_lr": 0.0003938479550734206, "train_loss": 0.6422329479828477, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06983281817669287, "epoch": 305, "n_parameters": 93325440}
{"train_lr": 0.0003862210972839593, "train_min_lr": 0.0003862210972839593, "train_loss": 0.6421345776806657, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.069272606060482, "epoch": 306, "n_parameters": 93325440}
{"train_lr": 0.00037865659267797083, "train_min_lr": 0.00037865659267797083, "train_loss": 0.6420191806764939, "train_loss_scale": 4006098.0512820515, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07026870318282491, "epoch": 307, "n_parameters": 93325440}
{"train_lr": 0.0003711550173217691, "train_min_lr": 0.0003711550173217691, "train_loss": 0.6419261168831816, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07061367023449677, "epoch": 308, "n_parameters": 93325440}
{"train_lr": 0.00036371694248936003, "train_min_lr": 0.00036371694248936003, "train_loss": 0.6419294705518928, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07054317421399248, "epoch": 309, "n_parameters": 93325440}
{"train_lr": 0.00035634293461894045, "train_min_lr": 0.00035634293461894045, "train_loss": 0.6417247991149242, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07073189578472804, "epoch": 310, "n_parameters": 93325440}
{"train_lr": 0.00034903355526975867, "train_min_lr": 0.00034903355526975867, "train_loss": 0.6417726809158921, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07111873459787323, "epoch": 311, "n_parameters": 93325440}
{"train_lr": 0.00034178936107935213, "train_min_lr": 0.00034178936107935213, "train_loss": 0.6415677572576663, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07161029661074281, "epoch": 312, "n_parameters": 93325440}
{"train_lr": 0.00033461090372115536, "train_min_lr": 0.00033461090372115536, "train_loss": 0.641492684252369, "train_loss_scale": 4234633.846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 313, "n_parameters": 93325440}
{"train_lr": 0.0003274987298624889, "train_min_lr": 0.0003274987298624889, "train_loss": 0.6412481918739967, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07331909517494914, "epoch": 314, "n_parameters": 93325440}
{"train_lr": 0.0003204533811229274, "train_min_lr": 0.0003204533811229274, "train_loss": 0.6412237585307314, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07187579928013758, "epoch": 315, "n_parameters": 93325440}
{"train_lr": 0.0003134753940330548, "train_min_lr": 0.0003134753940330548, "train_loss": 0.6410378232025183, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07243024513650781, "epoch": 316, "n_parameters": 93325440}
{"train_lr": 0.00030656529999360446, "train_min_lr": 0.00030656529999360446, "train_loss": 0.6408698111533736, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07311062428813714, "epoch": 317, "n_parameters": 93325440}
{"train_lr": 0.00029972362523499117, "train_min_lr": 0.00029972362523499117, "train_loss": 0.6409426364713372, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07305406825616956, "epoch": 318, "n_parameters": 93325440}
{"train_lr": 0.00029295089077723615, "train_min_lr": 0.00029295089077723615, "train_loss": 0.6407700431031677, "train_loss_scale": 4207747.282051282, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 319, "n_parameters": 93325440}
{"train_lr": 0.0002862476123902899, "train_min_lr": 0.0002862476123902899, "train_loss": 0.6407054029643918, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0735805794620552, "epoch": 320, "n_parameters": 93325440}
{"train_lr": 0.00027961430055475504, "train_min_lr": 0.00027961430055475504, "train_loss": 0.6406478409488232, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07441432332285704, "epoch": 321, "n_parameters": 93325440}
{"train_lr": 0.00027305146042300914, "train_min_lr": 0.00027305146042300914, "train_loss": 0.640592068959123, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07498312365406981, "epoch": 322, "n_parameters": 93325440}
{"train_lr": 0.00026655959178073735, "train_min_lr": 0.00026655959178073735, "train_loss": 0.6404183830062931, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07469649117392226, "epoch": 323, "n_parameters": 93325440}
{"train_lr": 0.00026013918900887165, "train_min_lr": 0.00026013918900887165, "train_loss": 0.6402167561344612, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07498014215618753, "epoch": 324, "n_parameters": 93325440}
{"train_lr": 0.00025379074104594005, "train_min_lr": 0.00025379074104594005, "train_loss": 0.6401908083412892, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0752872514944428, "epoch": 325, "n_parameters": 93325440}
{"train_lr": 0.00024751473135083417, "train_min_lr": 0.00024751473135083417, "train_loss": 0.6400560573555338, "train_loss_scale": 4274963.692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 326, "n_parameters": 93325440}
{"train_lr": 0.00024131163786599068, "train_min_lr": 0.00024131163786599068, "train_loss": 0.639981580277284, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07499027719052556, "epoch": 327, "n_parameters": 93325440}
{"train_lr": 0.00023518193298099495, "train_min_lr": 0.00023518193298099495, "train_loss": 0.6398042239821874, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07650711690672697, "epoch": 328, "n_parameters": 93325440}
{"train_lr": 0.00022912608349660648, "train_min_lr": 0.00022912608349660648, "train_loss": 0.6398056661471342, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07644278692224851, "epoch": 329, "n_parameters": 93325440}
{"train_lr": 0.0002231445505892088, "train_min_lr": 0.0002231445505892088, "train_loss": 0.6396185159205626, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07728877522719976, "epoch": 330, "n_parameters": 93325440}
{"train_lr": 0.00021723778977569177, "train_min_lr": 0.00021723778977569177, "train_loss": 0.6396183661925487, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07660753896030095, "epoch": 331, "n_parameters": 93325440}
{"train_lr": 0.00021140625087876029, "train_min_lr": 0.00021140625087876029, "train_loss": 0.6395186086973319, "train_loss_scale": 4328736.820512821, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 332, "n_parameters": 93325440}
{"train_lr": 0.0002056503779926791, "train_min_lr": 0.0002056503779926791, "train_loss": 0.6393728998418038, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07701161621998136, "epoch": 333, "n_parameters": 93325440}
{"train_lr": 0.00019997060944945298, "train_min_lr": 0.00019997060944945298, "train_loss": 0.6392656035291461, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07688560603090967, "epoch": 334, "n_parameters": 93325440}
{"train_lr": 0.00019436737778544695, "train_min_lr": 0.00019436737778544695, "train_loss": 0.6391785949802934, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07801455034850499, "epoch": 335, "n_parameters": 93325440}
{"train_lr": 0.00018884110970844584, "train_min_lr": 0.00018884110970844584, "train_loss": 0.6390720842979275, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07811531333778149, "epoch": 336, "n_parameters": 93325440}
{"train_lr": 0.00018339222606515945, "train_min_lr": 0.00018339222606515945, "train_loss": 0.6390230263559482, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07809111967873879, "epoch": 337, "n_parameters": 93325440}
{"train_lr": 0.00017802114180917348, "train_min_lr": 0.00017802114180917348, "train_loss": 0.6388136494952517, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07835216268610495, "epoch": 338, "n_parameters": 93325440}
{"train_lr": 0.00017272826596934892, "train_min_lr": 0.00017272826596934892, "train_loss": 0.6388165717705702, "train_loss_scale": 4234633.846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 339, "n_parameters": 93325440}
{"train_lr": 0.00016751400161867366, "train_min_lr": 0.00016751400161867366, "train_loss": 0.638698265553476, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07814501840860033, "epoch": 340, "n_parameters": 93325440}
{"train_lr": 0.00016237874584356537, "train_min_lr": 0.00016237874584356537, "train_loss": 0.6386319290225705, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07872966158753023, "epoch": 341, "n_parameters": 93325440}
{"train_lr": 0.00015732288971363333, "train_min_lr": 0.00015732288971363333, "train_loss": 0.6385865815652486, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07859047117810218, "epoch": 342, "n_parameters": 93325440}
{"train_lr": 0.00015234681825189645, "train_min_lr": 0.00015234681825189645, "train_loss": 0.63845045119524, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.079885441212891, "epoch": 343, "n_parameters": 93325440}
{"train_lr": 0.0001474509104054623, "train_min_lr": 0.0001474509104054623, "train_loss": 0.6383905794519262, "train_loss_scale": 4113644.3076923075, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 344, "n_parameters": 93325440}
{"train_lr": 0.00014263553901666846, "train_min_lr": 0.00014263553901666846, "train_loss": 0.6382861136673734, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07959201844791189, "epoch": 345, "n_parameters": 93325440}
{"train_lr": 0.00013790107079468978, "train_min_lr": 0.00013790107079468978, "train_loss": 0.6382117107367287, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07981004401181753, "epoch": 346, "n_parameters": 93325440}
{"train_lr": 0.00013324786628761168, "train_min_lr": 0.00013324786628761168, "train_loss": 0.6381998471915722, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07895925439273317, "epoch": 347, "n_parameters": 93325440}
{"train_lr": 0.00012867627985497265, "train_min_lr": 0.00012867627985497265, "train_loss": 0.6379489563644315, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07938735000789165, "epoch": 348, "n_parameters": 93325440}
{"train_lr": 0.00012418665964077964, "train_min_lr": 0.00012418665964077964, "train_loss": 0.637967341245176, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07966416738688564, "epoch": 349, "n_parameters": 93325440}
{"train_lr": 0.00011977934754699389, "train_min_lr": 0.00011977934754699389, "train_loss": 0.6379127469009314, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07942784486863858, "epoch": 350, "n_parameters": 93325440}
{"train_lr": 0.00011545467920749486, "train_min_lr": 0.00011545467920749486, "train_loss": 0.6377013552790651, "train_loss_scale": 3414593.641025641, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07806967773164313, "epoch": 351, "n_parameters": 93325440}
{"train_lr": 0.00011121298396252068, "train_min_lr": 0.00011121298396252068, "train_loss": 0.6377180949665415, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07993513080649652, "epoch": 352, "n_parameters": 93325440}
{"train_lr": 0.00010705458483358618, "train_min_lr": 0.00010705458483358618, "train_loss": 0.637548968124275, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07938833866650477, "epoch": 353, "n_parameters": 93325440}
{"train_lr": 0.00010297979849888524, "train_min_lr": 0.00010297979849888524, "train_loss": 0.6374551640489162, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.08055205480792584, "epoch": 354, "n_parameters": 93325440}
{"train_lr": 9.89889352691732e-05, "train_min_lr": 9.89889352691732e-05, "train_loss": 0.6374118199380927, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07887526011715333, "epoch": 355, "n_parameters": 93325440}
{"train_lr": 9.508229906413639e-05, "train_min_lr": 9.508229906413639e-05, "train_loss": 0.6373603121879009, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07952352477094302, "epoch": 356, "n_parameters": 93325440}
{"train_lr": 9.126018738924708e-05, "train_min_lr": 9.126018738924708e-05, "train_loss": 0.6372977341405857, "train_loss_scale": 4301850.256410256, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 357, "n_parameters": 93325440}
{"train_lr": 8.752289131310686e-05, "train_min_lr": 8.752289131310686e-05, "train_loss": 0.6372137449633999, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07873909236290134, "epoch": 358, "n_parameters": 93325440}
{"train_lr": 8.387069544528183e-05, "train_min_lr": 8.387069544528183e-05, "train_loss": 0.6371395650486915, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.08040461130440235, "epoch": 359, "n_parameters": 93325440}
{"train_lr": 8.030387791462727e-05, "train_min_lr": 8.030387791462727e-05, "train_loss": 0.6370332259923602, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07940743340609166, "epoch": 360, "n_parameters": 93325440}
{"train_lr": 7.682271034810752e-05, "train_min_lr": 7.682271034810752e-05, "train_loss": 0.6370450413475434, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07772449648771913, "epoch": 361, "n_parameters": 93325440}
{"train_lr": 7.342745785011076e-05, "train_min_lr": 7.342745785011076e-05, "train_loss": 0.6370495901657984, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07786894112061231, "epoch": 362, "n_parameters": 93325440}
{"train_lr": 7.011837898225992e-05, "train_min_lr": 7.011837898225992e-05, "train_loss": 0.6369192731590607, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07879469144898348, "epoch": 363, "n_parameters": 93325440}
{"train_lr": 6.689572574372245e-05, "train_min_lr": 6.689572574372245e-05, "train_loss": 0.6368134421511338, "train_loss_scale": 4221190.564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 364, "n_parameters": 93325440}
{"train_lr": 6.375974355201949e-05, "train_min_lr": 6.375974355201949e-05, "train_loss": 0.6368455880632004, "train_loss_scale": 2910470.564102564, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 365, "n_parameters": 93325440}
{"train_lr": 6.0710671224336305e-05, "train_min_lr": 6.0710671224336305e-05, "train_loss": 0.6366940776411539, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07782639305178936, "epoch": 366, "n_parameters": 93325440}
{"train_lr": 5.774874095933571e-05, "train_min_lr": 5.774874095933571e-05, "train_loss": 0.6366343214295995, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07853440878291924, "epoch": 367, "n_parameters": 93325440}
{"train_lr": 5.487417831947492e-05, "train_min_lr": 5.487417831947492e-05, "train_loss": 0.6365780937843598, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0772015988970032, "epoch": 368, "n_parameters": 93325440}
{"train_lr": 5.208720221382823e-05, "train_min_lr": 5.208720221382823e-05, "train_loss": 0.6364671008136028, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07735530239267227, "epoch": 369, "n_parameters": 93325440}
{"train_lr": 4.938802488141633e-05, "train_min_lr": 4.938802488141633e-05, "train_loss": 0.6364298100368335, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07671566227546488, "epoch": 370, "n_parameters": 93325440}
{"train_lr": 4.677685187504342e-05, "train_min_lr": 4.677685187504342e-05, "train_loss": 0.6363930054104481, "train_loss_scale": 2520615.3846153845, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07729287096896233, "epoch": 371, "n_parameters": 93325440}
{"train_lr": 4.4253882045643506e-05, "train_min_lr": 4.4253882045643506e-05, "train_loss": 0.6362656567675563, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07769798796671705, "epoch": 372, "n_parameters": 93325440}
{"train_lr": 4.18193075271371e-05, "train_min_lr": 4.18193075271371e-05, "train_loss": 0.6362442963589461, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07702068207212366, "epoch": 373, "n_parameters": 93325440}
{"train_lr": 3.947331372179967e-05, "train_min_lr": 3.947331372179967e-05, "train_loss": 0.6362754225444335, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07484106043687998, "epoch": 374, "n_parameters": 93325440}
{"train_lr": 3.7216079286142414e-05, "train_min_lr": 3.7216079286142414e-05, "train_loss": 0.6361566183324425, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0760205382337937, "epoch": 375, "n_parameters": 93325440}
{"train_lr": 3.5047776117306726e-05, "train_min_lr": 3.5047776117306726e-05, "train_loss": 0.6362235399249655, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07602877177011508, "epoch": 376, "n_parameters": 93325440}
{"train_lr": 3.296856933997393e-05, "train_min_lr": 3.296856933997393e-05, "train_loss": 0.635972078746328, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07373363789744102, "epoch": 377, "n_parameters": 93325440}
{"train_lr": 3.097861729379017e-05, "train_min_lr": 3.097861729379017e-05, "train_loss": 0.6361510580023512, "train_loss_scale": 4234633.846153846, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 378, "n_parameters": 93325440}
{"train_lr": 2.9078071521308036e-05, "train_min_lr": 2.9078071521308036e-05, "train_loss": 0.63601395005408, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07353063681139013, "epoch": 379, "n_parameters": 93325440}
{"train_lr": 2.726707675644639e-05, "train_min_lr": 2.726707675644639e-05, "train_loss": 0.6359621320063105, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07359813988352051, "epoch": 380, "n_parameters": 93325440}
{"train_lr": 2.5545770913468177e-05, "train_min_lr": 2.5545770913468177e-05, "train_loss": 0.6358990157023072, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07391278346618399, "epoch": 381, "n_parameters": 93325440}
{"train_lr": 2.3914285076477597e-05, "train_min_lr": 2.3914285076477597e-05, "train_loss": 0.6358039946987842, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07293483195826411, "epoch": 382, "n_parameters": 93325440}
{"train_lr": 2.2372743489437732e-05, "train_min_lr": 2.2372743489437732e-05, "train_loss": 0.635828505962705, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0730095375329256, "epoch": 383, "n_parameters": 93325440}
{"train_lr": 2.0921263546708787e-05, "train_min_lr": 2.0921263546708787e-05, "train_loss": 0.6358541949914817, "train_loss_scale": 4274963.692307692, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 384, "n_parameters": 93325440}
{"train_lr": 1.9559955784107943e-05, "train_min_lr": 1.9559955784107943e-05, "train_loss": 0.6357775855427369, "train_loss_scale": 2392904.205128205, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 385, "n_parameters": 93325440}
{"train_lr": 1.8288923870491908e-05, "train_min_lr": 1.8288923870491908e-05, "train_loss": 0.6357611800090243, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0697251740986338, "epoch": 386, "n_parameters": 93325440}
{"train_lr": 1.7108264599861837e-05, "train_min_lr": 1.7108264599861837e-05, "train_loss": 0.6357341195241764, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.07034617092890236, "epoch": 387, "n_parameters": 93325440}
{"train_lr": 1.6018067883992388e-05, "train_min_lr": 1.6018067883992388e-05, "train_loss": 0.6357648971323402, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06915067615083013, "epoch": 388, "n_parameters": 93325440}
{"train_lr": 1.5018416745584281e-05, "train_min_lr": 1.5018416745584281e-05, "train_loss": 0.6355957043810915, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06922066757359947, "epoch": 389, "n_parameters": 93325440}
{"train_lr": 1.4109387311942033e-05, "train_min_lr": 1.4109387311942033e-05, "train_loss": 0.6356340470270087, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06942802757168046, "epoch": 390, "n_parameters": 93325440}
{"train_lr": 1.3291048809176455e-05, "train_min_lr": 1.3291048809176455e-05, "train_loss": 0.6357145799944798, "train_loss_scale": 3038181.7435897435, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0692412858733382, "epoch": 391, "n_parameters": 93325440}
{"train_lr": 1.2563463556932867e-05, "train_min_lr": 1.2563463556932867e-05, "train_loss": 0.6356194600558434, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06838465781691365, "epoch": 392, "n_parameters": 93325440}
{"train_lr": 1.1926686963645178e-05, "train_min_lr": 1.1926686963645178e-05, "train_loss": 0.6356445192717589, "train_loss_scale": 4194304.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0678466994387026, "epoch": 393, "n_parameters": 93325440}
{"train_lr": 1.1380767522316361e-05, "train_min_lr": 1.1380767522316361e-05, "train_loss": 0.6355957909702108, "train_loss_scale": 3690180.923076923, "train_weight_decay": 0.050000000000000266, "train_grad_norm": Infinity, "epoch": 394, "n_parameters": 93325440}
{"train_lr": 1.0925746806825483e-05, "train_min_lr": 1.0925746806825483e-05, "train_loss": 0.63564810082794, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06712529549184136, "epoch": 395, "n_parameters": 93325440}
{"train_lr": 1.0561659468761706e-05, "train_min_lr": 1.0561659468761706e-05, "train_loss": 0.6356388856298648, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06706462644088344, "epoch": 396, "n_parameters": 93325440}
{"train_lr": 1.0288533234785454e-05, "train_min_lr": 1.0288533234785454e-05, "train_loss": 0.6356670448126701, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06805484952070774, "epoch": 397, "n_parameters": 93325440}
{"train_lr": 1.0106388904516887e-05, "train_min_lr": 1.0106388904516887e-05, "train_loss": 0.6356371932734663, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.0677952241534606, "epoch": 398, "n_parameters": 93325440}
{"train_lr": 1.0015240348951963e-05, "train_min_lr": 1.0015240348951963e-05, "train_loss": 0.6355630416327562, "train_loss_scale": 2097152.0, "train_weight_decay": 0.050000000000000266, "train_grad_norm": 0.06769333966076374, "epoch": 399, "n_parameters": 93325440}