dq158 commited on
Commit
e9ec199
·
1 Parent(s): ea30298

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "google/flan-t5-large",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
 
1
  {
2
+ "_name_or_path": "dq158/finalMeat",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:025890d5356002da0350524acea1e028a4c5f1d1f7cc3bee2cc13262f0621eeb
3
  size 3132668808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f858a5136dc0160c7a812fe5b8bc3292741ce22b781f9b25d06a5f83f9e844
3
  size 3132668808
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7f356132a63ab1bc73486a7f9c3ebd47f727e216f286211b35fc1d1a53f3277
3
  size 6265677800
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4b854adc7be93ea54cb3ba20a1f58e1d0c24fae05ad270bc9b5bc179cdcb59
3
  size 6265677800
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1ebb3e8ecd863784104abdee43036b081c962dae091b76d5c1db6131e386d91
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93bcac787dd1316177bf9f0cedd7b77d1cea8c71476888b60d1fec88f24fd8a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2517fb96a777d41d764ad3dee01acb528730d0eb485080f169d6588b17f93ab7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:917b485f18156b96c93bc0dd12087e5d8b63a3098bce56ebac8eace0334f00f3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,827 +1,827 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0,
5
  "eval_steps": 500,
6
- "global_step": 204657,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
- "learning_rate": 0.0001,
14
- "loss": 4.0519,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.01,
19
- "learning_rate": 9.999946825617329e-05,
20
- "loss": 3.6979,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.02,
25
- "learning_rate": 9.99978730360032e-05,
26
- "loss": 3.5393,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.03,
31
- "learning_rate": 9.999521437341967e-05,
32
- "loss": 3.4397,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.04,
37
- "learning_rate": 9.999149232497183e-05,
38
- "loss": 3.4883,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.04,
43
- "learning_rate": 9.998670696982668e-05,
44
- "loss": 3.5107,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.05,
49
- "learning_rate": 9.998085840976759e-05,
50
- "loss": 3.4742,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.06,
55
- "learning_rate": 9.997394676919193e-05,
56
- "loss": 3.3594,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.07,
61
- "learning_rate": 9.996597219510866e-05,
62
- "loss": 3.3098,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.07,
67
- "learning_rate": 9.995693485713496e-05,
68
- "loss": 3.4248,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.08,
73
- "learning_rate": 9.994683494749277e-05,
74
- "loss": 3.3875,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.09,
79
- "learning_rate": 9.993567268100469e-05,
80
- "loss": 3.3726,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.1,
85
- "learning_rate": 9.992344829508938e-05,
86
- "loss": 3.3911,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.1,
91
- "learning_rate": 9.991016204975648e-05,
92
- "loss": 3.3826,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.11,
97
- "learning_rate": 9.989581422760117e-05,
98
- "loss": 3.3095,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.12,
103
- "learning_rate": 9.988040513379809e-05,
104
- "loss": 3.3544,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.12,
109
- "learning_rate": 9.986393509609485e-05,
110
- "loss": 3.3007,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.13,
115
- "learning_rate": 9.984640446480509e-05,
116
- "loss": 3.289,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.14,
121
- "learning_rate": 9.9827813612801e-05,
122
- "loss": 3.2892,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.15,
127
- "learning_rate": 9.98081629355054e-05,
128
- "loss": 3.3141,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.15,
133
- "learning_rate": 9.978745285088338e-05,
134
- "loss": 3.3381,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.16,
139
- "learning_rate": 9.97656837994333e-05,
140
- "loss": 3.2098,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.17,
145
- "learning_rate": 9.974285624417751e-05,
146
- "loss": 3.3139,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.18,
151
- "learning_rate": 9.971897067065248e-05,
152
- "loss": 3.2457,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.18,
157
- "learning_rate": 9.969402758689845e-05,
158
- "loss": 3.1359,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.19,
163
- "learning_rate": 9.966802752344868e-05,
164
- "loss": 3.2499,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 0.2,
169
- "learning_rate": 9.964097103331806e-05,
170
- "loss": 3.3144,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.21,
175
- "learning_rate": 9.961285869199149e-05,
176
- "loss": 3.3416,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.21,
181
- "learning_rate": 9.95836910974115e-05,
182
- "loss": 3.2466,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.22,
187
- "learning_rate": 9.955346886996564e-05,
188
- "loss": 3.2055,
189
  "step": 15000
190
  },
191
  {
192
  "epoch": 0.23,
193
- "learning_rate": 9.952219265247323e-05,
194
- "loss": 3.1853,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.23,
199
- "learning_rate": 9.948986311017168e-05,
200
- "loss": 3.261,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.24,
205
- "learning_rate": 9.945648093070237e-05,
206
- "loss": 3.1393,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.25,
211
- "learning_rate": 9.942204682409603e-05,
212
- "loss": 3.3337,
213
  "step": 17000
214
  },
215
  {
216
  "epoch": 0.26,
217
- "learning_rate": 9.938656152275759e-05,
218
- "loss": 3.1791,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.26,
223
- "learning_rate": 9.935002578145065e-05,
224
- "loss": 3.1644,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.27,
229
- "learning_rate": 9.931244037728141e-05,
230
- "loss": 3.2369,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.28,
235
- "learning_rate": 9.927380610968213e-05,
236
- "loss": 3.2139,
237
  "step": 19000
238
  },
239
  {
240
  "epoch": 0.29,
241
- "learning_rate": 9.923412380039415e-05,
242
- "loss": 3.1762,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.29,
247
- "learning_rate": 9.919339429345039e-05,
248
- "loss": 3.2732,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.3,
253
- "learning_rate": 9.915161845515739e-05,
254
- "loss": 3.197,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.31,
259
- "learning_rate": 9.910879717407693e-05,
260
- "loss": 3.1034,
261
  "step": 21000
262
  },
263
  {
264
  "epoch": 0.32,
265
- "learning_rate": 9.906493136100707e-05,
266
- "loss": 3.3108,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.32,
271
- "learning_rate": 9.902002194896285e-05,
272
- "loss": 3.1394,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.33,
277
- "learning_rate": 9.897406989315634e-05,
278
- "loss": 3.2385,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.34,
283
- "learning_rate": 9.892707617097645e-05,
284
- "loss": 3.1855,
285
  "step": 23000
286
  },
287
  {
288
  "epoch": 0.34,
289
- "learning_rate": 9.887904178196804e-05,
290
- "loss": 3.2088,
291
  "step": 23500
292
  },
293
  {
294
  "epoch": 0.35,
295
- "learning_rate": 9.882996774781066e-05,
296
- "loss": 3.2111,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.36,
301
- "learning_rate": 9.877985511229697e-05,
302
- "loss": 3.175,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.37,
307
- "learning_rate": 9.87287049413103e-05,
308
- "loss": 3.1891,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.37,
313
- "learning_rate": 9.867651832280217e-05,
314
- "loss": 3.182,
315
  "step": 25500
316
  },
317
  {
318
  "epoch": 0.38,
319
- "learning_rate": 9.86232963667691e-05,
320
- "loss": 3.3346,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.39,
325
- "learning_rate": 9.85690402052289e-05,
326
- "loss": 3.2496,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.4,
331
- "learning_rate": 9.851375099219677e-05,
332
- "loss": 3.222,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.4,
337
- "learning_rate": 9.845742990366059e-05,
338
- "loss": 3.2083,
339
  "step": 27500
340
  },
341
  {
342
  "epoch": 0.41,
343
- "learning_rate": 9.840007813755603e-05,
344
- "loss": 3.233,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.42,
349
- "learning_rate": 9.834169691374098e-05,
350
- "loss": 3.1732,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.43,
355
- "learning_rate": 9.828228747396964e-05,
356
- "loss": 3.1922,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.43,
361
- "learning_rate": 9.822185108186616e-05,
362
- "loss": 3.1923,
363
  "step": 29500
364
  },
365
  {
366
  "epoch": 0.44,
367
- "learning_rate": 9.816038902289763e-05,
368
- "loss": 3.2879,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.45,
373
- "learning_rate": 9.809790260434693e-05,
374
- "loss": 3.2816,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.45,
379
- "learning_rate": 9.803439315528469e-05,
380
- "loss": 3.2343,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.46,
385
- "learning_rate": 9.796986202654124e-05,
386
- "loss": 3.1372,
387
  "step": 31500
388
  },
389
  {
390
  "epoch": 0.47,
391
- "learning_rate": 9.790431059067775e-05,
392
- "loss": 3.2111,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.48,
397
- "learning_rate": 9.783774024195709e-05,
398
- "loss": 3.1488,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.48,
403
- "learning_rate": 9.77701523963141e-05,
404
- "loss": 3.1637,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.49,
409
- "learning_rate": 9.77015484913256e-05,
410
- "loss": 3.1593,
411
  "step": 33500
412
  },
413
  {
414
  "epoch": 0.5,
415
- "learning_rate": 9.763192998617969e-05,
416
- "loss": 3.1399,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.51,
421
- "learning_rate": 9.75612983616448e-05,
422
- "loss": 3.0628,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.51,
427
- "learning_rate": 9.748965512003812e-05,
428
- "loss": 3.2634,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.52,
433
- "learning_rate": 9.741700178519374e-05,
434
- "loss": 3.1562,
435
  "step": 35500
436
  },
437
  {
438
  "epoch": 0.53,
439
- "learning_rate": 9.734333990243012e-05,
440
- "loss": 3.2411,
441
  "step": 36000
442
  },
443
  {
444
  "epoch": 0.54,
445
- "learning_rate": 9.726867103851735e-05,
446
- "loss": 3.1336,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.54,
451
- "learning_rate": 9.719299678164369e-05,
452
- "loss": 3.1557,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.55,
457
- "learning_rate": 9.711631874138192e-05,
458
- "loss": 3.1368,
459
  "step": 37500
460
  },
461
  {
462
  "epoch": 0.56,
463
- "learning_rate": 9.703863854865502e-05,
464
- "loss": 3.1296,
465
  "step": 38000
466
  },
467
  {
468
  "epoch": 0.56,
469
- "learning_rate": 9.69599578557015e-05,
470
- "loss": 3.1308,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.57,
475
- "learning_rate": 9.688027833604027e-05,
476
- "loss": 3.1526,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.58,
481
- "learning_rate": 9.679960168443507e-05,
482
- "loss": 3.2699,
483
  "step": 39500
484
  },
485
  {
486
  "epoch": 0.59,
487
- "learning_rate": 9.671792961685831e-05,
488
- "loss": 3.0819,
489
  "step": 40000
490
  },
491
  {
492
  "epoch": 0.59,
493
- "learning_rate": 9.663526387045473e-05,
494
- "loss": 3.0947,
495
  "step": 40500
496
  },
497
  {
498
  "epoch": 0.6,
499
- "learning_rate": 9.655160620350434e-05,
500
- "loss": 3.1903,
501
  "step": 41000
502
  },
503
  {
504
  "epoch": 0.61,
505
- "learning_rate": 9.646695839538503e-05,
506
- "loss": 3.0587,
507
  "step": 41500
508
  },
509
  {
510
  "epoch": 0.62,
511
- "learning_rate": 9.638132224653482e-05,
512
- "loss": 3.1778,
513
  "step": 42000
514
  },
515
  {
516
  "epoch": 0.62,
517
- "learning_rate": 9.629469957841341e-05,
518
- "loss": 3.0616,
519
  "step": 42500
520
  },
521
  {
522
  "epoch": 0.63,
523
- "learning_rate": 9.62070922334636e-05,
524
- "loss": 3.1816,
525
  "step": 43000
526
  },
527
  {
528
  "epoch": 0.64,
529
- "learning_rate": 9.611850207507196e-05,
530
- "loss": 3.1625,
531
  "step": 43500
532
  },
533
  {
534
  "epoch": 0.64,
535
- "learning_rate": 9.602893098752929e-05,
536
- "loss": 3.2755,
537
  "step": 44000
538
  },
539
  {
540
  "epoch": 0.65,
541
- "learning_rate": 9.59383808759905e-05,
542
- "loss": 3.1046,
543
  "step": 44500
544
  },
545
  {
546
  "epoch": 0.66,
547
- "learning_rate": 9.584685366643411e-05,
548
- "loss": 3.176,
549
  "step": 45000
550
  },
551
  {
552
  "epoch": 0.67,
553
- "learning_rate": 9.575435130562125e-05,
554
- "loss": 3.1618,
555
  "step": 45500
556
  },
557
  {
558
  "epoch": 0.67,
559
- "learning_rate": 9.566087576105431e-05,
560
- "loss": 3.2012,
561
  "step": 46000
562
  },
563
  {
564
  "epoch": 0.68,
565
- "learning_rate": 9.556642902093503e-05,
566
- "loss": 3.2124,
567
  "step": 46500
568
  },
569
  {
570
  "epoch": 0.69,
571
- "learning_rate": 9.547101309412226e-05,
572
- "loss": 3.1282,
573
  "step": 47000
574
  },
575
  {
576
  "epoch": 0.7,
577
- "learning_rate": 9.53746300100892e-05,
578
- "loss": 3.1725,
579
  "step": 47500
580
  },
581
  {
582
  "epoch": 0.7,
583
- "learning_rate": 9.527728181888023e-05,
584
- "loss": 3.1428,
585
  "step": 48000
586
  },
587
  {
588
  "epoch": 0.71,
589
- "learning_rate": 9.517897059106737e-05,
590
- "loss": 3.1074,
591
  "step": 48500
592
  },
593
  {
594
  "epoch": 0.72,
595
- "learning_rate": 9.507969841770614e-05,
596
- "loss": 3.2534,
597
  "step": 49000
598
  },
599
  {
600
  "epoch": 0.73,
601
- "learning_rate": 9.497946741029116e-05,
602
- "loss": 3.1394,
603
  "step": 49500
604
  },
605
  {
606
  "epoch": 0.73,
607
- "learning_rate": 9.48782797007112e-05,
608
- "loss": 3.1688,
609
  "step": 50000
610
  },
611
  {
612
  "epoch": 0.74,
613
- "learning_rate": 9.477613744120386e-05,
614
- "loss": 3.2439,
615
  "step": 50500
616
  },
617
  {
618
  "epoch": 0.75,
619
- "learning_rate": 9.467304280430977e-05,
620
- "loss": 3.0768,
621
  "step": 51000
622
  },
623
  {
624
  "epoch": 0.75,
625
- "learning_rate": 9.456899798282642e-05,
626
- "loss": 3.082,
627
  "step": 51500
628
  },
629
  {
630
  "epoch": 0.76,
631
- "learning_rate": 9.446400518976144e-05,
632
- "loss": 3.1203,
633
  "step": 52000
634
  },
635
  {
636
  "epoch": 0.77,
637
- "learning_rate": 9.435806665828566e-05,
638
- "loss": 3.1243,
639
  "step": 52500
640
  },
641
  {
642
  "epoch": 0.78,
643
- "learning_rate": 9.425118464168545e-05,
644
- "loss": 3.1732,
645
  "step": 53000
646
  },
647
  {
648
  "epoch": 0.78,
649
- "learning_rate": 9.414336141331491e-05,
650
- "loss": 3.118,
651
  "step": 53500
652
  },
653
  {
654
  "epoch": 0.79,
655
- "learning_rate": 9.403459926654748e-05,
656
- "loss": 3.1597,
657
  "step": 54000
658
  },
659
  {
660
  "epoch": 0.8,
661
- "learning_rate": 9.392490051472718e-05,
662
- "loss": 3.1854,
663
  "step": 54500
664
  },
665
  {
666
  "epoch": 0.81,
667
- "learning_rate": 9.381426749111936e-05,
668
- "loss": 3.1857,
669
  "step": 55000
670
  },
671
  {
672
  "epoch": 0.81,
673
- "learning_rate": 9.370270254886115e-05,
674
- "loss": 3.1094,
675
  "step": 55500
676
  },
677
  {
678
  "epoch": 0.82,
679
- "learning_rate": 9.359020806091126e-05,
680
- "loss": 3.1459,
681
  "step": 56000
682
  },
683
  {
684
  "epoch": 0.83,
685
- "learning_rate": 9.347678641999973e-05,
686
- "loss": 3.063,
687
  "step": 56500
688
  },
689
  {
690
  "epoch": 0.84,
691
- "learning_rate": 9.336244003857682e-05,
692
- "loss": 3.0853,
693
  "step": 57000
694
  },
695
  {
696
  "epoch": 0.84,
697
- "learning_rate": 9.324717134876182e-05,
698
- "loss": 3.1004,
699
  "step": 57500
700
  },
701
  {
702
  "epoch": 0.85,
703
- "learning_rate": 9.313098280229133e-05,
704
- "loss": 3.0624,
705
  "step": 58000
706
  },
707
  {
708
  "epoch": 0.86,
709
- "learning_rate": 9.301387687046704e-05,
710
- "loss": 3.1182,
711
  "step": 58500
712
  },
713
  {
714
  "epoch": 0.86,
715
- "learning_rate": 9.289585604410317e-05,
716
- "loss": 3.0812,
717
  "step": 59000
718
  },
719
  {
720
  "epoch": 0.87,
721
- "learning_rate": 9.277692283347357e-05,
722
- "loss": 3.1594,
723
  "step": 59500
724
  },
725
  {
726
  "epoch": 0.88,
727
- "learning_rate": 9.265707976825829e-05,
728
- "loss": 3.0691,
729
  "step": 60000
730
  },
731
  {
732
  "epoch": 0.89,
733
- "learning_rate": 9.253632939748968e-05,
734
- "loss": 3.0989,
735
  "step": 60500
736
  },
737
  {
738
  "epoch": 0.89,
739
- "learning_rate": 9.241467428949837e-05,
740
- "loss": 3.1739,
741
  "step": 61000
742
  },
743
  {
744
  "epoch": 0.9,
745
- "learning_rate": 9.229211703185842e-05,
746
- "loss": 3.0593,
747
  "step": 61500
748
  },
749
  {
750
  "epoch": 0.91,
751
- "learning_rate": 9.216866023133246e-05,
752
- "loss": 3.0508,
753
  "step": 62000
754
  },
755
  {
756
  "epoch": 0.92,
757
- "learning_rate": 9.204430651381613e-05,
758
- "loss": 3.1162,
759
  "step": 62500
760
  },
761
  {
762
  "epoch": 0.92,
763
- "learning_rate": 9.191905852428232e-05,
764
- "loss": 3.1316,
765
  "step": 63000
766
  },
767
  {
768
  "epoch": 0.93,
769
- "learning_rate": 9.179291892672484e-05,
770
- "loss": 3.0565,
771
  "step": 63500
772
  },
773
  {
774
  "epoch": 0.94,
775
- "learning_rate": 9.166589040410175e-05,
776
- "loss": 3.1502,
777
  "step": 64000
778
  },
779
  {
780
  "epoch": 0.95,
781
- "learning_rate": 9.153797565827839e-05,
782
- "loss": 3.1613,
783
  "step": 64500
784
  },
785
  {
786
  "epoch": 0.95,
787
- "learning_rate": 9.140917740996979e-05,
788
- "loss": 2.9902,
789
  "step": 65000
790
  },
791
  {
792
  "epoch": 0.96,
793
- "learning_rate": 9.127949839868292e-05,
794
- "loss": 3.0026,
795
  "step": 65500
796
  },
797
  {
798
  "epoch": 0.97,
799
- "learning_rate": 9.114894138265832e-05,
800
- "loss": 3.1636,
801
  "step": 66000
802
  },
803
  {
804
  "epoch": 0.97,
805
- "learning_rate": 9.101750913881147e-05,
806
- "loss": 3.1233,
807
  "step": 66500
808
  },
809
  {
810
  "epoch": 0.98,
811
- "learning_rate": 9.088520446267374e-05,
812
- "loss": 3.0781,
813
  "step": 67000
814
  },
815
  {
816
  "epoch": 0.99,
817
- "learning_rate": 9.075203016833295e-05,
818
- "loss": 3.0872,
819
  "step": 67500
820
  },
821
  {
822
  "epoch": 1.0,
823
- "learning_rate": 9.061798908837341e-05,
824
- "loss": 3.1095,
825
  "step": 68000
826
  },
827
  {
@@ -829,7 +829,7 @@
829
  "eval_bleu": 1.0,
830
  "eval_brevity_penalty": 1.0,
831
  "eval_length_ratio": 1.0,
832
- "eval_loss": 2.9751689434051514,
833
  "eval_precisions": [
834
  1.0,
835
  1.0,
@@ -837,1694 +837,18 @@
837
  1.0
838
  ],
839
  "eval_reference_length": 7761920,
840
- "eval_runtime": 15377.8782,
841
- "eval_samples_per_second": 0.986,
842
- "eval_steps_per_second": 0.493,
843
  "eval_translation_length": 7761920,
844
  "step": 68219
845
- },
846
- {
847
- "epoch": 1.0,
848
- "learning_rate": 9.048308407381583e-05,
849
- "loss": 2.9246,
850
- "step": 68500
851
- },
852
- {
853
- "epoch": 1.01,
854
- "learning_rate": 9.034731799405654e-05,
855
- "loss": 2.8047,
856
- "step": 69000
857
- },
858
- {
859
- "epoch": 1.02,
860
- "learning_rate": 9.021069373680653e-05,
861
- "loss": 2.8717,
862
- "step": 69500
863
- },
864
- {
865
- "epoch": 1.03,
866
- "learning_rate": 9.007321420803e-05,
867
- "loss": 2.8453,
868
- "step": 70000
869
- },
870
- {
871
- "epoch": 1.03,
872
- "learning_rate": 8.993488233188263e-05,
873
- "loss": 2.8545,
874
- "step": 70500
875
- },
876
- {
877
- "epoch": 1.04,
878
- "learning_rate": 8.979570105064923e-05,
879
- "loss": 2.7871,
880
- "step": 71000
881
- },
882
- {
883
- "epoch": 1.05,
884
- "learning_rate": 8.965567332468128e-05,
885
- "loss": 2.772,
886
- "step": 71500
887
- },
888
- {
889
- "epoch": 1.06,
890
- "learning_rate": 8.951480213233397e-05,
891
- "loss": 2.915,
892
- "step": 72000
893
- },
894
- {
895
- "epoch": 1.06,
896
- "learning_rate": 8.937309046990271e-05,
897
- "loss": 2.8076,
898
- "step": 72500
899
- },
900
- {
901
- "epoch": 1.07,
902
- "learning_rate": 8.923054135155963e-05,
903
- "loss": 2.693,
904
- "step": 73000
905
- },
906
- {
907
- "epoch": 1.08,
908
- "learning_rate": 8.908715780928925e-05,
909
- "loss": 2.8504,
910
- "step": 73500
911
- },
912
- {
913
- "epoch": 1.08,
914
- "learning_rate": 8.89429428928241e-05,
915
- "loss": 2.7593,
916
- "step": 74000
917
- },
918
- {
919
- "epoch": 1.09,
920
- "learning_rate": 8.879789966957988e-05,
921
- "loss": 2.7953,
922
- "step": 74500
923
- },
924
- {
925
- "epoch": 1.1,
926
- "learning_rate": 8.865203122459008e-05,
927
- "loss": 2.7169,
928
- "step": 75000
929
- },
930
- {
931
- "epoch": 1.11,
932
- "learning_rate": 8.850534066044054e-05,
933
- "loss": 2.6889,
934
- "step": 75500
935
- },
936
- {
937
- "epoch": 1.11,
938
- "learning_rate": 8.835783109720333e-05,
939
- "loss": 2.8703,
940
- "step": 76000
941
- },
942
- {
943
- "epoch": 1.12,
944
- "learning_rate": 8.820950567237043e-05,
945
- "loss": 2.8989,
946
- "step": 76500
947
- },
948
- {
949
- "epoch": 1.13,
950
- "learning_rate": 8.806036754078701e-05,
951
- "loss": 2.7834,
952
- "step": 77000
953
- },
954
- {
955
- "epoch": 1.14,
956
- "learning_rate": 8.791041987458431e-05,
957
- "loss": 2.84,
958
- "step": 77500
959
- },
960
- {
961
- "epoch": 1.14,
962
- "learning_rate": 8.775966586311212e-05,
963
- "loss": 2.6913,
964
- "step": 78000
965
- },
966
- {
967
- "epoch": 1.15,
968
- "learning_rate": 8.760810871287108e-05,
969
- "loss": 2.7712,
970
- "step": 78500
971
- },
972
- {
973
- "epoch": 1.16,
974
- "learning_rate": 8.745575164744435e-05,
975
- "loss": 2.8229,
976
- "step": 79000
977
- },
978
- {
979
- "epoch": 1.17,
980
- "learning_rate": 8.730259790742906e-05,
981
- "loss": 2.8877,
982
- "step": 79500
983
- },
984
- {
985
- "epoch": 1.17,
986
- "learning_rate": 8.714865075036746e-05,
987
- "loss": 2.7965,
988
- "step": 80000
989
- },
990
- {
991
- "epoch": 1.18,
992
- "learning_rate": 8.699391345067759e-05,
993
- "loss": 2.9252,
994
- "step": 80500
995
- },
996
- {
997
- "epoch": 1.19,
998
- "learning_rate": 8.683838929958356e-05,
999
- "loss": 2.8585,
1000
- "step": 81000
1001
- },
1002
- {
1003
- "epoch": 1.19,
1004
- "learning_rate": 8.66820816050457e-05,
1005
- "loss": 2.7542,
1006
- "step": 81500
1007
- },
1008
- {
1009
- "epoch": 1.2,
1010
- "learning_rate": 8.652499369169005e-05,
1011
- "loss": 2.8124,
1012
- "step": 82000
1013
- },
1014
- {
1015
- "epoch": 1.21,
1016
- "learning_rate": 8.636712890073772e-05,
1017
- "loss": 2.8251,
1018
- "step": 82500
1019
- },
1020
- {
1021
- "epoch": 1.22,
1022
- "learning_rate": 8.62084905899339e-05,
1023
- "loss": 2.7148,
1024
- "step": 83000
1025
- },
1026
- {
1027
- "epoch": 1.22,
1028
- "learning_rate": 8.604908213347622e-05,
1029
- "loss": 2.8276,
1030
- "step": 83500
1031
- },
1032
- {
1033
- "epoch": 1.23,
1034
- "learning_rate": 8.58889069219432e-05,
1035
- "loss": 2.8799,
1036
- "step": 84000
1037
- },
1038
- {
1039
- "epoch": 1.24,
1040
- "learning_rate": 8.572796836222206e-05,
1041
- "loss": 2.7959,
1042
- "step": 84500
1043
- },
1044
- {
1045
- "epoch": 1.25,
1046
- "learning_rate": 8.556626987743621e-05,
1047
- "loss": 2.756,
1048
- "step": 85000
1049
- },
1050
- {
1051
- "epoch": 1.25,
1052
- "learning_rate": 8.54038149068725e-05,
1053
- "loss": 2.8956,
1054
- "step": 85500
1055
- },
1056
- {
1057
- "epoch": 1.26,
1058
- "learning_rate": 8.524060690590803e-05,
1059
- "loss": 2.8619,
1060
- "step": 86000
1061
- },
1062
- {
1063
- "epoch": 1.27,
1064
- "learning_rate": 8.507664934593668e-05,
1065
- "loss": 2.8005,
1066
- "step": 86500
1067
- },
1068
- {
1069
- "epoch": 1.28,
1070
- "learning_rate": 8.491194571429526e-05,
1071
- "loss": 2.8213,
1072
- "step": 87000
1073
- },
1074
- {
1075
- "epoch": 1.28,
1076
- "learning_rate": 8.474649951418936e-05,
1077
- "loss": 2.8356,
1078
- "step": 87500
1079
- },
1080
- {
1081
- "epoch": 1.29,
1082
- "learning_rate": 8.458031426461878e-05,
1083
- "loss": 2.877,
1084
- "step": 88000
1085
- },
1086
- {
1087
- "epoch": 1.3,
1088
- "learning_rate": 8.441339350030278e-05,
1089
- "loss": 2.9189,
1090
- "step": 88500
1091
- },
1092
- {
1093
- "epoch": 1.3,
1094
- "learning_rate": 8.424574077160476e-05,
1095
- "loss": 2.8497,
1096
- "step": 89000
1097
- },
1098
- {
1099
- "epoch": 1.31,
1100
- "learning_rate": 8.407735964445689e-05,
1101
- "loss": 2.807,
1102
- "step": 89500
1103
- },
1104
- {
1105
- "epoch": 1.32,
1106
- "learning_rate": 8.390825370028414e-05,
1107
- "loss": 2.8745,
1108
- "step": 90000
1109
- },
1110
- {
1111
- "epoch": 1.33,
1112
- "learning_rate": 8.373842653592818e-05,
1113
- "loss": 2.8054,
1114
- "step": 90500
1115
- },
1116
- {
1117
- "epoch": 1.33,
1118
- "learning_rate": 8.35678817635709e-05,
1119
- "loss": 2.7019,
1120
- "step": 91000
1121
- },
1122
- {
1123
- "epoch": 1.34,
1124
- "learning_rate": 8.339662301065747e-05,
1125
- "loss": 2.8497,
1126
- "step": 91500
1127
- },
1128
- {
1129
- "epoch": 1.35,
1130
- "learning_rate": 8.322465391981927e-05,
1131
- "loss": 2.8442,
1132
- "step": 92000
1133
- },
1134
- {
1135
- "epoch": 1.36,
1136
- "learning_rate": 8.30519781487964e-05,
1137
- "loss": 2.7681,
1138
- "step": 92500
1139
- },
1140
- {
1141
- "epoch": 1.36,
1142
- "learning_rate": 8.287859937035989e-05,
1143
- "loss": 2.8219,
1144
- "step": 93000
1145
- },
1146
- {
1147
- "epoch": 1.37,
1148
- "learning_rate": 8.270452127223352e-05,
1149
- "loss": 2.8409,
1150
- "step": 93500
1151
- },
1152
- {
1153
- "epoch": 1.38,
1154
- "learning_rate": 8.252974755701546e-05,
1155
- "loss": 2.7358,
1156
- "step": 94000
1157
- },
1158
- {
1159
- "epoch": 1.39,
1160
- "learning_rate": 8.235428194209947e-05,
1161
- "loss": 2.6975,
1162
- "step": 94500
1163
- },
1164
- {
1165
- "epoch": 1.39,
1166
- "learning_rate": 8.217812815959588e-05,
1167
- "loss": 2.7972,
1168
- "step": 95000
1169
- },
1170
- {
1171
- "epoch": 1.4,
1172
- "learning_rate": 8.200128995625211e-05,
1173
- "loss": 2.8172,
1174
- "step": 95500
1175
- },
1176
- {
1177
- "epoch": 1.41,
1178
- "learning_rate": 8.182377109337309e-05,
1179
- "loss": 2.8309,
1180
- "step": 96000
1181
- },
1182
- {
1183
- "epoch": 1.41,
1184
- "learning_rate": 8.164557534674122e-05,
1185
- "loss": 2.9371,
1186
- "step": 96500
1187
- },
1188
- {
1189
- "epoch": 1.42,
1190
- "learning_rate": 8.1466706506536e-05,
1191
- "loss": 2.738,
1192
- "step": 97000
1193
- },
1194
- {
1195
- "epoch": 1.43,
1196
- "learning_rate": 8.12871683772535e-05,
1197
- "loss": 2.8989,
1198
- "step": 97500
1199
- },
1200
- {
1201
- "epoch": 1.44,
1202
- "learning_rate": 8.110696477762538e-05,
1203
- "loss": 2.7852,
1204
- "step": 98000
1205
- },
1206
- {
1207
- "epoch": 1.44,
1208
- "learning_rate": 8.092609954053776e-05,
1209
- "loss": 2.862,
1210
- "step": 98500
1211
- },
1212
- {
1213
- "epoch": 1.45,
1214
- "learning_rate": 8.07445765129495e-05,
1215
- "loss": 2.8234,
1216
- "step": 99000
1217
- },
1218
- {
1219
- "epoch": 1.46,
1220
- "learning_rate": 8.056239955581064e-05,
1221
- "loss": 2.844,
1222
- "step": 99500
1223
- },
1224
- {
1225
- "epoch": 1.47,
1226
- "learning_rate": 8.037957254398004e-05,
1227
- "loss": 2.7922,
1228
- "step": 100000
1229
- },
1230
- {
1231
- "epoch": 1.47,
1232
- "learning_rate": 8.01960993661431e-05,
1233
- "loss": 2.817,
1234
- "step": 100500
1235
- },
1236
- {
1237
- "epoch": 1.48,
1238
- "learning_rate": 8.0011983924729e-05,
1239
- "loss": 2.8955,
1240
- "step": 101000
1241
- },
1242
- {
1243
- "epoch": 1.49,
1244
- "learning_rate": 7.982723013582772e-05,
1245
- "loss": 2.8279,
1246
- "step": 101500
1247
- },
1248
- {
1249
- "epoch": 1.5,
1250
- "learning_rate": 7.964184192910672e-05,
1251
- "loss": 2.8259,
1252
- "step": 102000
1253
- },
1254
- {
1255
- "epoch": 1.5,
1256
- "learning_rate": 7.94558232477274e-05,
1257
- "loss": 2.9235,
1258
- "step": 102500
1259
- },
1260
- {
1261
- "epoch": 1.51,
1262
- "learning_rate": 7.926917804826117e-05,
1263
- "loss": 2.8261,
1264
- "step": 103000
1265
- },
1266
- {
1267
- "epoch": 1.52,
1268
- "learning_rate": 7.908191030060532e-05,
1269
- "loss": 2.7384,
1270
- "step": 103500
1271
- },
1272
- {
1273
- "epoch": 1.52,
1274
- "learning_rate": 7.889402398789863e-05,
1275
- "loss": 2.8064,
1276
- "step": 104000
1277
- },
1278
- {
1279
- "epoch": 1.53,
1280
- "learning_rate": 7.870552310643656e-05,
1281
- "loss": 2.8212,
1282
- "step": 104500
1283
- },
1284
- {
1285
- "epoch": 1.54,
1286
- "learning_rate": 7.851641166558628e-05,
1287
- "loss": 2.8605,
1288
- "step": 105000
1289
- },
1290
- {
1291
- "epoch": 1.55,
1292
- "learning_rate": 7.832669368770149e-05,
1293
- "loss": 2.7907,
1294
- "step": 105500
1295
- },
1296
- {
1297
- "epoch": 1.55,
1298
- "learning_rate": 7.813637320803671e-05,
1299
- "loss": 2.7278,
1300
- "step": 106000
1301
- },
1302
- {
1303
- "epoch": 1.56,
1304
- "learning_rate": 7.794545427466153e-05,
1305
- "loss": 2.7808,
1306
- "step": 106500
1307
- },
1308
- {
1309
- "epoch": 1.57,
1310
- "learning_rate": 7.775394094837455e-05,
1311
- "loss": 2.8643,
1312
- "step": 107000
1313
- },
1314
- {
1315
- "epoch": 1.58,
1316
- "learning_rate": 7.756183730261694e-05,
1317
- "loss": 2.738,
1318
- "step": 107500
1319
- },
1320
- {
1321
- "epoch": 1.58,
1322
- "learning_rate": 7.736914742338577e-05,
1323
- "loss": 2.7307,
1324
- "step": 108000
1325
- },
1326
- {
1327
- "epoch": 1.59,
1328
- "learning_rate": 7.71758754091472e-05,
1329
- "loss": 2.8986,
1330
- "step": 108500
1331
- },
1332
- {
1333
- "epoch": 1.6,
1334
- "learning_rate": 7.698202537074926e-05,
1335
- "loss": 2.7493,
1336
- "step": 109000
1337
- },
1338
- {
1339
- "epoch": 1.61,
1340
- "learning_rate": 7.678760143133436e-05,
1341
- "loss": 2.8837,
1342
- "step": 109500
1343
- },
1344
- {
1345
- "epoch": 1.61,
1346
- "learning_rate": 7.659260772625176e-05,
1347
- "loss": 2.778,
1348
- "step": 110000
1349
- },
1350
- {
1351
- "epoch": 1.62,
1352
- "learning_rate": 7.639704840296936e-05,
1353
- "loss": 2.8215,
1354
- "step": 110500
1355
- },
1356
- {
1357
- "epoch": 1.63,
1358
- "learning_rate": 7.620092762098568e-05,
1359
- "loss": 2.8389,
1360
- "step": 111000
1361
- },
1362
- {
1363
- "epoch": 1.63,
1364
- "learning_rate": 7.600424955174132e-05,
1365
- "loss": 2.8627,
1366
- "step": 111500
1367
- },
1368
- {
1369
- "epoch": 1.64,
1370
- "learning_rate": 7.580701837853028e-05,
1371
- "loss": 2.8321,
1372
- "step": 112000
1373
- },
1374
- {
1375
- "epoch": 1.65,
1376
- "learning_rate": 7.560923829641088e-05,
1377
- "loss": 2.9496,
1378
- "step": 112500
1379
- },
1380
- {
1381
- "epoch": 1.66,
1382
- "learning_rate": 7.541091351211666e-05,
1383
- "loss": 2.8576,
1384
- "step": 113000
1385
- },
1386
- {
1387
- "epoch": 1.66,
1388
- "learning_rate": 7.521204824396678e-05,
1389
- "loss": 2.7548,
1390
- "step": 113500
1391
- },
1392
- {
1393
- "epoch": 1.67,
1394
- "learning_rate": 7.501264672177637e-05,
1395
- "loss": 2.7738,
1396
- "step": 114000
1397
- },
1398
- {
1399
- "epoch": 1.68,
1400
- "learning_rate": 7.481271318676662e-05,
1401
- "loss": 2.8088,
1402
- "step": 114500
1403
- },
1404
- {
1405
- "epoch": 1.69,
1406
- "learning_rate": 7.46122518914744e-05,
1407
- "loss": 2.8243,
1408
- "step": 115000
1409
- },
1410
- {
1411
- "epoch": 1.69,
1412
- "learning_rate": 7.441126709966203e-05,
1413
- "loss": 2.6794,
1414
- "step": 115500
1415
- },
1416
- {
1417
- "epoch": 1.7,
1418
- "learning_rate": 7.420976308622632e-05,
1419
- "loss": 2.8277,
1420
- "step": 116000
1421
- },
1422
- {
1423
- "epoch": 1.71,
1424
- "learning_rate": 7.400774413710793e-05,
1425
- "loss": 2.8341,
1426
- "step": 116500
1427
- },
1428
- {
1429
- "epoch": 1.72,
1430
- "learning_rate": 7.380521454920001e-05,
1431
- "loss": 2.8003,
1432
- "step": 117000
1433
- },
1434
- {
1435
- "epoch": 1.72,
1436
- "learning_rate": 7.360217863025687e-05,
1437
- "loss": 2.724,
1438
- "step": 117500
1439
- },
1440
- {
1441
- "epoch": 1.73,
1442
- "learning_rate": 7.33986406988024e-05,
1443
- "loss": 2.829,
1444
- "step": 118000
1445
- },
1446
- {
1447
- "epoch": 1.74,
1448
- "learning_rate": 7.319460508403811e-05,
1449
- "loss": 2.812,
1450
- "step": 118500
1451
- },
1452
- {
1453
- "epoch": 1.74,
1454
- "learning_rate": 7.299007612575117e-05,
1455
- "loss": 2.8172,
1456
- "step": 119000
1457
- },
1458
- {
1459
- "epoch": 1.75,
1460
- "learning_rate": 7.278505817422199e-05,
1461
- "loss": 2.8063,
1462
- "step": 119500
1463
- },
1464
- {
1465
- "epoch": 1.76,
1466
- "learning_rate": 7.257955559013181e-05,
1467
- "loss": 2.8775,
1468
- "step": 120000
1469
- },
1470
- {
1471
- "epoch": 1.77,
1472
- "learning_rate": 7.23735727444698e-05,
1473
- "loss": 2.7225,
1474
- "step": 120500
1475
- },
1476
- {
1477
- "epoch": 1.77,
1478
- "learning_rate": 7.216711401844028e-05,
1479
- "loss": 2.7283,
1480
- "step": 121000
1481
- },
1482
- {
1483
- "epoch": 1.78,
1484
- "learning_rate": 7.196018380336934e-05,
1485
- "loss": 2.8786,
1486
- "step": 121500
1487
- },
1488
- {
1489
- "epoch": 1.79,
1490
- "learning_rate": 7.175278650061156e-05,
1491
- "loss": 2.7372,
1492
- "step": 122000
1493
- },
1494
- {
1495
- "epoch": 1.8,
1496
- "learning_rate": 7.154492652145635e-05,
1497
- "loss": 2.7176,
1498
- "step": 122500
1499
- },
1500
- {
1501
- "epoch": 1.8,
1502
- "learning_rate": 7.133660828703414e-05,
1503
- "loss": 2.8233,
1504
- "step": 123000
1505
- },
1506
- {
1507
- "epoch": 1.81,
1508
- "learning_rate": 7.112783622822235e-05,
1509
- "loss": 2.8269,
1510
- "step": 123500
1511
- },
1512
- {
1513
- "epoch": 1.82,
1514
- "learning_rate": 7.091861478555114e-05,
1515
- "loss": 2.8155,
1516
- "step": 124000
1517
- },
1518
- {
1519
- "epoch": 1.83,
1520
- "learning_rate": 7.070894840910887e-05,
1521
- "loss": 2.7793,
1522
- "step": 124500
1523
- },
1524
- {
1525
- "epoch": 1.83,
1526
- "learning_rate": 7.049884155844762e-05,
1527
- "loss": 2.8524,
1528
- "step": 125000
1529
- },
1530
- {
1531
- "epoch": 1.84,
1532
- "learning_rate": 7.028829870248824e-05,
1533
- "loss": 2.8373,
1534
- "step": 125500
1535
- },
1536
- {
1537
- "epoch": 1.85,
1538
- "learning_rate": 7.007732431942529e-05,
1539
- "loss": 2.8219,
1540
- "step": 126000
1541
- },
1542
- {
1543
- "epoch": 1.85,
1544
- "learning_rate": 6.986592289663177e-05,
1545
- "loss": 2.6958,
1546
- "step": 126500
1547
- },
1548
- {
1549
- "epoch": 1.86,
1550
- "learning_rate": 6.965409893056375e-05,
1551
- "loss": 2.8492,
1552
- "step": 127000
1553
- },
1554
- {
1555
- "epoch": 1.87,
1556
- "learning_rate": 6.944185692666472e-05,
1557
- "loss": 2.8947,
1558
- "step": 127500
1559
- },
1560
- {
1561
- "epoch": 1.88,
1562
- "learning_rate": 6.922920139926964e-05,
1563
- "loss": 2.8311,
1564
- "step": 128000
1565
- },
1566
- {
1567
- "epoch": 1.88,
1568
- "learning_rate": 6.90161368715091e-05,
1569
- "loss": 2.8389,
1570
- "step": 128500
1571
- },
1572
- {
1573
- "epoch": 1.89,
1574
- "learning_rate": 6.880266787521298e-05,
1575
- "loss": 2.7657,
1576
- "step": 129000
1577
- },
1578
- {
1579
- "epoch": 1.9,
1580
- "learning_rate": 6.858879895081412e-05,
1581
- "loss": 2.8388,
1582
- "step": 129500
1583
- },
1584
- {
1585
- "epoch": 1.91,
1586
- "learning_rate": 6.837453464725174e-05,
1587
- "loss": 2.8377,
1588
- "step": 130000
1589
- },
1590
- {
1591
- "epoch": 1.91,
1592
- "learning_rate": 6.815987952187466e-05,
1593
- "loss": 2.8376,
1594
- "step": 130500
1595
- },
1596
- {
1597
- "epoch": 1.92,
1598
- "learning_rate": 6.794483814034439e-05,
1599
- "loss": 2.7048,
1600
- "step": 131000
1601
- },
1602
- {
1603
- "epoch": 1.93,
1604
- "learning_rate": 6.772941507653803e-05,
1605
- "loss": 2.9014,
1606
- "step": 131500
1607
- },
1608
- {
1609
- "epoch": 1.93,
1610
- "learning_rate": 6.751361491245093e-05,
1611
- "loss": 2.7394,
1612
- "step": 132000
1613
- },
1614
- {
1615
- "epoch": 1.94,
1616
- "learning_rate": 6.729744223809929e-05,
1617
- "loss": 2.7723,
1618
- "step": 132500
1619
- },
1620
- {
1621
- "epoch": 1.95,
1622
- "learning_rate": 6.708090165142255e-05,
1623
- "loss": 2.8502,
1624
- "step": 133000
1625
- },
1626
- {
1627
- "epoch": 1.96,
1628
- "learning_rate": 6.686399775818548e-05,
1629
- "loss": 2.7521,
1630
- "step": 133500
1631
- },
1632
- {
1633
- "epoch": 1.96,
1634
- "learning_rate": 6.664673517188036e-05,
1635
- "loss": 2.8726,
1636
- "step": 134000
1637
- },
1638
- {
1639
- "epoch": 1.97,
1640
- "learning_rate": 6.642911851362873e-05,
1641
- "loss": 2.9264,
1642
- "step": 134500
1643
- },
1644
- {
1645
- "epoch": 1.98,
1646
- "learning_rate": 6.621115241208316e-05,
1647
- "loss": 2.8401,
1648
- "step": 135000
1649
- },
1650
- {
1651
- "epoch": 1.99,
1652
- "learning_rate": 6.599284150332885e-05,
1653
- "loss": 2.8706,
1654
- "step": 135500
1655
- },
1656
- {
1657
- "epoch": 1.99,
1658
- "learning_rate": 6.57741904307849e-05,
1659
- "loss": 2.886,
1660
- "step": 136000
1661
- },
1662
- {
1663
- "epoch": 2.0,
1664
- "eval_bleu": 1.0,
1665
- "eval_brevity_penalty": 1.0,
1666
- "eval_length_ratio": 1.0,
1667
- "eval_loss": 2.8930435180664062,
1668
- "eval_precisions": [
1669
- 1.0,
1670
- 1.0,
1671
- 1.0,
1672
- 1.0
1673
- ],
1674
- "eval_reference_length": 7761920,
1675
- "eval_runtime": 15438.142,
1676
- "eval_samples_per_second": 0.982,
1677
- "eval_steps_per_second": 0.491,
1678
- "eval_translation_length": 7761920,
1679
- "step": 136438
1680
- },
1681
- {
1682
- "epoch": 2.0,
1683
- "learning_rate": 6.555520384510561e-05,
1684
- "loss": 2.8434,
1685
- "step": 136500
1686
- },
1687
- {
1688
- "epoch": 2.01,
1689
- "learning_rate": 6.533588640408162e-05,
1690
- "loss": 2.4374,
1691
- "step": 137000
1692
- },
1693
- {
1694
- "epoch": 2.02,
1695
- "learning_rate": 6.511624277254071e-05,
1696
- "loss": 2.421,
1697
- "step": 137500
1698
- },
1699
- {
1700
- "epoch": 2.02,
1701
- "learning_rate": 6.48962776222487e-05,
1702
- "loss": 2.4327,
1703
- "step": 138000
1704
- },
1705
- {
1706
- "epoch": 2.03,
1707
- "learning_rate": 6.467599563181004e-05,
1708
- "loss": 2.4567,
1709
- "step": 138500
1710
- },
1711
- {
1712
- "epoch": 2.04,
1713
- "learning_rate": 6.445540148656825e-05,
1714
- "loss": 2.3828,
1715
- "step": 139000
1716
- },
1717
- {
1718
- "epoch": 2.04,
1719
- "learning_rate": 6.423449987850634e-05,
1720
- "loss": 2.4582,
1721
- "step": 139500
1722
- },
1723
- {
1724
- "epoch": 2.05,
1725
- "learning_rate": 6.401329550614694e-05,
1726
- "loss": 2.4376,
1727
- "step": 140000
1728
- },
1729
- {
1730
- "epoch": 2.06,
1731
- "learning_rate": 6.379179307445245e-05,
1732
- "loss": 2.4871,
1733
- "step": 140500
1734
- },
1735
- {
1736
- "epoch": 2.07,
1737
- "learning_rate": 6.35699972947249e-05,
1738
- "loss": 2.4955,
1739
- "step": 141000
1740
- },
1741
- {
1742
- "epoch": 2.07,
1743
- "learning_rate": 6.334791288450577e-05,
1744
- "loss": 2.5095,
1745
- "step": 141500
1746
- },
1747
- {
1748
- "epoch": 2.08,
1749
- "learning_rate": 6.312554456747558e-05,
1750
- "loss": 2.4246,
1751
- "step": 142000
1752
- },
1753
- {
1754
- "epoch": 2.09,
1755
- "learning_rate": 6.290289707335355e-05,
1756
- "loss": 2.567,
1757
- "step": 142500
1758
- },
1759
- {
1760
- "epoch": 2.1,
1761
- "learning_rate": 6.26799751377969e-05,
1762
- "loss": 2.4974,
1763
- "step": 143000
1764
- },
1765
- {
1766
- "epoch": 2.1,
1767
- "learning_rate": 6.245678350230015e-05,
1768
- "loss": 2.4016,
1769
- "step": 143500
1770
- },
1771
- {
1772
- "epoch": 2.11,
1773
- "learning_rate": 6.223332691409428e-05,
1774
- "loss": 2.4861,
1775
- "step": 144000
1776
- },
1777
- {
1778
- "epoch": 2.12,
1779
- "learning_rate": 6.200961012604575e-05,
1780
- "loss": 2.4795,
1781
- "step": 144500
1782
- },
1783
- {
1784
- "epoch": 2.13,
1785
- "learning_rate": 6.178563789655537e-05,
1786
- "loss": 2.5037,
1787
- "step": 145000
1788
- },
1789
- {
1790
- "epoch": 2.13,
1791
- "learning_rate": 6.156141498945719e-05,
1792
- "loss": 2.4686,
1793
- "step": 145500
1794
- },
1795
- {
1796
- "epoch": 2.14,
1797
- "learning_rate": 6.133694617391704e-05,
1798
- "loss": 2.5575,
1799
- "step": 146000
1800
- },
1801
- {
1802
- "epoch": 2.15,
1803
- "learning_rate": 6.111223622433124e-05,
1804
- "loss": 2.5201,
1805
- "step": 146500
1806
- },
1807
- {
1808
- "epoch": 2.15,
1809
- "learning_rate": 6.088728992022491e-05,
1810
- "loss": 2.4335,
1811
- "step": 147000
1812
- },
1813
- {
1814
- "epoch": 2.16,
1815
- "learning_rate": 6.0662112046150376e-05,
1816
- "loss": 2.4387,
1817
- "step": 147500
1818
- },
1819
- {
1820
- "epoch": 2.17,
1821
- "learning_rate": 6.043670739158542e-05,
1822
- "loss": 2.5027,
1823
- "step": 148000
1824
- },
1825
- {
1826
- "epoch": 2.18,
1827
- "learning_rate": 6.021108075083142e-05,
1828
- "loss": 2.4055,
1829
- "step": 148500
1830
- },
1831
- {
1832
- "epoch": 2.18,
1833
- "learning_rate": 5.998523692291127e-05,
1834
- "loss": 2.478,
1835
- "step": 149000
1836
- },
1837
- {
1838
- "epoch": 2.19,
1839
- "learning_rate": 5.9759180711467446e-05,
1840
- "loss": 2.4491,
1841
- "step": 149500
1842
- },
1843
- {
1844
- "epoch": 2.2,
1845
- "learning_rate": 5.953291692465974e-05,
1846
- "loss": 2.5007,
1847
- "step": 150000
1848
- },
1849
- {
1850
- "epoch": 2.21,
1851
- "learning_rate": 5.930645037506301e-05,
1852
- "loss": 2.4752,
1853
- "step": 150500
1854
- },
1855
- {
1856
- "epoch": 2.21,
1857
- "learning_rate": 5.9079785879564876e-05,
1858
- "loss": 2.4569,
1859
- "step": 151000
1860
- },
1861
- {
1862
- "epoch": 2.22,
1863
- "learning_rate": 5.885292825926314e-05,
1864
- "loss": 2.441,
1865
- "step": 151500
1866
- },
1867
- {
1868
- "epoch": 2.23,
1869
- "learning_rate": 5.862588233936341e-05,
1870
- "loss": 2.4172,
1871
- "step": 152000
1872
- },
1873
- {
1874
- "epoch": 2.24,
1875
- "learning_rate": 5.8398652949076324e-05,
1876
- "loss": 2.4053,
1877
- "step": 152500
1878
- },
1879
- {
1880
- "epoch": 2.24,
1881
- "learning_rate": 5.81712449215149e-05,
1882
- "loss": 2.5097,
1883
- "step": 153000
1884
- },
1885
- {
1886
- "epoch": 2.25,
1887
- "learning_rate": 5.794366309359174e-05,
1888
- "loss": 2.4697,
1889
- "step": 153500
1890
- },
1891
- {
1892
- "epoch": 2.26,
1893
- "learning_rate": 5.771591230591612e-05,
1894
- "loss": 2.5604,
1895
- "step": 154000
1896
- },
1897
- {
1898
- "epoch": 2.26,
1899
- "learning_rate": 5.748799740269104e-05,
1900
- "loss": 2.3954,
1901
- "step": 154500
1902
- },
1903
- {
1904
- "epoch": 2.27,
1905
- "learning_rate": 5.7259923231610236e-05,
1906
- "loss": 2.4581,
1907
- "step": 155000
1908
- },
1909
- {
1910
- "epoch": 2.28,
1911
- "learning_rate": 5.703169464375498e-05,
1912
- "loss": 2.5286,
1913
- "step": 155500
1914
- },
1915
- {
1916
- "epoch": 2.29,
1917
- "learning_rate": 5.680331649349101e-05,
1918
- "loss": 2.3529,
1919
- "step": 156000
1920
- },
1921
- {
1922
- "epoch": 2.29,
1923
- "learning_rate": 5.657479363836519e-05,
1924
- "loss": 2.5152,
1925
- "step": 156500
1926
- },
1927
- {
1928
- "epoch": 2.3,
1929
- "learning_rate": 5.6346130939002193e-05,
1930
- "loss": 2.512,
1931
- "step": 157000
1932
- },
1933
- {
1934
- "epoch": 2.31,
1935
- "learning_rate": 5.6117333259001183e-05,
1936
- "loss": 2.5408,
1937
- "step": 157500
1938
- },
1939
- {
1940
- "epoch": 2.32,
1941
- "learning_rate": 5.5888405464832314e-05,
1942
- "loss": 2.5028,
1943
- "step": 158000
1944
- },
1945
- {
1946
- "epoch": 2.32,
1947
- "learning_rate": 5.565935242573323e-05,
1948
- "loss": 2.5252,
1949
- "step": 158500
1950
- },
1951
- {
1952
- "epoch": 2.33,
1953
- "learning_rate": 5.5430179013605544e-05,
1954
- "loss": 2.4696,
1955
- "step": 159000
1956
- },
1957
- {
1958
- "epoch": 2.34,
1959
- "learning_rate": 5.5200890102911096e-05,
1960
- "loss": 2.4254,
1961
- "step": 159500
1962
- },
1963
- {
1964
- "epoch": 2.35,
1965
- "learning_rate": 5.497149057056843e-05,
1966
- "loss": 2.346,
1967
- "step": 160000
1968
- },
1969
- {
1970
- "epoch": 2.35,
1971
- "learning_rate": 5.474198529584896e-05,
1972
- "loss": 2.4914,
1973
- "step": 160500
1974
- },
1975
- {
1976
- "epoch": 2.36,
1977
- "learning_rate": 5.451237916027319e-05,
1978
- "loss": 2.5014,
1979
- "step": 161000
1980
- },
1981
- {
1982
- "epoch": 2.37,
1983
- "learning_rate": 5.428267704750691e-05,
1984
- "loss": 2.4718,
1985
- "step": 161500
1986
- },
1987
- {
1988
- "epoch": 2.37,
1989
- "learning_rate": 5.4052883843257375e-05,
1990
- "loss": 2.4263,
1991
- "step": 162000
1992
- },
1993
- {
1994
- "epoch": 2.38,
1995
- "learning_rate": 5.3823004435169276e-05,
1996
- "loss": 2.5898,
1997
- "step": 162500
1998
- },
1999
- {
2000
- "epoch": 2.39,
2001
- "learning_rate": 5.3593043712720835e-05,
2002
- "loss": 2.5174,
2003
- "step": 163000
2004
- },
2005
- {
2006
- "epoch": 2.4,
2007
- "learning_rate": 5.336300656711989e-05,
2008
- "loss": 2.5126,
2009
- "step": 163500
2010
- },
2011
- {
2012
- "epoch": 2.4,
2013
- "learning_rate": 5.3132897891199684e-05,
2014
- "loss": 2.4919,
2015
- "step": 164000
2016
- },
2017
- {
2018
- "epoch": 2.41,
2019
- "learning_rate": 5.290272257931495e-05,
2020
- "loss": 2.4545,
2021
- "step": 164500
2022
- },
2023
- {
2024
- "epoch": 2.42,
2025
- "learning_rate": 5.267248552723771e-05,
2026
- "loss": 2.5104,
2027
- "step": 165000
2028
- },
2029
- {
2030
- "epoch": 2.43,
2031
- "learning_rate": 5.244219163205324e-05,
2032
- "loss": 2.464,
2033
- "step": 165500
2034
- },
2035
- {
2036
- "epoch": 2.43,
2037
- "learning_rate": 5.22118457920558e-05,
2038
- "loss": 2.4752,
2039
- "step": 166000
2040
- },
2041
- {
2042
- "epoch": 2.44,
2043
- "learning_rate": 5.198145290664456e-05,
2044
- "loss": 2.5599,
2045
- "step": 166500
2046
- },
2047
- {
2048
- "epoch": 2.45,
2049
- "learning_rate": 5.1751017876219256e-05,
2050
- "loss": 2.4875,
2051
- "step": 167000
2052
- },
2053
- {
2054
- "epoch": 2.46,
2055
- "learning_rate": 5.15205456020761e-05,
2056
- "loss": 2.4102,
2057
- "step": 167500
2058
- },
2059
- {
2060
- "epoch": 2.46,
2061
- "learning_rate": 5.129004098630348e-05,
2062
- "loss": 2.4686,
2063
- "step": 168000
2064
- },
2065
- {
2066
- "epoch": 2.47,
2067
- "learning_rate": 5.105950893167761e-05,
2068
- "loss": 2.5395,
2069
- "step": 168500
2070
- },
2071
- {
2072
- "epoch": 2.48,
2073
- "learning_rate": 5.082895434155841e-05,
2074
- "loss": 2.4642,
2075
- "step": 169000
2076
- },
2077
- {
2078
- "epoch": 2.48,
2079
- "learning_rate": 5.059838211978506e-05,
2080
- "loss": 2.6105,
2081
- "step": 169500
2082
- },
2083
- {
2084
- "epoch": 2.49,
2085
- "learning_rate": 5.0367797170571776e-05,
2086
- "loss": 2.5126,
2087
- "step": 170000
2088
- },
2089
- {
2090
- "epoch": 2.5,
2091
- "learning_rate": 5.013720439840348e-05,
2092
- "loss": 2.5685,
2093
- "step": 170500
2094
- },
2095
- {
2096
- "epoch": 2.51,
2097
- "learning_rate": 4.9906608707931526e-05,
2098
- "loss": 2.4634,
2099
- "step": 171000
2100
- },
2101
- {
2102
- "epoch": 2.51,
2103
- "learning_rate": 4.967601500386929e-05,
2104
- "loss": 2.4449,
2105
- "step": 171500
2106
- },
2107
- {
2108
- "epoch": 2.52,
2109
- "learning_rate": 4.9445428190887924e-05,
2110
- "loss": 2.515,
2111
- "step": 172000
2112
- },
2113
- {
2114
- "epoch": 2.53,
2115
- "learning_rate": 4.9214853173511975e-05,
2116
- "loss": 2.5057,
2117
- "step": 172500
2118
- },
2119
- {
2120
- "epoch": 2.54,
2121
- "learning_rate": 4.8984294856015164e-05,
2122
- "loss": 2.5329,
2123
- "step": 173000
2124
- },
2125
- {
2126
- "epoch": 2.54,
2127
- "learning_rate": 4.875375814231596e-05,
2128
- "loss": 2.4823,
2129
- "step": 173500
2130
- },
2131
- {
2132
- "epoch": 2.55,
2133
- "learning_rate": 4.852324793587333e-05,
2134
- "loss": 2.4907,
2135
- "step": 174000
2136
- },
2137
- {
2138
- "epoch": 2.56,
2139
- "learning_rate": 4.829276913958245e-05,
2140
- "loss": 2.523,
2141
- "step": 174500
2142
- },
2143
- {
2144
- "epoch": 2.57,
2145
- "learning_rate": 4.80623266556704e-05,
2146
- "loss": 2.3798,
2147
- "step": 175000
2148
- },
2149
- {
2150
- "epoch": 2.57,
2151
- "learning_rate": 4.783192538559191e-05,
2152
- "loss": 2.5055,
2153
- "step": 175500
2154
- },
2155
- {
2156
- "epoch": 2.58,
2157
- "learning_rate": 4.760157022992511e-05,
2158
- "loss": 2.542,
2159
- "step": 176000
2160
- },
2161
- {
2162
- "epoch": 2.59,
2163
- "learning_rate": 4.737126608826727e-05,
2164
- "loss": 2.5768,
2165
- "step": 176500
2166
- },
2167
- {
2168
- "epoch": 2.59,
2169
- "learning_rate": 4.7141017859130605e-05,
2170
- "loss": 2.5816,
2171
- "step": 177000
2172
- },
2173
- {
2174
- "epoch": 2.6,
2175
- "learning_rate": 4.6910830439838114e-05,
2176
- "loss": 2.4947,
2177
- "step": 177500
2178
- },
2179
- {
2180
- "epoch": 2.61,
2181
- "learning_rate": 4.668070872641934e-05,
2182
- "loss": 2.5179,
2183
- "step": 178000
2184
- },
2185
- {
2186
- "epoch": 2.62,
2187
- "learning_rate": 4.645065761350634e-05,
2188
- "loss": 2.5035,
2189
- "step": 178500
2190
- },
2191
- {
2192
- "epoch": 2.62,
2193
- "learning_rate": 4.622068199422945e-05,
2194
- "loss": 2.4886,
2195
- "step": 179000
2196
- },
2197
- {
2198
- "epoch": 2.63,
2199
- "learning_rate": 4.59907867601133e-05,
2200
- "loss": 2.434,
2201
- "step": 179500
2202
- },
2203
- {
2204
- "epoch": 2.64,
2205
- "learning_rate": 4.576097680097277e-05,
2206
- "loss": 2.4869,
2207
- "step": 180000
2208
- },
2209
- {
2210
- "epoch": 2.65,
2211
- "learning_rate": 4.553125700480892e-05,
2212
- "loss": 2.4565,
2213
- "step": 180500
2214
- },
2215
- {
2216
- "epoch": 2.65,
2217
- "learning_rate": 4.530163225770513e-05,
2218
- "loss": 2.496,
2219
- "step": 181000
2220
- },
2221
- {
2222
- "epoch": 2.66,
2223
- "learning_rate": 4.5072107443723025e-05,
2224
- "loss": 2.415,
2225
- "step": 181500
2226
- },
2227
- {
2228
- "epoch": 2.67,
2229
- "learning_rate": 4.484268744479875e-05,
2230
- "loss": 2.4461,
2231
- "step": 182000
2232
- },
2233
- {
2234
- "epoch": 2.68,
2235
- "learning_rate": 4.461337714063901e-05,
2236
- "loss": 2.4858,
2237
- "step": 182500
2238
- },
2239
- {
2240
- "epoch": 2.68,
2241
- "learning_rate": 4.438418140861738e-05,
2242
- "loss": 2.5667,
2243
- "step": 183000
2244
- },
2245
- {
2246
- "epoch": 2.69,
2247
- "learning_rate": 4.415510512367047e-05,
2248
- "loss": 2.5009,
2249
- "step": 183500
2250
- },
2251
- {
2252
- "epoch": 2.7,
2253
- "learning_rate": 4.392615315819427e-05,
2254
- "loss": 2.575,
2255
- "step": 184000
2256
- },
2257
- {
2258
- "epoch": 2.7,
2259
- "learning_rate": 4.3697330381940595e-05,
2260
- "loss": 2.3703,
2261
- "step": 184500
2262
- },
2263
- {
2264
- "epoch": 2.71,
2265
- "learning_rate": 4.346864166191336e-05,
2266
- "loss": 2.4276,
2267
- "step": 185000
2268
- },
2269
- {
2270
- "epoch": 2.72,
2271
- "learning_rate": 4.3240091862265176e-05,
2272
- "loss": 2.5392,
2273
- "step": 185500
2274
- },
2275
- {
2276
- "epoch": 2.73,
2277
- "learning_rate": 4.301168584419385e-05,
2278
- "loss": 2.4175,
2279
- "step": 186000
2280
- },
2281
- {
2282
- "epoch": 2.73,
2283
- "learning_rate": 4.2783428465838974e-05,
2284
- "loss": 2.457,
2285
- "step": 186500
2286
- },
2287
- {
2288
- "epoch": 2.74,
2289
- "learning_rate": 4.2555324582178655e-05,
2290
- "loss": 2.5549,
2291
- "step": 187000
2292
- },
2293
- {
2294
- "epoch": 2.75,
2295
- "learning_rate": 4.232737904492613e-05,
2296
- "loss": 2.5629,
2297
- "step": 187500
2298
- },
2299
- {
2300
- "epoch": 2.76,
2301
- "learning_rate": 4.2099596702426705e-05,
2302
- "loss": 2.529,
2303
- "step": 188000
2304
- },
2305
- {
2306
- "epoch": 2.76,
2307
- "learning_rate": 4.187198239955456e-05,
2308
- "loss": 2.5347,
2309
- "step": 188500
2310
- },
2311
- {
2312
- "epoch": 2.77,
2313
- "learning_rate": 4.164454097760973e-05,
2314
- "loss": 2.4773,
2315
- "step": 189000
2316
- },
2317
- {
2318
- "epoch": 2.78,
2319
- "learning_rate": 4.141727727421508e-05,
2320
- "loss": 2.4718,
2321
- "step": 189500
2322
- },
2323
- {
2324
- "epoch": 2.79,
2325
- "learning_rate": 4.119019612321346e-05,
2326
- "loss": 2.4831,
2327
- "step": 190000
2328
- },
2329
- {
2330
- "epoch": 2.79,
2331
- "learning_rate": 4.096330235456489e-05,
2332
- "loss": 2.5369,
2333
- "step": 190500
2334
- },
2335
- {
2336
- "epoch": 2.8,
2337
- "learning_rate": 4.073660079424377e-05,
2338
- "loss": 2.5185,
2339
- "step": 191000
2340
- },
2341
- {
2342
- "epoch": 2.81,
2343
- "learning_rate": 4.051009626413636e-05,
2344
- "loss": 2.4736,
2345
- "step": 191500
2346
- },
2347
- {
2348
- "epoch": 2.81,
2349
- "learning_rate": 4.028379358193805e-05,
2350
- "loss": 2.5441,
2351
- "step": 192000
2352
- },
2353
- {
2354
- "epoch": 2.82,
2355
- "learning_rate": 4.005769756105101e-05,
2356
- "loss": 2.4609,
2357
- "step": 192500
2358
- },
2359
- {
2360
- "epoch": 2.83,
2361
- "learning_rate": 3.983181301048179e-05,
2362
- "loss": 2.5473,
2363
- "step": 193000
2364
- },
2365
- {
2366
- "epoch": 2.84,
2367
- "learning_rate": 3.960614473473898e-05,
2368
- "loss": 2.5741,
2369
- "step": 193500
2370
- },
2371
- {
2372
- "epoch": 2.84,
2373
- "learning_rate": 3.93806975337311e-05,
2374
- "loss": 2.5476,
2375
- "step": 194000
2376
- },
2377
- {
2378
- "epoch": 2.85,
2379
- "learning_rate": 3.9155476202664446e-05,
2380
- "loss": 2.5093,
2381
- "step": 194500
2382
- },
2383
- {
2384
- "epoch": 2.86,
2385
- "learning_rate": 3.8930485531941084e-05,
2386
- "loss": 2.5792,
2387
- "step": 195000
2388
- },
2389
- {
2390
- "epoch": 2.87,
2391
- "learning_rate": 3.870573030705705e-05,
2392
- "loss": 2.5136,
2393
- "step": 195500
2394
- },
2395
- {
2396
- "epoch": 2.87,
2397
- "learning_rate": 3.848121530850049e-05,
2398
- "loss": 2.4505,
2399
- "step": 196000
2400
- },
2401
- {
2402
- "epoch": 2.88,
2403
- "learning_rate": 3.825694531164997e-05,
2404
- "loss": 2.4591,
2405
- "step": 196500
2406
- },
2407
- {
2408
- "epoch": 2.89,
2409
- "learning_rate": 3.803292508667294e-05,
2410
- "loss": 2.501,
2411
- "step": 197000
2412
- },
2413
- {
2414
- "epoch": 2.9,
2415
- "learning_rate": 3.780915939842428e-05,
2416
- "loss": 2.5764,
2417
- "step": 197500
2418
- },
2419
- {
2420
- "epoch": 2.9,
2421
- "learning_rate": 3.7585653006344886e-05,
2422
- "loss": 2.4637,
2423
- "step": 198000
2424
- },
2425
- {
2426
- "epoch": 2.91,
2427
- "learning_rate": 3.736241066436059e-05,
2428
- "loss": 2.5644,
2429
- "step": 198500
2430
- },
2431
- {
2432
- "epoch": 2.92,
2433
- "learning_rate": 3.713943712078083e-05,
2434
- "loss": 2.4658,
2435
- "step": 199000
2436
- },
2437
- {
2438
- "epoch": 2.92,
2439
- "learning_rate": 3.691673711819783e-05,
2440
- "loss": 2.5032,
2441
- "step": 199500
2442
- },
2443
- {
2444
- "epoch": 2.93,
2445
- "learning_rate": 3.669431539338567e-05,
2446
- "loss": 2.5171,
2447
- "step": 200000
2448
- },
2449
- {
2450
- "epoch": 2.94,
2451
- "learning_rate": 3.647217667719949e-05,
2452
- "loss": 2.5642,
2453
- "step": 200500
2454
- },
2455
- {
2456
- "epoch": 2.95,
2457
- "learning_rate": 3.6250325694474943e-05,
2458
- "loss": 2.5403,
2459
- "step": 201000
2460
- },
2461
- {
2462
- "epoch": 2.95,
2463
- "learning_rate": 3.6028767163927654e-05,
2464
- "loss": 2.5735,
2465
- "step": 201500
2466
- },
2467
- {
2468
- "epoch": 2.96,
2469
- "learning_rate": 3.580750579805285e-05,
2470
- "loss": 2.4973,
2471
- "step": 202000
2472
- },
2473
- {
2474
- "epoch": 2.97,
2475
- "learning_rate": 3.558654630302516e-05,
2476
- "loss": 2.4614,
2477
- "step": 202500
2478
- },
2479
- {
2480
- "epoch": 2.98,
2481
- "learning_rate": 3.536589337859844e-05,
2482
- "loss": 2.513,
2483
- "step": 203000
2484
- },
2485
- {
2486
- "epoch": 2.98,
2487
- "learning_rate": 3.514555171800597e-05,
2488
- "loss": 2.4906,
2489
- "step": 203500
2490
- },
2491
- {
2492
- "epoch": 2.99,
2493
- "learning_rate": 3.492552600786041e-05,
2494
- "loss": 2.5152,
2495
- "step": 204000
2496
- },
2497
- {
2498
- "epoch": 3.0,
2499
- "learning_rate": 3.470582092805431e-05,
2500
- "loss": 2.4693,
2501
- "step": 204500
2502
- },
2503
- {
2504
- "epoch": 3.0,
2505
- "eval_bleu": 1.0,
2506
- "eval_brevity_penalty": 1.0,
2507
- "eval_length_ratio": 1.0,
2508
- "eval_loss": 2.896097421646118,
2509
- "eval_precisions": [
2510
- 1.0,
2511
- 1.0,
2512
- 1.0,
2513
- 1.0
2514
- ],
2515
- "eval_reference_length": 7761920,
2516
- "eval_runtime": 14493.3817,
2517
- "eval_samples_per_second": 1.046,
2518
- "eval_steps_per_second": 0.523,
2519
- "eval_translation_length": 7761920,
2520
- "step": 204657
2521
  }
2522
  ],
2523
  "logging_steps": 500,
2524
  "max_steps": 341095,
2525
  "num_train_epochs": 5,
2526
- "save_steps": 500,
2527
- "total_flos": 9.433737890333983e+17,
2528
  "trial_name": null,
2529
  "trial_params": null
2530
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
+ "global_step": 68219,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.01,
13
+ "learning_rate": 8e-05,
14
+ "loss": 2.273,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.01,
19
+ "learning_rate": 7.999957460493864e-05,
20
+ "loss": 2.2497,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.02,
25
+ "learning_rate": 7.999829842880257e-05,
26
+ "loss": 2.3728,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.03,
31
+ "learning_rate": 7.999617149873574e-05,
32
+ "loss": 2.3576,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.04,
37
+ "learning_rate": 7.999319385997746e-05,
38
+ "loss": 2.3374,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.04,
43
+ "learning_rate": 7.998936557586135e-05,
44
+ "loss": 2.3538,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.05,
49
+ "learning_rate": 7.998468672781407e-05,
50
+ "loss": 2.366,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.06,
55
+ "learning_rate": 7.997915741535355e-05,
56
+ "loss": 2.3321,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.07,
61
+ "learning_rate": 7.997277775608694e-05,
62
+ "loss": 2.2838,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.07,
67
+ "learning_rate": 7.996554788570796e-05,
68
+ "loss": 2.2679,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.08,
73
+ "learning_rate": 7.995746795799422e-05,
74
+ "loss": 2.4041,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.09,
79
+ "learning_rate": 7.994853814480376e-05,
80
+ "loss": 2.3415,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.1,
85
+ "learning_rate": 7.99387586360715e-05,
86
+ "loss": 2.3589,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.1,
91
+ "learning_rate": 7.992812963980518e-05,
92
+ "loss": 2.3203,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.11,
97
+ "learning_rate": 7.991665138208094e-05,
98
+ "loss": 2.3517,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.12,
103
+ "learning_rate": 7.990432410703848e-05,
104
+ "loss": 2.4286,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.12,
109
+ "learning_rate": 7.989114807687589e-05,
110
+ "loss": 2.3227,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.13,
115
+ "learning_rate": 7.987712357184408e-05,
116
+ "loss": 2.3945,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.14,
121
+ "learning_rate": 7.98622508902408e-05,
122
+ "loss": 2.3775,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.15,
127
+ "learning_rate": 7.984653034840432e-05,
128
+ "loss": 2.3522,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.15,
133
+ "learning_rate": 7.982996228070671e-05,
134
+ "loss": 2.334,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.16,
139
+ "learning_rate": 7.981254703954664e-05,
140
+ "loss": 2.3742,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.17,
145
+ "learning_rate": 7.979428499534201e-05,
146
+ "loss": 2.2517,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.18,
151
+ "learning_rate": 7.977517653652199e-05,
152
+ "loss": 2.3994,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.18,
157
+ "learning_rate": 7.975522206951876e-05,
158
+ "loss": 2.3606,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.19,
163
+ "learning_rate": 7.973442201875895e-05,
164
+ "loss": 2.3634,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 0.2,
169
+ "learning_rate": 7.971277682665446e-05,
170
+ "loss": 2.3061,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.21,
175
+ "learning_rate": 7.969028695359319e-05,
176
+ "loss": 2.4985,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.21,
181
+ "learning_rate": 7.966695287792921e-05,
182
+ "loss": 2.3908,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.22,
187
+ "learning_rate": 7.96427750959725e-05,
188
+ "loss": 2.3605,
189
  "step": 15000
190
  },
191
  {
192
  "epoch": 0.23,
193
+ "learning_rate": 7.961775412197857e-05,
194
+ "loss": 2.347,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.23,
199
+ "learning_rate": 7.959189048813735e-05,
200
+ "loss": 2.3233,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.24,
205
+ "learning_rate": 7.95651847445619e-05,
206
+ "loss": 2.3415,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.25,
211
+ "learning_rate": 7.953763745927682e-05,
212
+ "loss": 2.4679,
213
  "step": 17000
214
  },
215
  {
216
  "epoch": 0.26,
217
+ "learning_rate": 7.950924921820606e-05,
218
+ "loss": 2.443,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.26,
223
+ "learning_rate": 7.948002062516052e-05,
224
+ "loss": 2.5141,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.27,
229
+ "learning_rate": 7.944995230182513e-05,
230
+ "loss": 2.4339,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.28,
235
+ "learning_rate": 7.941904488774571e-05,
236
+ "loss": 2.4308,
237
  "step": 19000
238
  },
239
  {
240
  "epoch": 0.29,
241
+ "learning_rate": 7.938729904031533e-05,
242
+ "loss": 2.4312,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.29,
247
+ "learning_rate": 7.93547154347603e-05,
248
+ "loss": 2.4756,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.3,
253
+ "learning_rate": 7.932129476412592e-05,
254
+ "loss": 2.4426,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.31,
259
+ "learning_rate": 7.928703773926155e-05,
260
+ "loss": 2.4779,
261
  "step": 21000
262
  },
263
  {
264
  "epoch": 0.32,
265
+ "learning_rate": 7.925194508880567e-05,
266
+ "loss": 2.4671,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.32,
271
+ "learning_rate": 7.921601755917029e-05,
272
+ "loss": 2.4473,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.33,
277
+ "learning_rate": 7.917925591452508e-05,
278
+ "loss": 2.3929,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.34,
283
+ "learning_rate": 7.914166093678117e-05,
284
+ "loss": 2.4158,
285
  "step": 23000
286
  },
287
  {
288
  "epoch": 0.34,
289
+ "learning_rate": 7.910323342557442e-05,
290
+ "loss": 2.4607,
291
  "step": 23500
292
  },
293
  {
294
  "epoch": 0.35,
295
+ "learning_rate": 7.906397419824855e-05,
296
+ "loss": 2.4866,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.36,
301
+ "learning_rate": 7.902388408983759e-05,
302
+ "loss": 2.3708,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.37,
307
+ "learning_rate": 7.898296395304824e-05,
308
+ "loss": 2.4718,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.37,
313
+ "learning_rate": 7.894121465824175e-05,
314
+ "loss": 2.4436,
315
  "step": 25500
316
  },
317
  {
318
  "epoch": 0.38,
319
+ "learning_rate": 7.889863709341528e-05,
320
+ "loss": 2.498,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.39,
325
+ "learning_rate": 7.885523216418312e-05,
326
+ "loss": 2.4418,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.4,
331
+ "learning_rate": 7.881100079375742e-05,
332
+ "loss": 2.3653,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.4,
337
+ "learning_rate": 7.876594392292848e-05,
338
+ "loss": 2.5256,
339
  "step": 27500
340
  },
341
  {
342
  "epoch": 0.41,
343
+ "learning_rate": 7.872006251004482e-05,
344
+ "loss": 2.549,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.42,
349
+ "learning_rate": 7.867335753099278e-05,
350
+ "loss": 2.4543,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.43,
355
+ "learning_rate": 7.86258299791757e-05,
356
+ "loss": 2.4647,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.43,
361
+ "learning_rate": 7.857748086549292e-05,
362
+ "loss": 2.5375,
363
  "step": 29500
364
  },
365
  {
366
  "epoch": 0.44,
367
+ "learning_rate": 7.852831121831812e-05,
368
+ "loss": 2.4895,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.45,
373
+ "learning_rate": 7.847832208347754e-05,
374
+ "loss": 2.4156,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.45,
379
+ "learning_rate": 7.842751452422775e-05,
380
+ "loss": 2.5006,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.46,
385
+ "learning_rate": 7.8375889621233e-05,
386
+ "loss": 2.459,
387
  "step": 31500
388
  },
389
  {
390
  "epoch": 0.47,
391
+ "learning_rate": 7.83234484725422e-05,
392
+ "loss": 2.469,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.48,
397
+ "learning_rate": 7.827019219356568e-05,
398
+ "loss": 2.4331,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.48,
403
+ "learning_rate": 7.821612191705128e-05,
404
+ "loss": 2.487,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.49,
409
+ "learning_rate": 7.816123879306048e-05,
410
+ "loss": 2.5139,
411
  "step": 33500
412
  },
413
  {
414
  "epoch": 0.5,
415
+ "learning_rate": 7.810554398894376e-05,
416
+ "loss": 2.5117,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.51,
421
+ "learning_rate": 7.804903868931584e-05,
422
+ "loss": 2.5537,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.51,
427
+ "learning_rate": 7.79917240960305e-05,
428
+ "loss": 2.4394,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.52,
433
+ "learning_rate": 7.7933601428155e-05,
434
+ "loss": 2.4285,
435
  "step": 35500
436
  },
437
  {
438
  "epoch": 0.53,
439
+ "learning_rate": 7.78746719219441e-05,
440
+ "loss": 2.5693,
441
  "step": 36000
442
  },
443
  {
444
  "epoch": 0.54,
445
+ "learning_rate": 7.781493683081388e-05,
446
+ "loss": 2.4932,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.54,
451
+ "learning_rate": 7.775439742531495e-05,
452
+ "loss": 2.4468,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.55,
457
+ "learning_rate": 7.769305499310553e-05,
458
+ "loss": 2.4614,
459
  "step": 37500
460
  },
461
  {
462
  "epoch": 0.56,
463
+ "learning_rate": 7.763091083892402e-05,
464
+ "loss": 2.6549,
465
  "step": 38000
466
  },
467
  {
468
  "epoch": 0.56,
469
+ "learning_rate": 7.756796628456121e-05,
470
+ "loss": 2.4871,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.57,
475
+ "learning_rate": 7.750422266883222e-05,
476
+ "loss": 2.413,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.58,
481
+ "learning_rate": 7.743968134754806e-05,
482
+ "loss": 2.3566,
483
  "step": 39500
484
  },
485
  {
486
  "epoch": 0.59,
487
+ "learning_rate": 7.737434369348664e-05,
488
+ "loss": 2.4936,
489
  "step": 40000
490
  },
491
  {
492
  "epoch": 0.59,
493
+ "learning_rate": 7.730821109636379e-05,
494
+ "loss": 2.453,
495
  "step": 40500
496
  },
497
  {
498
  "epoch": 0.6,
499
+ "learning_rate": 7.724128496280346e-05,
500
+ "loss": 2.4851,
501
  "step": 41000
502
  },
503
  {
504
  "epoch": 0.61,
505
+ "learning_rate": 7.717356671630802e-05,
506
+ "loss": 2.4564,
507
  "step": 41500
508
  },
509
  {
510
  "epoch": 0.62,
511
+ "learning_rate": 7.710505779722786e-05,
512
+ "loss": 2.5133,
513
  "step": 42000
514
  },
515
  {
516
  "epoch": 0.62,
517
+ "learning_rate": 7.703575966273073e-05,
518
+ "loss": 2.4546,
519
  "step": 42500
520
  },
521
  {
522
  "epoch": 0.63,
523
+ "learning_rate": 7.696567378677089e-05,
524
+ "loss": 2.5157,
525
  "step": 43000
526
  },
527
  {
528
  "epoch": 0.64,
529
+ "learning_rate": 7.689480166005756e-05,
530
+ "loss": 2.4248,
531
  "step": 43500
532
  },
533
  {
534
  "epoch": 0.64,
535
+ "learning_rate": 7.682314479002344e-05,
536
+ "loss": 2.4853,
537
  "step": 44000
538
  },
539
  {
540
  "epoch": 0.65,
541
+ "learning_rate": 7.67507047007924e-05,
542
+ "loss": 2.4615,
543
  "step": 44500
544
  },
545
  {
546
  "epoch": 0.66,
547
+ "learning_rate": 7.667748293314729e-05,
548
+ "loss": 2.5391,
549
  "step": 45000
550
  },
551
  {
552
  "epoch": 0.67,
553
+ "learning_rate": 7.6603481044497e-05,
554
+ "loss": 2.4464,
555
  "step": 45500
556
  },
557
  {
558
  "epoch": 0.67,
559
+ "learning_rate": 7.652870060884345e-05,
560
+ "loss": 2.4941,
561
  "step": 46000
562
  },
563
  {
564
  "epoch": 0.68,
565
+ "learning_rate": 7.645314321674803e-05,
566
+ "loss": 2.4708,
567
  "step": 46500
568
  },
569
  {
570
  "epoch": 0.69,
571
+ "learning_rate": 7.637681047529781e-05,
572
+ "loss": 2.5972,
573
  "step": 47000
574
  },
575
  {
576
  "epoch": 0.7,
577
+ "learning_rate": 7.629970400807136e-05,
578
+ "loss": 2.5369,
579
  "step": 47500
580
  },
581
  {
582
  "epoch": 0.7,
583
+ "learning_rate": 7.622182545510419e-05,
584
+ "loss": 2.4348,
585
  "step": 48000
586
  },
587
  {
588
  "epoch": 0.71,
589
+ "learning_rate": 7.61431764728539e-05,
590
+ "loss": 2.4546,
591
  "step": 48500
592
  },
593
  {
594
  "epoch": 0.72,
595
+ "learning_rate": 7.606375873416491e-05,
596
+ "loss": 2.4378,
597
  "step": 49000
598
  },
599
  {
600
  "epoch": 0.73,
601
+ "learning_rate": 7.598357392823292e-05,
602
+ "loss": 2.471,
603
  "step": 49500
604
  },
605
  {
606
  "epoch": 0.73,
607
+ "learning_rate": 7.590262376056896e-05,
608
+ "loss": 2.4677,
609
  "step": 50000
610
  },
611
  {
612
  "epoch": 0.74,
613
+ "learning_rate": 7.58209099529631e-05,
614
+ "loss": 2.4245,
615
  "step": 50500
616
  },
617
  {
618
  "epoch": 0.75,
619
+ "learning_rate": 7.573843424344783e-05,
620
+ "loss": 2.4734,
621
  "step": 51000
622
  },
623
  {
624
  "epoch": 0.75,
625
+ "learning_rate": 7.565519838626113e-05,
626
+ "loss": 2.4158,
627
  "step": 51500
628
  },
629
  {
630
  "epoch": 0.76,
631
+ "learning_rate": 7.557120415180916e-05,
632
+ "loss": 2.5098,
633
  "step": 52000
634
  },
635
  {
636
  "epoch": 0.77,
637
+ "learning_rate": 7.548645332662853e-05,
638
+ "loss": 2.5478,
639
  "step": 52500
640
  },
641
  {
642
  "epoch": 0.78,
643
+ "learning_rate": 7.540094771334835e-05,
644
+ "loss": 2.4502,
645
  "step": 53000
646
  },
647
  {
648
  "epoch": 0.78,
649
+ "learning_rate": 7.531468913065192e-05,
650
+ "loss": 2.4264,
651
  "step": 53500
652
  },
653
  {
654
  "epoch": 0.79,
655
+ "learning_rate": 7.522767941323798e-05,
656
+ "loss": 2.4903,
657
  "step": 54000
658
  },
659
  {
660
  "epoch": 0.8,
661
+ "learning_rate": 7.513992041178174e-05,
662
+ "loss": 2.4486,
663
  "step": 54500
664
  },
665
  {
666
  "epoch": 0.81,
667
+ "learning_rate": 7.505141399289549e-05,
668
+ "loss": 2.5171,
669
  "step": 55000
670
  },
671
  {
672
  "epoch": 0.81,
673
+ "learning_rate": 7.496216203908891e-05,
674
+ "loss": 2.5396,
675
  "step": 55500
676
  },
677
  {
678
  "epoch": 0.82,
679
+ "learning_rate": 7.487216644872901e-05,
680
+ "loss": 2.4514,
681
  "step": 56000
682
  },
683
  {
684
  "epoch": 0.83,
685
+ "learning_rate": 7.478142913599978e-05,
686
+ "loss": 2.4017,
687
  "step": 56500
688
  },
689
  {
690
  "epoch": 0.84,
691
+ "learning_rate": 7.468995203086146e-05,
692
+ "loss": 2.4591,
693
  "step": 57000
694
  },
695
  {
696
  "epoch": 0.84,
697
+ "learning_rate": 7.459773707900946e-05,
698
+ "loss": 2.5764,
699
  "step": 57500
700
  },
701
  {
702
  "epoch": 0.85,
703
+ "learning_rate": 7.450478624183306e-05,
704
+ "loss": 2.5013,
705
  "step": 58000
706
  },
707
  {
708
  "epoch": 0.86,
709
+ "learning_rate": 7.441110149637363e-05,
710
+ "loss": 2.51,
711
  "step": 58500
712
  },
713
  {
714
  "epoch": 0.86,
715
+ "learning_rate": 7.431668483528254e-05,
716
+ "loss": 2.3992,
717
  "step": 59000
718
  },
719
  {
720
  "epoch": 0.87,
721
+ "learning_rate": 7.422153826677887e-05,
722
+ "loss": 2.4671,
723
  "step": 59500
724
  },
725
  {
726
  "epoch": 0.88,
727
+ "learning_rate": 7.412566381460662e-05,
728
+ "loss": 2.5362,
729
  "step": 60000
730
  },
731
  {
732
  "epoch": 0.89,
733
+ "learning_rate": 7.402906351799175e-05,
734
+ "loss": 2.4981,
735
  "step": 60500
736
  },
737
  {
738
  "epoch": 0.89,
739
+ "learning_rate": 7.39317394315987e-05,
740
+ "loss": 2.4574,
741
  "step": 61000
742
  },
743
  {
744
  "epoch": 0.9,
745
+ "learning_rate": 7.383369362548674e-05,
746
+ "loss": 2.4777,
747
  "step": 61500
748
  },
749
  {
750
  "epoch": 0.91,
751
+ "learning_rate": 7.373492818506597e-05,
752
+ "loss": 2.4628,
753
  "step": 62000
754
  },
755
  {
756
  "epoch": 0.92,
757
+ "learning_rate": 7.363544521105292e-05,
758
+ "loss": 2.4506,
759
  "step": 62500
760
  },
761
  {
762
  "epoch": 0.92,
763
+ "learning_rate": 7.353524681942585e-05,
764
+ "loss": 2.5644,
765
  "step": 63000
766
  },
767
  {
768
  "epoch": 0.93,
769
+ "learning_rate": 7.343433514137987e-05,
770
+ "loss": 2.5131,
771
  "step": 63500
772
  },
773
  {
774
  "epoch": 0.94,
775
+ "learning_rate": 7.333271232328141e-05,
776
+ "loss": 2.5575,
777
  "step": 64000
778
  },
779
  {
780
  "epoch": 0.95,
781
+ "learning_rate": 7.32303805266227e-05,
782
+ "loss": 2.5901,
783
  "step": 64500
784
  },
785
  {
786
  "epoch": 0.95,
787
+ "learning_rate": 7.312734192797583e-05,
788
+ "loss": 2.5701,
789
  "step": 65000
790
  },
791
  {
792
  "epoch": 0.96,
793
+ "learning_rate": 7.302359871894635e-05,
794
+ "loss": 2.5201,
795
  "step": 65500
796
  },
797
  {
798
  "epoch": 0.97,
799
+ "learning_rate": 7.291915310612666e-05,
800
+ "loss": 2.5402,
801
  "step": 66000
802
  },
803
  {
804
  "epoch": 0.97,
805
+ "learning_rate": 7.281400731104918e-05,
806
+ "loss": 2.4797,
807
  "step": 66500
808
  },
809
  {
810
  "epoch": 0.98,
811
+ "learning_rate": 7.2708163570139e-05,
812
+ "loss": 2.4836,
813
  "step": 67000
814
  },
815
  {
816
  "epoch": 0.99,
817
+ "learning_rate": 7.260162413466636e-05,
818
+ "loss": 2.4707,
819
  "step": 67500
820
  },
821
  {
822
  "epoch": 1.0,
823
+ "learning_rate": 7.249439127069873e-05,
824
+ "loss": 2.4334,
825
  "step": 68000
826
  },
827
  {
 
829
  "eval_bleu": 1.0,
830
  "eval_brevity_penalty": 1.0,
831
  "eval_length_ratio": 1.0,
832
+ "eval_loss": 2.243044853210449,
833
  "eval_precisions": [
834
  1.0,
835
  1.0,
 
837
  1.0
838
  ],
839
  "eval_reference_length": 7761920,
840
+ "eval_runtime": 17988.3779,
841
+ "eval_samples_per_second": 0.843,
842
+ "eval_steps_per_second": 0.421,
843
  "eval_translation_length": 7761920,
844
  "step": 68219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  }
846
  ],
847
  "logging_steps": 500,
848
  "max_steps": 341095,
849
  "num_train_epochs": 5,
850
+ "save_steps": 5000,
851
+ "total_flos": 3.144579296777994e+17,
852
  "trial_name": null,
853
  "trial_params": null
854
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a742f70b0846e59a06963ff7344d674f0f22eef8791af5874a171f202b5ca21
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcc6271f614588d827bad076034b6eed6bf1dc0f937c1c713e0d7ec1959bdb0d
3
  size 4728