dq158 commited on
Commit
b6da314
·
1 Parent(s): 2909c60

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:021b25b4ae02b4c67260af9aaa828a04ad34d01af99d0da4a565af18e7741c65
3
  size 37789864
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f805d53d7dddec9940b8dda0a4fc6f84c5e194e588072b3124f4b98d4dba6b2d
3
  size 37789864
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1f2c45e7d776d5a5ad5cc885d98e86d0067fec7d8d3681868cb22bdd658413
3
  size 2622266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:138083fcab266a12c8bdf751a269df328c6c9ed84e6b49b0dd7314b5b256a7c2
3
  size 2622266
last-checkpoint/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 3.0806901454925537,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-68803",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
@@ -11,823 +11,823 @@
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 0.0001,
14
- "loss": 4.4427,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 9.999998551451928e-05,
20
- "loss": 3.7748,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 9.999994205808551e-05,
26
- "loss": 3.6469,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.03,
31
  "learning_rate": 9.999986963072388e-05,
32
- "loss": 3.6827,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 9.999976823247632e-05,
38
- "loss": 3.6186,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.04,
43
  "learning_rate": 9.999963786340163e-05,
44
- "loss": 3.5199,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.05,
49
  "learning_rate": 9.999947852357531e-05,
50
- "loss": 3.5221,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.06,
55
  "learning_rate": 9.999929021308971e-05,
56
- "loss": 3.469,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.07,
61
  "learning_rate": 9.999907293205393e-05,
62
- "loss": 3.5388,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.07,
67
  "learning_rate": 9.999882668059387e-05,
68
- "loss": 3.4063,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.08,
73
  "learning_rate": 9.99985514588522e-05,
74
- "loss": 3.4314,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.09,
79
  "learning_rate": 9.99982472669884e-05,
80
- "loss": 3.4252,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.09,
85
  "learning_rate": 9.999791410517874e-05,
86
- "loss": 3.4736,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.1,
91
  "learning_rate": 9.999755197361624e-05,
92
- "loss": 3.4011,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.11,
97
  "learning_rate": 9.999716087251072e-05,
98
- "loss": 3.4709,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.12,
103
  "learning_rate": 9.99967408020888e-05,
104
- "loss": 3.401,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.12,
109
  "learning_rate": 9.999629176259391e-05,
110
- "loss": 3.4339,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.13,
115
  "learning_rate": 9.999581375428617e-05,
116
- "loss": 3.3573,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.14,
121
  "learning_rate": 9.999530677744258e-05,
122
- "loss": 3.364,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.15,
127
  "learning_rate": 9.999477083235691e-05,
128
- "loss": 3.4358,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.15,
133
  "learning_rate": 9.999420591933965e-05,
134
- "loss": 3.369,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.16,
139
  "learning_rate": 9.999361203871817e-05,
140
- "loss": 3.3874,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.17,
145
  "learning_rate": 9.999298919083656e-05,
146
- "loss": 3.3604,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.17,
151
  "learning_rate": 9.99923373760557e-05,
152
- "loss": 3.3134,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.18,
157
  "learning_rate": 9.999165659475324e-05,
158
- "loss": 3.3988,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.19,
163
  "learning_rate": 9.999094684732369e-05,
164
- "loss": 3.3562,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 0.2,
169
  "learning_rate": 9.999020813417826e-05,
170
- "loss": 3.4156,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.2,
175
  "learning_rate": 9.998944045574499e-05,
176
- "loss": 3.2524,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.21,
181
  "learning_rate": 9.998864381246869e-05,
182
- "loss": 3.4463,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.22,
187
  "learning_rate": 9.998781820481091e-05,
188
- "loss": 3.3492,
189
  "step": 15000
190
  },
191
  {
192
  "epoch": 0.23,
193
  "learning_rate": 9.998696363325009e-05,
194
- "loss": 3.4512,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.23,
199
  "learning_rate": 9.998608009828132e-05,
200
- "loss": 3.3218,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.24,
205
  "learning_rate": 9.998516760041659e-05,
206
- "loss": 3.2985,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.25,
211
  "learning_rate": 9.998422614018456e-05,
212
- "loss": 3.3771,
213
  "step": 17000
214
  },
215
  {
216
  "epoch": 0.25,
217
  "learning_rate": 9.998325571813079e-05,
218
- "loss": 3.3023,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.26,
223
  "learning_rate": 9.998225633481753e-05,
224
- "loss": 3.2226,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.27,
229
  "learning_rate": 9.998122799082386e-05,
230
- "loss": 3.3422,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.28,
235
  "learning_rate": 9.998017068674558e-05,
236
- "loss": 3.3089,
237
  "step": 19000
238
  },
239
  {
240
  "epoch": 0.28,
241
  "learning_rate": 9.997908442319536e-05,
242
- "loss": 3.2337,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.29,
247
  "learning_rate": 9.99779692008026e-05,
248
- "loss": 3.3586,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.3,
253
  "learning_rate": 9.997682502021345e-05,
254
- "loss": 3.2019,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.31,
259
  "learning_rate": 9.997565188209089e-05,
260
- "loss": 3.2937,
261
  "step": 21000
262
  },
263
  {
264
  "epoch": 0.31,
265
  "learning_rate": 9.997444978711465e-05,
266
- "loss": 3.4064,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.32,
271
  "learning_rate": 9.997321873598125e-05,
272
- "loss": 3.339,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.33,
277
  "learning_rate": 9.9971958729404e-05,
278
- "loss": 3.3237,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.33,
283
  "learning_rate": 9.997066976811294e-05,
284
- "loss": 3.3782,
285
  "step": 23000
286
  },
287
  {
288
  "epoch": 0.34,
289
  "learning_rate": 9.996935185285495e-05,
290
- "loss": 3.336,
291
  "step": 23500
292
  },
293
  {
294
  "epoch": 0.35,
295
  "learning_rate": 9.996800498439362e-05,
296
- "loss": 3.1749,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.36,
301
  "learning_rate": 9.99666291635094e-05,
302
- "loss": 3.252,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.36,
307
  "learning_rate": 9.996522439099943e-05,
308
- "loss": 3.3544,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.37,
313
  "learning_rate": 9.99637906676777e-05,
314
- "loss": 3.2898,
315
  "step": 25500
316
  },
317
  {
318
  "epoch": 0.38,
319
  "learning_rate": 9.996232799437487e-05,
320
- "loss": 3.2753,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.39,
325
  "learning_rate": 9.996083637193849e-05,
326
- "loss": 3.3539,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.39,
331
  "learning_rate": 9.995931580123284e-05,
332
- "loss": 3.2567,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.4,
337
  "learning_rate": 9.995776628313896e-05,
338
- "loss": 3.1842,
339
  "step": 27500
340
  },
341
  {
342
  "epoch": 0.41,
343
  "learning_rate": 9.995618781855464e-05,
344
- "loss": 3.2446,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.41,
349
  "learning_rate": 9.995458040839452e-05,
350
- "loss": 3.2132,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.42,
355
  "learning_rate": 9.995294405358993e-05,
356
- "loss": 3.2992,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.43,
361
  "learning_rate": 9.995127875508903e-05,
362
- "loss": 3.2555,
363
  "step": 29500
364
  },
365
  {
366
  "epoch": 0.44,
367
  "learning_rate": 9.99495845138567e-05,
368
- "loss": 3.3704,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.44,
373
  "learning_rate": 9.994786133087464e-05,
374
- "loss": 3.2629,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.45,
379
  "learning_rate": 9.994610920714126e-05,
380
- "loss": 3.224,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.46,
385
  "learning_rate": 9.994432814367183e-05,
386
- "loss": 3.31,
387
  "step": 31500
388
  },
389
  {
390
  "epoch": 0.47,
391
  "learning_rate": 9.99425181414983e-05,
392
- "loss": 3.2763,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.47,
397
  "learning_rate": 9.994067920166939e-05,
398
- "loss": 3.2862,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.48,
403
  "learning_rate": 9.993881132525067e-05,
404
- "loss": 3.3125,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.49,
409
  "learning_rate": 9.993691451332439e-05,
410
- "loss": 3.1288,
411
  "step": 33500
412
  },
413
  {
414
  "epoch": 0.49,
415
  "learning_rate": 9.993498876698963e-05,
416
- "loss": 3.2958,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.5,
421
  "learning_rate": 9.993303408736217e-05,
422
- "loss": 3.1933,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.51,
427
  "learning_rate": 9.993105047557461e-05,
428
- "loss": 3.2504,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.52,
433
  "learning_rate": 9.992903793277628e-05,
434
- "loss": 3.3293,
435
  "step": 35500
436
  },
437
  {
438
  "epoch": 0.52,
439
  "learning_rate": 9.99269964601333e-05,
440
- "loss": 3.1969,
441
  "step": 36000
442
  },
443
  {
444
  "epoch": 0.53,
445
  "learning_rate": 9.992492605882853e-05,
446
- "loss": 3.2087,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.54,
451
  "learning_rate": 9.99228267300616e-05,
452
- "loss": 3.3125,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.55,
457
  "learning_rate": 9.992069847504891e-05,
458
- "loss": 3.2677,
459
  "step": 37500
460
  },
461
  {
462
  "epoch": 0.55,
463
  "learning_rate": 9.99185412950236e-05,
464
- "loss": 3.2132,
465
  "step": 38000
466
  },
467
  {
468
  "epoch": 0.56,
469
  "learning_rate": 9.991635519123559e-05,
470
- "loss": 3.2534,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.57,
475
  "learning_rate": 9.991414016495155e-05,
476
- "loss": 3.1735,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.57,
481
  "learning_rate": 9.99118962174549e-05,
482
- "loss": 3.2422,
483
  "step": 39500
484
  },
485
  {
486
  "epoch": 0.58,
487
  "learning_rate": 9.990962335004584e-05,
488
- "loss": 3.1625,
489
  "step": 40000
490
  },
491
  {
492
  "epoch": 0.59,
493
  "learning_rate": 9.99073215640413e-05,
494
- "loss": 3.1163,
495
  "step": 40500
496
  },
497
  {
498
  "epoch": 0.6,
499
  "learning_rate": 9.990499086077498e-05,
500
- "loss": 3.1521,
501
  "step": 41000
502
  },
503
  {
504
  "epoch": 0.6,
505
  "learning_rate": 9.990263124159736e-05,
506
- "loss": 3.2036,
507
  "step": 41500
508
  },
509
  {
510
  "epoch": 0.61,
511
  "learning_rate": 9.990024270787561e-05,
512
- "loss": 3.181,
513
  "step": 42000
514
  },
515
  {
516
  "epoch": 0.62,
517
  "learning_rate": 9.989782526099372e-05,
518
- "loss": 3.1672,
519
  "step": 42500
520
  },
521
  {
522
  "epoch": 0.62,
523
  "learning_rate": 9.989537890235238e-05,
524
- "loss": 3.2336,
525
  "step": 43000
526
  },
527
  {
528
  "epoch": 0.63,
529
  "learning_rate": 9.989290363336908e-05,
530
- "loss": 3.1455,
531
  "step": 43500
532
  },
533
  {
534
  "epoch": 0.64,
535
  "learning_rate": 9.989039945547803e-05,
536
- "loss": 3.1859,
537
  "step": 44000
538
  },
539
  {
540
  "epoch": 0.65,
541
  "learning_rate": 9.98878663701302e-05,
542
- "loss": 3.1396,
543
  "step": 44500
544
  },
545
  {
546
  "epoch": 0.65,
547
  "learning_rate": 9.988530437879333e-05,
548
- "loss": 3.2585,
549
  "step": 45000
550
  },
551
  {
552
  "epoch": 0.66,
553
  "learning_rate": 9.988271348295184e-05,
554
- "loss": 3.2201,
555
  "step": 45500
556
  },
557
  {
558
  "epoch": 0.67,
559
  "learning_rate": 9.988009368410698e-05,
560
- "loss": 3.2758,
561
  "step": 46000
562
  },
563
  {
564
  "epoch": 0.68,
565
  "learning_rate": 9.98774449837767e-05,
566
- "loss": 3.1742,
567
  "step": 46500
568
  },
569
  {
570
  "epoch": 0.68,
571
  "learning_rate": 9.987476738349571e-05,
572
- "loss": 3.3212,
573
  "step": 47000
574
  },
575
  {
576
  "epoch": 0.69,
577
  "learning_rate": 9.987206088481545e-05,
578
- "loss": 3.1915,
579
  "step": 47500
580
  },
581
  {
582
  "epoch": 0.7,
583
  "learning_rate": 9.986932548930414e-05,
584
- "loss": 3.1608,
585
  "step": 48000
586
  },
587
  {
588
  "epoch": 0.7,
589
  "learning_rate": 9.986656119854672e-05,
590
- "loss": 3.217,
591
  "step": 48500
592
  },
593
  {
594
  "epoch": 0.71,
595
  "learning_rate": 9.986376801414485e-05,
596
- "loss": 3.1989,
597
  "step": 49000
598
  },
599
  {
600
  "epoch": 0.72,
601
  "learning_rate": 9.986094593771699e-05,
602
- "loss": 3.3067,
603
  "step": 49500
604
  },
605
  {
606
  "epoch": 0.73,
607
  "learning_rate": 9.985809497089827e-05,
608
- "loss": 3.2195,
609
  "step": 50000
610
  },
611
  {
612
  "epoch": 0.73,
613
  "learning_rate": 9.985521511534062e-05,
614
- "loss": 3.148,
615
  "step": 50500
616
  },
617
  {
618
  "epoch": 0.74,
619
  "learning_rate": 9.985230637271266e-05,
620
- "loss": 3.1987,
621
  "step": 51000
622
  },
623
  {
624
  "epoch": 0.75,
625
  "learning_rate": 9.984936874469979e-05,
626
- "loss": 3.1153,
627
  "step": 51500
628
  },
629
  {
630
  "epoch": 0.76,
631
  "learning_rate": 9.984640223300413e-05,
632
- "loss": 3.2841,
633
  "step": 52000
634
  },
635
  {
636
  "epoch": 0.76,
637
  "learning_rate": 9.98434068393445e-05,
638
- "loss": 3.2033,
639
  "step": 52500
640
  },
641
  {
642
  "epoch": 0.77,
643
  "learning_rate": 9.984038256545653e-05,
644
- "loss": 3.2102,
645
  "step": 53000
646
  },
647
  {
648
  "epoch": 0.78,
649
  "learning_rate": 9.983732941309253e-05,
650
- "loss": 3.1817,
651
  "step": 53500
652
  },
653
  {
654
  "epoch": 0.78,
655
  "learning_rate": 9.983424738402156e-05,
656
- "loss": 3.1485,
657
  "step": 54000
658
  },
659
  {
660
  "epoch": 0.79,
661
  "learning_rate": 9.98311364800294e-05,
662
- "loss": 3.2417,
663
  "step": 54500
664
  },
665
  {
666
  "epoch": 0.8,
667
  "learning_rate": 9.982799670291857e-05,
668
- "loss": 3.2174,
669
  "step": 55000
670
  },
671
  {
672
  "epoch": 0.81,
673
  "learning_rate": 9.98248280545083e-05,
674
- "loss": 3.2862,
675
  "step": 55500
676
  },
677
  {
678
  "epoch": 0.81,
679
  "learning_rate": 9.982163053663459e-05,
680
- "loss": 3.201,
681
  "step": 56000
682
  },
683
  {
684
  "epoch": 0.82,
685
  "learning_rate": 9.981840415115014e-05,
686
- "loss": 3.3873,
687
  "step": 56500
688
  },
689
  {
690
  "epoch": 0.83,
691
  "learning_rate": 9.981514889992436e-05,
692
- "loss": 3.1844,
693
  "step": 57000
694
  },
695
  {
696
  "epoch": 0.84,
697
  "learning_rate": 9.981186478484344e-05,
698
- "loss": 3.1807,
699
  "step": 57500
700
  },
701
  {
702
  "epoch": 0.84,
703
  "learning_rate": 9.980855180781021e-05,
704
- "loss": 3.2758,
705
  "step": 58000
706
  },
707
  {
708
  "epoch": 0.85,
709
  "learning_rate": 9.980520997074432e-05,
710
- "loss": 3.1406,
711
  "step": 58500
712
  },
713
  {
714
  "epoch": 0.86,
715
  "learning_rate": 9.980183927558207e-05,
716
- "loss": 3.1607,
717
  "step": 59000
718
  },
719
  {
720
  "epoch": 0.86,
721
  "learning_rate": 9.97984397242765e-05,
722
- "loss": 3.1829,
723
  "step": 59500
724
  },
725
  {
726
  "epoch": 0.87,
727
  "learning_rate": 9.979501131879741e-05,
728
- "loss": 3.2238,
729
  "step": 60000
730
  },
731
  {
732
  "epoch": 0.88,
733
  "learning_rate": 9.979155406113124e-05,
734
- "loss": 3.2348,
735
  "step": 60500
736
  },
737
  {
738
  "epoch": 0.89,
739
  "learning_rate": 9.978806795328121e-05,
740
- "loss": 3.2933,
741
  "step": 61000
742
  },
743
  {
744
  "epoch": 0.89,
745
  "learning_rate": 9.978455299726726e-05,
746
- "loss": 3.2051,
747
  "step": 61500
748
  },
749
  {
750
  "epoch": 0.9,
751
  "learning_rate": 9.978100919512598e-05,
752
- "loss": 3.1736,
753
  "step": 62000
754
  },
755
  {
756
  "epoch": 0.91,
757
  "learning_rate": 9.977743654891077e-05,
758
- "loss": 3.173,
759
  "step": 62500
760
  },
761
  {
762
  "epoch": 0.92,
763
  "learning_rate": 9.977383506069164e-05,
764
- "loss": 3.2732,
765
  "step": 63000
766
  },
767
  {
768
  "epoch": 0.92,
769
  "learning_rate": 9.977020473255539e-05,
770
- "loss": 3.2447,
771
  "step": 63500
772
  },
773
  {
774
  "epoch": 0.93,
775
  "learning_rate": 9.976654556660548e-05,
776
- "loss": 3.2526,
777
  "step": 64000
778
  },
779
  {
780
  "epoch": 0.94,
781
  "learning_rate": 9.976285756496211e-05,
782
- "loss": 3.1814,
783
  "step": 64500
784
  },
785
  {
786
  "epoch": 0.94,
787
  "learning_rate": 9.97591407297622e-05,
788
- "loss": 3.1533,
789
  "step": 65000
790
  },
791
  {
792
  "epoch": 0.95,
793
  "learning_rate": 9.975539506315933e-05,
794
- "loss": 3.0491,
795
  "step": 65500
796
  },
797
  {
798
  "epoch": 0.96,
799
  "learning_rate": 9.975162056732385e-05,
800
- "loss": 3.1918,
801
  "step": 66000
802
  },
803
  {
804
  "epoch": 0.97,
805
  "learning_rate": 9.974781724444272e-05,
806
- "loss": 3.1918,
807
  "step": 66500
808
  },
809
  {
810
  "epoch": 0.97,
811
  "learning_rate": 9.974398509671969e-05,
812
- "loss": 3.1769,
813
  "step": 67000
814
  },
815
  {
816
  "epoch": 0.98,
817
  "learning_rate": 9.974012412637517e-05,
818
- "loss": 3.196,
819
  "step": 67500
820
  },
821
  {
822
  "epoch": 0.99,
823
  "learning_rate": 9.97362343356463e-05,
824
- "loss": 3.1799,
825
  "step": 68000
826
  },
827
  {
828
  "epoch": 1.0,
829
  "learning_rate": 9.973231572678686e-05,
830
- "loss": 3.2516,
831
  "step": 68500
832
  },
833
  {
@@ -835,7 +835,7 @@
835
  "eval_bleu": 1.0,
836
  "eval_brevity_penalty": 1.0,
837
  "eval_length_ratio": 1.0,
838
- "eval_loss": 3.0806901454925537,
839
  "eval_precisions": [
840
  1.0,
841
  1.0,
@@ -843,9 +843,9 @@
843
  1.0
844
  ],
845
  "eval_reference_length": 7828480,
846
- "eval_runtime": 21022.3759,
847
- "eval_samples_per_second": 0.727,
848
- "eval_steps_per_second": 0.364,
849
  "eval_translation_length": 7828480,
850
  "step": 68803
851
  }
 
1
  {
2
+ "best_metric": 3.016300916671753,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-68803",
4
  "epoch": 1.0,
5
  "eval_steps": 500,
 
11
  {
12
  "epoch": 0.01,
13
  "learning_rate": 0.0001,
14
+ "loss": 4.5647,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 9.999998551451928e-05,
20
+ "loss": 3.7798,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 9.999994205808551e-05,
26
+ "loss": 3.6154,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.03,
31
  "learning_rate": 9.999986963072388e-05,
32
+ "loss": 3.629,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.04,
37
  "learning_rate": 9.999976823247632e-05,
38
+ "loss": 3.5877,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.04,
43
  "learning_rate": 9.999963786340163e-05,
44
+ "loss": 3.5401,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.05,
49
  "learning_rate": 9.999947852357531e-05,
50
+ "loss": 3.5804,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.06,
55
  "learning_rate": 9.999929021308971e-05,
56
+ "loss": 3.4367,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.07,
61
  "learning_rate": 9.999907293205393e-05,
62
+ "loss": 3.4906,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.07,
67
  "learning_rate": 9.999882668059387e-05,
68
+ "loss": 3.4193,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.08,
73
  "learning_rate": 9.99985514588522e-05,
74
+ "loss": 3.4256,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.09,
79
  "learning_rate": 9.99982472669884e-05,
80
+ "loss": 3.4774,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.09,
85
  "learning_rate": 9.999791410517874e-05,
86
+ "loss": 3.4629,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.1,
91
  "learning_rate": 9.999755197361624e-05,
92
+ "loss": 3.3818,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 0.11,
97
  "learning_rate": 9.999716087251072e-05,
98
+ "loss": 3.4648,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 0.12,
103
  "learning_rate": 9.99967408020888e-05,
104
+ "loss": 3.4705,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 0.12,
109
  "learning_rate": 9.999629176259391e-05,
110
+ "loss": 3.4356,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 0.13,
115
  "learning_rate": 9.999581375428617e-05,
116
+ "loss": 3.4238,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 0.14,
121
  "learning_rate": 9.999530677744258e-05,
122
+ "loss": 3.4732,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 0.15,
127
  "learning_rate": 9.999477083235691e-05,
128
+ "loss": 3.3216,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 0.15,
133
  "learning_rate": 9.999420591933965e-05,
134
+ "loss": 3.465,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 0.16,
139
  "learning_rate": 9.999361203871817e-05,
140
+ "loss": 3.3641,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 0.17,
145
  "learning_rate": 9.999298919083656e-05,
146
+ "loss": 3.4407,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 0.17,
151
  "learning_rate": 9.99923373760557e-05,
152
+ "loss": 3.3962,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 0.18,
157
  "learning_rate": 9.999165659475324e-05,
158
+ "loss": 3.3776,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 0.19,
163
  "learning_rate": 9.999094684732369e-05,
164
+ "loss": 3.3157,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 0.2,
169
  "learning_rate": 9.999020813417826e-05,
170
+ "loss": 3.2517,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 0.2,
175
  "learning_rate": 9.998944045574499e-05,
176
+ "loss": 3.4232,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 0.21,
181
  "learning_rate": 9.998864381246869e-05,
182
+ "loss": 3.3539,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 0.22,
187
  "learning_rate": 9.998781820481091e-05,
188
+ "loss": 3.2431,
189
  "step": 15000
190
  },
191
  {
192
  "epoch": 0.23,
193
  "learning_rate": 9.998696363325009e-05,
194
+ "loss": 3.2796,
195
  "step": 15500
196
  },
197
  {
198
  "epoch": 0.23,
199
  "learning_rate": 9.998608009828132e-05,
200
+ "loss": 3.3468,
201
  "step": 16000
202
  },
203
  {
204
  "epoch": 0.24,
205
  "learning_rate": 9.998516760041659e-05,
206
+ "loss": 3.3159,
207
  "step": 16500
208
  },
209
  {
210
  "epoch": 0.25,
211
  "learning_rate": 9.998422614018456e-05,
212
+ "loss": 3.3635,
213
  "step": 17000
214
  },
215
  {
216
  "epoch": 0.25,
217
  "learning_rate": 9.998325571813079e-05,
218
+ "loss": 3.3708,
219
  "step": 17500
220
  },
221
  {
222
  "epoch": 0.26,
223
  "learning_rate": 9.998225633481753e-05,
224
+ "loss": 3.3435,
225
  "step": 18000
226
  },
227
  {
228
  "epoch": 0.27,
229
  "learning_rate": 9.998122799082386e-05,
230
+ "loss": 3.3649,
231
  "step": 18500
232
  },
233
  {
234
  "epoch": 0.28,
235
  "learning_rate": 9.998017068674558e-05,
236
+ "loss": 3.3519,
237
  "step": 19000
238
  },
239
  {
240
  "epoch": 0.28,
241
  "learning_rate": 9.997908442319536e-05,
242
+ "loss": 3.3408,
243
  "step": 19500
244
  },
245
  {
246
  "epoch": 0.29,
247
  "learning_rate": 9.99779692008026e-05,
248
+ "loss": 3.2678,
249
  "step": 20000
250
  },
251
  {
252
  "epoch": 0.3,
253
  "learning_rate": 9.997682502021345e-05,
254
+ "loss": 3.2453,
255
  "step": 20500
256
  },
257
  {
258
  "epoch": 0.31,
259
  "learning_rate": 9.997565188209089e-05,
260
+ "loss": 3.2948,
261
  "step": 21000
262
  },
263
  {
264
  "epoch": 0.31,
265
  "learning_rate": 9.997444978711465e-05,
266
+ "loss": 3.2849,
267
  "step": 21500
268
  },
269
  {
270
  "epoch": 0.32,
271
  "learning_rate": 9.997321873598125e-05,
272
+ "loss": 3.3274,
273
  "step": 22000
274
  },
275
  {
276
  "epoch": 0.33,
277
  "learning_rate": 9.9971958729404e-05,
278
+ "loss": 3.3084,
279
  "step": 22500
280
  },
281
  {
282
  "epoch": 0.33,
283
  "learning_rate": 9.997066976811294e-05,
284
+ "loss": 3.3019,
285
  "step": 23000
286
  },
287
  {
288
  "epoch": 0.34,
289
  "learning_rate": 9.996935185285495e-05,
290
+ "loss": 3.2998,
291
  "step": 23500
292
  },
293
  {
294
  "epoch": 0.35,
295
  "learning_rate": 9.996800498439362e-05,
296
+ "loss": 3.268,
297
  "step": 24000
298
  },
299
  {
300
  "epoch": 0.36,
301
  "learning_rate": 9.99666291635094e-05,
302
+ "loss": 3.248,
303
  "step": 24500
304
  },
305
  {
306
  "epoch": 0.36,
307
  "learning_rate": 9.996522439099943e-05,
308
+ "loss": 3.2204,
309
  "step": 25000
310
  },
311
  {
312
  "epoch": 0.37,
313
  "learning_rate": 9.99637906676777e-05,
314
+ "loss": 3.2922,
315
  "step": 25500
316
  },
317
  {
318
  "epoch": 0.38,
319
  "learning_rate": 9.996232799437487e-05,
320
+ "loss": 3.2716,
321
  "step": 26000
322
  },
323
  {
324
  "epoch": 0.39,
325
  "learning_rate": 9.996083637193849e-05,
326
+ "loss": 3.3181,
327
  "step": 26500
328
  },
329
  {
330
  "epoch": 0.39,
331
  "learning_rate": 9.995931580123284e-05,
332
+ "loss": 3.2899,
333
  "step": 27000
334
  },
335
  {
336
  "epoch": 0.4,
337
  "learning_rate": 9.995776628313896e-05,
338
+ "loss": 3.266,
339
  "step": 27500
340
  },
341
  {
342
  "epoch": 0.41,
343
  "learning_rate": 9.995618781855464e-05,
344
+ "loss": 3.3644,
345
  "step": 28000
346
  },
347
  {
348
  "epoch": 0.41,
349
  "learning_rate": 9.995458040839452e-05,
350
+ "loss": 3.2413,
351
  "step": 28500
352
  },
353
  {
354
  "epoch": 0.42,
355
  "learning_rate": 9.995294405358993e-05,
356
+ "loss": 3.2759,
357
  "step": 29000
358
  },
359
  {
360
  "epoch": 0.43,
361
  "learning_rate": 9.995127875508903e-05,
362
+ "loss": 3.3065,
363
  "step": 29500
364
  },
365
  {
366
  "epoch": 0.44,
367
  "learning_rate": 9.99495845138567e-05,
368
+ "loss": 3.3028,
369
  "step": 30000
370
  },
371
  {
372
  "epoch": 0.44,
373
  "learning_rate": 9.994786133087464e-05,
374
+ "loss": 3.2613,
375
  "step": 30500
376
  },
377
  {
378
  "epoch": 0.45,
379
  "learning_rate": 9.994610920714126e-05,
380
+ "loss": 3.2137,
381
  "step": 31000
382
  },
383
  {
384
  "epoch": 0.46,
385
  "learning_rate": 9.994432814367183e-05,
386
+ "loss": 3.2899,
387
  "step": 31500
388
  },
389
  {
390
  "epoch": 0.47,
391
  "learning_rate": 9.99425181414983e-05,
392
+ "loss": 3.2877,
393
  "step": 32000
394
  },
395
  {
396
  "epoch": 0.47,
397
  "learning_rate": 9.994067920166939e-05,
398
+ "loss": 3.2139,
399
  "step": 32500
400
  },
401
  {
402
  "epoch": 0.48,
403
  "learning_rate": 9.993881132525067e-05,
404
+ "loss": 3.2407,
405
  "step": 33000
406
  },
407
  {
408
  "epoch": 0.49,
409
  "learning_rate": 9.993691451332439e-05,
410
+ "loss": 3.3226,
411
  "step": 33500
412
  },
413
  {
414
  "epoch": 0.49,
415
  "learning_rate": 9.993498876698963e-05,
416
+ "loss": 3.2004,
417
  "step": 34000
418
  },
419
  {
420
  "epoch": 0.5,
421
  "learning_rate": 9.993303408736217e-05,
422
+ "loss": 3.2773,
423
  "step": 34500
424
  },
425
  {
426
  "epoch": 0.51,
427
  "learning_rate": 9.993105047557461e-05,
428
+ "loss": 3.2183,
429
  "step": 35000
430
  },
431
  {
432
  "epoch": 0.52,
433
  "learning_rate": 9.992903793277628e-05,
434
+ "loss": 3.2648,
435
  "step": 35500
436
  },
437
  {
438
  "epoch": 0.52,
439
  "learning_rate": 9.99269964601333e-05,
440
+ "loss": 3.1963,
441
  "step": 36000
442
  },
443
  {
444
  "epoch": 0.53,
445
  "learning_rate": 9.992492605882853e-05,
446
+ "loss": 3.2825,
447
  "step": 36500
448
  },
449
  {
450
  "epoch": 0.54,
451
  "learning_rate": 9.99228267300616e-05,
452
+ "loss": 3.2658,
453
  "step": 37000
454
  },
455
  {
456
  "epoch": 0.55,
457
  "learning_rate": 9.992069847504891e-05,
458
+ "loss": 3.1058,
459
  "step": 37500
460
  },
461
  {
462
  "epoch": 0.55,
463
  "learning_rate": 9.99185412950236e-05,
464
+ "loss": 3.1375,
465
  "step": 38000
466
  },
467
  {
468
  "epoch": 0.56,
469
  "learning_rate": 9.991635519123559e-05,
470
+ "loss": 3.2075,
471
  "step": 38500
472
  },
473
  {
474
  "epoch": 0.57,
475
  "learning_rate": 9.991414016495155e-05,
476
+ "loss": 3.2228,
477
  "step": 39000
478
  },
479
  {
480
  "epoch": 0.57,
481
  "learning_rate": 9.99118962174549e-05,
482
+ "loss": 3.2004,
483
  "step": 39500
484
  },
485
  {
486
  "epoch": 0.58,
487
  "learning_rate": 9.990962335004584e-05,
488
+ "loss": 3.1924,
489
  "step": 40000
490
  },
491
  {
492
  "epoch": 0.59,
493
  "learning_rate": 9.99073215640413e-05,
494
+ "loss": 3.2956,
495
  "step": 40500
496
  },
497
  {
498
  "epoch": 0.6,
499
  "learning_rate": 9.990499086077498e-05,
500
+ "loss": 3.2698,
501
  "step": 41000
502
  },
503
  {
504
  "epoch": 0.6,
505
  "learning_rate": 9.990263124159736e-05,
506
+ "loss": 3.2863,
507
  "step": 41500
508
  },
509
  {
510
  "epoch": 0.61,
511
  "learning_rate": 9.990024270787561e-05,
512
+ "loss": 3.3611,
513
  "step": 42000
514
  },
515
  {
516
  "epoch": 0.62,
517
  "learning_rate": 9.989782526099372e-05,
518
+ "loss": 3.1691,
519
  "step": 42500
520
  },
521
  {
522
  "epoch": 0.62,
523
  "learning_rate": 9.989537890235238e-05,
524
+ "loss": 3.1085,
525
  "step": 43000
526
  },
527
  {
528
  "epoch": 0.63,
529
  "learning_rate": 9.989290363336908e-05,
530
+ "loss": 3.1825,
531
  "step": 43500
532
  },
533
  {
534
  "epoch": 0.64,
535
  "learning_rate": 9.989039945547803e-05,
536
+ "loss": 3.2333,
537
  "step": 44000
538
  },
539
  {
540
  "epoch": 0.65,
541
  "learning_rate": 9.98878663701302e-05,
542
+ "loss": 3.2635,
543
  "step": 44500
544
  },
545
  {
546
  "epoch": 0.65,
547
  "learning_rate": 9.988530437879333e-05,
548
+ "loss": 3.2907,
549
  "step": 45000
550
  },
551
  {
552
  "epoch": 0.66,
553
  "learning_rate": 9.988271348295184e-05,
554
+ "loss": 3.1334,
555
  "step": 45500
556
  },
557
  {
558
  "epoch": 0.67,
559
  "learning_rate": 9.988009368410698e-05,
560
+ "loss": 3.2239,
561
  "step": 46000
562
  },
563
  {
564
  "epoch": 0.68,
565
  "learning_rate": 9.98774449837767e-05,
566
+ "loss": 3.1904,
567
  "step": 46500
568
  },
569
  {
570
  "epoch": 0.68,
571
  "learning_rate": 9.987476738349571e-05,
572
+ "loss": 3.2781,
573
  "step": 47000
574
  },
575
  {
576
  "epoch": 0.69,
577
  "learning_rate": 9.987206088481545e-05,
578
+ "loss": 3.19,
579
  "step": 47500
580
  },
581
  {
582
  "epoch": 0.7,
583
  "learning_rate": 9.986932548930414e-05,
584
+ "loss": 3.2235,
585
  "step": 48000
586
  },
587
  {
588
  "epoch": 0.7,
589
  "learning_rate": 9.986656119854672e-05,
590
+ "loss": 3.2302,
591
  "step": 48500
592
  },
593
  {
594
  "epoch": 0.71,
595
  "learning_rate": 9.986376801414485e-05,
596
+ "loss": 3.289,
597
  "step": 49000
598
  },
599
  {
600
  "epoch": 0.72,
601
  "learning_rate": 9.986094593771699e-05,
602
+ "loss": 3.2874,
603
  "step": 49500
604
  },
605
  {
606
  "epoch": 0.73,
607
  "learning_rate": 9.985809497089827e-05,
608
+ "loss": 3.144,
609
  "step": 50000
610
  },
611
  {
612
  "epoch": 0.73,
613
  "learning_rate": 9.985521511534062e-05,
614
+ "loss": 3.1967,
615
  "step": 50500
616
  },
617
  {
618
  "epoch": 0.74,
619
  "learning_rate": 9.985230637271266e-05,
620
+ "loss": 3.248,
621
  "step": 51000
622
  },
623
  {
624
  "epoch": 0.75,
625
  "learning_rate": 9.984936874469979e-05,
626
+ "loss": 3.1933,
627
  "step": 51500
628
  },
629
  {
630
  "epoch": 0.76,
631
  "learning_rate": 9.984640223300413e-05,
632
+ "loss": 3.2108,
633
  "step": 52000
634
  },
635
  {
636
  "epoch": 0.76,
637
  "learning_rate": 9.98434068393445e-05,
638
+ "loss": 3.0986,
639
  "step": 52500
640
  },
641
  {
642
  "epoch": 0.77,
643
  "learning_rate": 9.984038256545653e-05,
644
+ "loss": 3.2016,
645
  "step": 53000
646
  },
647
  {
648
  "epoch": 0.78,
649
  "learning_rate": 9.983732941309253e-05,
650
+ "loss": 3.1967,
651
  "step": 53500
652
  },
653
  {
654
  "epoch": 0.78,
655
  "learning_rate": 9.983424738402156e-05,
656
+ "loss": 3.2803,
657
  "step": 54000
658
  },
659
  {
660
  "epoch": 0.79,
661
  "learning_rate": 9.98311364800294e-05,
662
+ "loss": 3.2952,
663
  "step": 54500
664
  },
665
  {
666
  "epoch": 0.8,
667
  "learning_rate": 9.982799670291857e-05,
668
+ "loss": 3.18,
669
  "step": 55000
670
  },
671
  {
672
  "epoch": 0.81,
673
  "learning_rate": 9.98248280545083e-05,
674
+ "loss": 3.1768,
675
  "step": 55500
676
  },
677
  {
678
  "epoch": 0.81,
679
  "learning_rate": 9.982163053663459e-05,
680
+ "loss": 3.1986,
681
  "step": 56000
682
  },
683
  {
684
  "epoch": 0.82,
685
  "learning_rate": 9.981840415115014e-05,
686
+ "loss": 3.2241,
687
  "step": 56500
688
  },
689
  {
690
  "epoch": 0.83,
691
  "learning_rate": 9.981514889992436e-05,
692
+ "loss": 3.2754,
693
  "step": 57000
694
  },
695
  {
696
  "epoch": 0.84,
697
  "learning_rate": 9.981186478484344e-05,
698
+ "loss": 3.2162,
699
  "step": 57500
700
  },
701
  {
702
  "epoch": 0.84,
703
  "learning_rate": 9.980855180781021e-05,
704
+ "loss": 3.2659,
705
  "step": 58000
706
  },
707
  {
708
  "epoch": 0.85,
709
  "learning_rate": 9.980520997074432e-05,
710
+ "loss": 3.172,
711
  "step": 58500
712
  },
713
  {
714
  "epoch": 0.86,
715
  "learning_rate": 9.980183927558207e-05,
716
+ "loss": 3.3035,
717
  "step": 59000
718
  },
719
  {
720
  "epoch": 0.86,
721
  "learning_rate": 9.97984397242765e-05,
722
+ "loss": 3.1746,
723
  "step": 59500
724
  },
725
  {
726
  "epoch": 0.87,
727
  "learning_rate": 9.979501131879741e-05,
728
+ "loss": 3.1678,
729
  "step": 60000
730
  },
731
  {
732
  "epoch": 0.88,
733
  "learning_rate": 9.979155406113124e-05,
734
+ "loss": 3.162,
735
  "step": 60500
736
  },
737
  {
738
  "epoch": 0.89,
739
  "learning_rate": 9.978806795328121e-05,
740
+ "loss": 3.3041,
741
  "step": 61000
742
  },
743
  {
744
  "epoch": 0.89,
745
  "learning_rate": 9.978455299726726e-05,
746
+ "loss": 3.2086,
747
  "step": 61500
748
  },
749
  {
750
  "epoch": 0.9,
751
  "learning_rate": 9.978100919512598e-05,
752
+ "loss": 3.1832,
753
  "step": 62000
754
  },
755
  {
756
  "epoch": 0.91,
757
  "learning_rate": 9.977743654891077e-05,
758
+ "loss": 3.1839,
759
  "step": 62500
760
  },
761
  {
762
  "epoch": 0.92,
763
  "learning_rate": 9.977383506069164e-05,
764
+ "loss": 3.123,
765
  "step": 63000
766
  },
767
  {
768
  "epoch": 0.92,
769
  "learning_rate": 9.977020473255539e-05,
770
+ "loss": 3.2868,
771
  "step": 63500
772
  },
773
  {
774
  "epoch": 0.93,
775
  "learning_rate": 9.976654556660548e-05,
776
+ "loss": 3.1488,
777
  "step": 64000
778
  },
779
  {
780
  "epoch": 0.94,
781
  "learning_rate": 9.976285756496211e-05,
782
+ "loss": 3.2116,
783
  "step": 64500
784
  },
785
  {
786
  "epoch": 0.94,
787
  "learning_rate": 9.97591407297622e-05,
788
+ "loss": 3.2233,
789
  "step": 65000
790
  },
791
  {
792
  "epoch": 0.95,
793
  "learning_rate": 9.975539506315933e-05,
794
+ "loss": 3.1018,
795
  "step": 65500
796
  },
797
  {
798
  "epoch": 0.96,
799
  "learning_rate": 9.975162056732385e-05,
800
+ "loss": 3.1404,
801
  "step": 66000
802
  },
803
  {
804
  "epoch": 0.97,
805
  "learning_rate": 9.974781724444272e-05,
806
+ "loss": 3.1984,
807
  "step": 66500
808
  },
809
  {
810
  "epoch": 0.97,
811
  "learning_rate": 9.974398509671969e-05,
812
+ "loss": 3.2421,
813
  "step": 67000
814
  },
815
  {
816
  "epoch": 0.98,
817
  "learning_rate": 9.974012412637517e-05,
818
+ "loss": 3.1499,
819
  "step": 67500
820
  },
821
  {
822
  "epoch": 0.99,
823
  "learning_rate": 9.97362343356463e-05,
824
+ "loss": 3.174,
825
  "step": 68000
826
  },
827
  {
828
  "epoch": 1.0,
829
  "learning_rate": 9.973231572678686e-05,
830
+ "loss": 3.1816,
831
  "step": 68500
832
  },
833
  {
 
835
  "eval_bleu": 1.0,
836
  "eval_brevity_penalty": 1.0,
837
  "eval_length_ratio": 1.0,
838
+ "eval_loss": 3.016300916671753,
839
  "eval_precisions": [
840
  1.0,
841
  1.0,
 
843
  1.0
844
  ],
845
  "eval_reference_length": 7828480,
846
+ "eval_runtime": 20103.3013,
847
+ "eval_samples_per_second": 0.761,
848
+ "eval_steps_per_second": 0.38,
849
  "eval_translation_length": 7828480,
850
  "step": 68803
851
  }