sheepy928 commited on
Commit
8b7f544
1 Parent(s): 6dad752

Training in progress, step 900, checkpoint

Browse files
checkpoint-900/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3f0b1a0547175c19a1680ce564322547ef5e2733b885a76994c0c58f99c4522
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b36c3689596a89a01d421d33d9935d3cadd42d25fef48b7d724429a9ce114ef
3
  size 14244
checkpoint-900/trainer_state.json CHANGED
@@ -23,10 +23,14 @@
23
  {
24
  "epoch": 0.06,
25
  "eval_accuracy": 0.44479243019924036,
 
 
26
  "eval_loss": 1.065091609954834,
27
- "eval_runtime": 50.348,
28
- "eval_samples_per_second": 596.131,
29
- "eval_steps_per_second": 1.172,
 
 
30
  "step": 20
31
  },
32
  {
@@ -44,10 +48,14 @@
44
  {
45
  "epoch": 0.12,
46
  "eval_accuracy": 0.5033650962883988,
 
 
47
  "eval_loss": 1.0188277959823608,
48
- "eval_runtime": 49.614,
49
- "eval_samples_per_second": 604.95,
50
- "eval_steps_per_second": 1.189,
 
 
51
  "step": 40
52
  },
53
  {
@@ -65,10 +73,14 @@
65
  {
66
  "epoch": 0.18,
67
  "eval_accuracy": 0.5279203038581995,
 
 
68
  "eval_loss": 0.9871189594268799,
69
- "eval_runtime": 49.5201,
70
- "eval_samples_per_second": 606.098,
71
- "eval_steps_per_second": 1.191,
 
 
72
  "step": 60
73
  },
74
  {
@@ -86,10 +98,14 @@
86
  {
87
  "epoch": 0.24,
88
  "eval_accuracy": 0.5308189511561271,
 
 
89
  "eval_loss": 0.9888613224029541,
90
- "eval_runtime": 49.4469,
91
- "eval_samples_per_second": 606.994,
92
- "eval_steps_per_second": 1.193,
 
 
93
  "step": 80
94
  },
95
  {
@@ -107,10 +123,14 @@
107
  {
108
  "epoch": 0.3,
109
  "eval_accuracy": 0.5307856333710935,
 
 
110
  "eval_loss": 0.9762536883354187,
111
- "eval_runtime": 49.4804,
112
- "eval_samples_per_second": 606.583,
113
- "eval_steps_per_second": 1.192,
 
 
114
  "step": 100
115
  },
116
  {
@@ -128,10 +148,14 @@
128
  {
129
  "epoch": 0.36,
130
  "eval_accuracy": 0.5387819017791697,
 
 
131
  "eval_loss": 0.9713281989097595,
132
- "eval_runtime": 49.4983,
133
- "eval_samples_per_second": 606.364,
134
- "eval_steps_per_second": 1.192,
 
 
135
  "step": 120
136
  },
137
  {
@@ -149,10 +173,14 @@
149
  {
150
  "epoch": 0.42,
151
  "eval_accuracy": 0.5312520823615646,
 
 
152
  "eval_loss": 0.9766249656677246,
153
- "eval_runtime": 49.5213,
154
- "eval_samples_per_second": 606.083,
155
- "eval_steps_per_second": 1.191,
 
 
156
  "step": 140
157
  },
158
  {
@@ -170,10 +198,14 @@
170
  {
171
  "epoch": 0.48,
172
  "eval_accuracy": 0.5398147531152129,
 
 
173
  "eval_loss": 0.9589501619338989,
174
- "eval_runtime": 49.4977,
175
- "eval_samples_per_second": 606.372,
176
- "eval_steps_per_second": 1.192,
 
 
177
  "step": 160
178
  },
179
  {
@@ -191,10 +223,14 @@
191
  {
192
  "epoch": 0.54,
193
  "eval_accuracy": 0.5423469047777704,
 
 
194
  "eval_loss": 0.953514814376831,
195
- "eval_runtime": 49.4511,
196
- "eval_samples_per_second": 606.943,
197
- "eval_steps_per_second": 1.193,
 
 
198
  "step": 180
199
  },
200
  {
@@ -212,10 +248,14 @@
212
  {
213
  "epoch": 0.6,
214
  "eval_accuracy": 0.567201972412874,
 
 
215
  "eval_loss": 0.9273685812950134,
216
- "eval_runtime": 49.4122,
217
- "eval_samples_per_second": 607.421,
218
- "eval_steps_per_second": 1.194,
 
 
219
  "step": 200
220
  },
221
  {
@@ -233,10 +273,14 @@
233
  {
234
  "epoch": 0.66,
235
  "eval_accuracy": 0.573598987139335,
 
 
236
  "eval_loss": 0.912590503692627,
237
- "eval_runtime": 49.2827,
238
- "eval_samples_per_second": 609.017,
239
- "eval_steps_per_second": 1.197,
 
 
240
  "step": 220
241
  },
242
  {
@@ -254,10 +298,14 @@
254
  {
255
  "epoch": 0.72,
256
  "eval_accuracy": 0.5759645498767242,
 
 
257
  "eval_loss": 0.9052607417106628,
258
- "eval_runtime": 49.4457,
259
- "eval_samples_per_second": 607.01,
260
- "eval_steps_per_second": 1.193,
 
 
261
  "step": 240
262
  },
263
  {
@@ -275,10 +323,14 @@
275
  {
276
  "epoch": 0.78,
277
  "eval_accuracy": 0.5766975411474645,
 
 
278
  "eval_loss": 0.9178985953330994,
279
- "eval_runtime": 49.3132,
280
- "eval_samples_per_second": 608.641,
281
- "eval_steps_per_second": 1.196,
 
 
282
  "step": 260
283
  },
284
  {
@@ -296,10 +348,14 @@
296
  {
297
  "epoch": 0.84,
298
  "eval_accuracy": 0.58915839275005,
 
 
299
  "eval_loss": 0.8937407732009888,
300
- "eval_runtime": 49.4342,
301
- "eval_samples_per_second": 607.15,
302
- "eval_steps_per_second": 1.194,
 
 
303
  "step": 280
304
  },
305
  {
@@ -317,10 +373,14 @@
317
  {
318
  "epoch": 0.9,
319
  "eval_accuracy": 0.613013926834144,
 
 
320
  "eval_loss": 0.8468813300132751,
321
- "eval_runtime": 49.5046,
322
- "eval_samples_per_second": 606.287,
323
- "eval_steps_per_second": 1.192,
 
 
324
  "step": 300
325
  },
326
  {
@@ -338,10 +398,14 @@
338
  {
339
  "epoch": 0.96,
340
  "eval_accuracy": 0.6046511627906976,
 
 
341
  "eval_loss": 0.8615403771400452,
342
- "eval_runtime": 49.5041,
343
- "eval_samples_per_second": 606.294,
344
- "eval_steps_per_second": 1.192,
 
 
345
  "step": 320
346
  },
347
  {
@@ -359,10 +423,14 @@
359
  {
360
  "epoch": 1.02,
361
  "eval_accuracy": 0.6439328313453722,
 
 
362
  "eval_loss": 0.7896137237548828,
363
- "eval_runtime": 49.4448,
364
- "eval_samples_per_second": 607.02,
365
- "eval_steps_per_second": 1.193,
 
 
366
  "step": 340
367
  },
368
  {
@@ -380,10 +448,14 @@
380
  {
381
  "epoch": 1.08,
382
  "eval_accuracy": 0.5315852602119011,
 
 
383
  "eval_loss": 1.0123510360717773,
384
- "eval_runtime": 49.4838,
385
- "eval_samples_per_second": 606.542,
386
- "eval_steps_per_second": 1.192,
 
 
387
  "step": 360
388
  },
389
  {
@@ -401,10 +473,14 @@
401
  {
402
  "epoch": 1.14,
403
  "eval_accuracy": 0.6488638635303525,
 
 
404
  "eval_loss": 0.791083574295044,
405
- "eval_runtime": 49.4701,
406
- "eval_samples_per_second": 606.71,
407
- "eval_steps_per_second": 1.193,
 
 
408
  "step": 380
409
  },
410
  {
@@ -422,10 +498,14 @@
422
  {
423
  "epoch": 1.2,
424
  "eval_accuracy": 0.6699873392416872,
 
 
425
  "eval_loss": 0.7472424507141113,
426
- "eval_runtime": 49.4938,
427
- "eval_samples_per_second": 606.419,
428
- "eval_steps_per_second": 1.192,
 
 
429
  "step": 400
430
  },
431
  {
@@ -443,10 +523,14 @@
443
  {
444
  "epoch": 1.26,
445
  "eval_accuracy": 0.6580595721996402,
 
 
446
  "eval_loss": 0.7663838863372803,
447
- "eval_runtime": 49.5064,
448
- "eval_samples_per_second": 606.265,
449
- "eval_steps_per_second": 1.192,
 
 
450
  "step": 420
451
  },
452
  {
@@ -464,10 +548,14 @@
464
  {
465
  "epoch": 1.32,
466
  "eval_accuracy": 0.6991737189311654,
 
 
467
  "eval_loss": 0.6993714570999146,
468
- "eval_runtime": 49.4888,
469
- "eval_samples_per_second": 606.481,
470
- "eval_steps_per_second": 1.192,
 
 
471
  "step": 440
472
  },
473
  {
@@ -485,10 +573,14 @@
485
  {
486
  "epoch": 1.38,
487
  "eval_accuracy": 0.7283600986206437,
 
 
488
  "eval_loss": 0.6510820984840393,
489
- "eval_runtime": 49.5999,
490
- "eval_samples_per_second": 605.122,
491
- "eval_steps_per_second": 1.19,
 
 
492
  "step": 460
493
  },
494
  {
@@ -506,10 +598,14 @@
506
  {
507
  "epoch": 1.44,
508
  "eval_accuracy": 0.7576797494502565,
 
 
509
  "eval_loss": 0.5987845063209534,
510
- "eval_runtime": 49.5123,
511
- "eval_samples_per_second": 606.192,
512
- "eval_steps_per_second": 1.192,
 
 
513
  "step": 480
514
  },
515
  {
@@ -527,10 +623,14 @@
527
  {
528
  "epoch": 1.5,
529
  "eval_accuracy": 0.7564136736189778,
 
 
530
  "eval_loss": 0.5992804765701294,
531
- "eval_runtime": 49.5641,
532
- "eval_samples_per_second": 605.56,
533
- "eval_steps_per_second": 1.19,
 
 
534
  "step": 500
535
  },
536
  {
@@ -548,10 +648,14 @@
548
  {
549
  "epoch": 1.57,
550
  "eval_accuracy": 0.8125874591857133,
 
 
551
  "eval_loss": 0.5067983865737915,
552
- "eval_runtime": 49.3712,
553
- "eval_samples_per_second": 607.925,
554
- "eval_steps_per_second": 1.195,
 
 
555
  "step": 520
556
  },
557
  {
@@ -569,10 +673,14 @@
569
  {
570
  "epoch": 1.63,
571
  "eval_accuracy": 0.8520023988805224,
 
 
572
  "eval_loss": 0.42726200819015503,
573
- "eval_runtime": 49.3448,
574
- "eval_samples_per_second": 608.251,
575
- "eval_steps_per_second": 1.196,
 
 
576
  "step": 540
577
  },
578
  {
@@ -590,10 +698,14 @@
590
  {
591
  "epoch": 1.69,
592
  "eval_accuracy": 0.8721929766109149,
 
 
593
  "eval_loss": 0.37960025668144226,
594
- "eval_runtime": 49.2893,
595
- "eval_samples_per_second": 608.936,
596
- "eval_steps_per_second": 1.197,
 
 
597
  "step": 560
598
  },
599
  {
@@ -611,10 +723,14 @@
611
  {
612
  "epoch": 1.75,
613
  "eval_accuracy": 0.8756580262544146,
 
 
614
  "eval_loss": 0.3854842483997345,
615
- "eval_runtime": 49.5207,
616
- "eval_samples_per_second": 606.09,
617
- "eval_steps_per_second": 1.191,
 
 
618
  "step": 580
619
  },
620
  {
@@ -632,10 +748,14 @@
632
  {
633
  "epoch": 1.81,
634
  "eval_accuracy": 0.8893849536882789,
 
 
635
  "eval_loss": 0.35855188965797424,
636
- "eval_runtime": 49.3852,
637
- "eval_samples_per_second": 607.753,
638
- "eval_steps_per_second": 1.195,
 
 
639
  "step": 600
640
  },
641
  {
@@ -653,10 +773,14 @@
653
  {
654
  "epoch": 1.87,
655
  "eval_accuracy": 0.8971813153861531,
 
 
656
  "eval_loss": 0.321043998003006,
657
- "eval_runtime": 49.4982,
658
- "eval_samples_per_second": 606.366,
659
- "eval_steps_per_second": 1.192,
 
 
660
  "step": 620
661
  },
662
  {
@@ -674,10 +798,14 @@
674
  {
675
  "epoch": 1.93,
676
  "eval_accuracy": 0.9035450123275804,
 
 
677
  "eval_loss": 0.3006099760532379,
678
- "eval_runtime": 49.5179,
679
- "eval_samples_per_second": 606.124,
680
- "eval_steps_per_second": 1.191,
 
 
681
  "step": 640
682
  },
683
  {
@@ -695,10 +823,14 @@
695
  {
696
  "epoch": 1.99,
697
  "eval_accuracy": 0.9014126740854268,
 
 
698
  "eval_loss": 0.30540063977241516,
699
- "eval_runtime": 49.5044,
700
- "eval_samples_per_second": 606.289,
701
- "eval_steps_per_second": 1.192,
 
 
702
  "step": 660
703
  },
704
  {
@@ -716,10 +848,14 @@
716
  {
717
  "epoch": 2.05,
718
  "eval_accuracy": 0.8912507496501633,
 
 
719
  "eval_loss": 0.3174073100090027,
720
- "eval_runtime": 49.4656,
721
- "eval_samples_per_second": 606.765,
722
- "eval_steps_per_second": 1.193,
 
 
723
  "step": 680
724
  },
725
  {
@@ -737,10 +873,14 @@
737
  {
738
  "epoch": 2.11,
739
  "eval_accuracy": 0.9122076364363297,
 
 
740
  "eval_loss": 0.2769884169101715,
741
- "eval_runtime": 49.5225,
742
- "eval_samples_per_second": 606.068,
743
- "eval_steps_per_second": 1.191,
 
 
744
  "step": 700
745
  },
746
  {
@@ -758,10 +898,14 @@
758
  {
759
  "epoch": 2.17,
760
  "eval_accuracy": 0.9062437529153062,
 
 
761
  "eval_loss": 0.2979203760623932,
762
- "eval_runtime": 49.2758,
763
- "eval_samples_per_second": 609.102,
764
- "eval_steps_per_second": 1.197,
 
 
765
  "step": 720
766
  },
767
  {
@@ -779,10 +923,14 @@
779
  {
780
  "epoch": 2.23,
781
  "eval_accuracy": 0.8997801026187779,
 
 
782
  "eval_loss": 0.29734131693840027,
783
- "eval_runtime": 49.4882,
784
- "eval_samples_per_second": 606.488,
785
- "eval_steps_per_second": 1.192,
 
 
786
  "step": 740
787
  },
788
  {
@@ -800,10 +948,14 @@
800
  {
801
  "epoch": 2.29,
802
  "eval_accuracy": 0.9221696541613914,
 
 
803
  "eval_loss": 0.24671417474746704,
804
- "eval_runtime": 49.5036,
805
- "eval_samples_per_second": 606.299,
806
- "eval_steps_per_second": 1.192,
 
 
807
  "step": 760
808
  },
809
  {
@@ -821,10 +973,14 @@
821
  {
822
  "epoch": 2.35,
823
  "eval_accuracy": 0.9113080562404211,
 
 
824
  "eval_loss": 0.2760636806488037,
825
- "eval_runtime": 49.4644,
826
- "eval_samples_per_second": 606.78,
827
- "eval_steps_per_second": 1.193,
 
 
828
  "step": 780
829
  },
830
  {
@@ -842,10 +998,14 @@
842
  {
843
  "epoch": 2.41,
844
  "eval_accuracy": 0.9260011994402612,
 
 
845
  "eval_loss": 0.24102580547332764,
846
- "eval_runtime": 49.464,
847
- "eval_samples_per_second": 606.785,
848
- "eval_steps_per_second": 1.193,
 
 
849
  "step": 800
850
  },
851
  {
@@ -863,10 +1023,14 @@
863
  {
864
  "epoch": 2.47,
865
  "eval_accuracy": 0.922103018591324,
 
 
866
  "eval_loss": 0.2446586638689041,
867
- "eval_runtime": 49.2591,
868
- "eval_samples_per_second": 609.308,
869
- "eval_steps_per_second": 1.198,
 
 
870
  "step": 820
871
  },
872
  {
@@ -884,10 +1048,14 @@
884
  {
885
  "epoch": 2.53,
886
  "eval_accuracy": 0.9237355900579729,
 
 
887
  "eval_loss": 0.2475174069404602,
888
- "eval_runtime": 49.5127,
889
- "eval_samples_per_second": 606.188,
890
- "eval_steps_per_second": 1.192,
 
 
891
  "step": 840
892
  },
893
  {
@@ -905,10 +1073,14 @@
905
  {
906
  "epoch": 2.59,
907
  "eval_accuracy": 0.9265342840007996,
 
 
908
  "eval_loss": 0.2590079605579376,
909
- "eval_runtime": 49.5242,
910
- "eval_samples_per_second": 606.047,
911
- "eval_steps_per_second": 1.191,
 
 
912
  "step": 860
913
  },
914
  {
@@ -926,10 +1098,14 @@
926
  {
927
  "epoch": 2.65,
928
  "eval_accuracy": 0.930032651429333,
 
 
929
  "eval_loss": 0.22479340434074402,
930
- "eval_runtime": 49.4946,
931
- "eval_samples_per_second": 606.409,
932
- "eval_steps_per_second": 1.192,
 
 
933
  "step": 880
934
  },
935
  {
@@ -947,18 +1123,22 @@
947
  {
948
  "epoch": 2.71,
949
  "eval_accuracy": 0.9273005930565736,
 
 
950
  "eval_loss": 0.22854498028755188,
951
- "eval_runtime": 49.2942,
952
- "eval_samples_per_second": 608.875,
953
- "eval_steps_per_second": 1.197,
 
 
954
  "step": 900
955
  }
956
  ],
957
  "logging_steps": 10,
958
- "max_steps": 996,
959
- "num_train_epochs": 3,
960
  "save_steps": 100,
961
- "total_flos": 1.212384490375086e+17,
962
  "trial_name": null,
963
  "trial_params": null
964
  }
 
23
  {
24
  "epoch": 0.06,
25
  "eval_accuracy": 0.44479243019924036,
26
+ "eval_combined_score": 0.3403229798934559,
27
+ "eval_f1": 0.2738667532127967,
28
  "eval_loss": 1.065091609954834,
29
+ "eval_precision": 0.19784030596254612,
30
+ "eval_recall": 0.44479243019924036,
31
+ "eval_runtime": 49.6621,
32
+ "eval_samples_per_second": 604.364,
33
+ "eval_steps_per_second": 1.188,
34
  "step": 20
35
  },
36
  {
 
48
  {
49
  "epoch": 0.12,
50
  "eval_accuracy": 0.5033650962883988,
51
+ "eval_combined_score": 0.4633375988688383,
52
+ "eval_f1": 0.4137746858205889,
53
  "eval_loss": 1.0188277959823608,
54
+ "eval_precision": 0.43284551707796665,
55
+ "eval_recall": 0.5033650962883988,
56
+ "eval_runtime": 49.1985,
57
+ "eval_samples_per_second": 610.06,
58
+ "eval_steps_per_second": 1.199,
59
  "step": 40
60
  },
61
  {
 
73
  {
74
  "epoch": 0.18,
75
  "eval_accuracy": 0.5279203038581995,
76
+ "eval_combined_score": 0.48433586826626607,
77
+ "eval_f1": 0.4596383010419703,
78
  "eval_loss": 0.9871189594268799,
79
+ "eval_precision": 0.421864564306695,
80
+ "eval_recall": 0.5279203038581995,
81
+ "eval_runtime": 49.0237,
82
+ "eval_samples_per_second": 612.235,
83
+ "eval_steps_per_second": 1.204,
84
  "step": 60
85
  },
86
  {
 
98
  {
99
  "epoch": 0.24,
100
  "eval_accuracy": 0.5308189511561271,
101
+ "eval_combined_score": 0.4866197693698143,
102
+ "eval_f1": 0.46528335920671143,
103
  "eval_loss": 0.9888613224029541,
104
+ "eval_precision": 0.4195578159602916,
105
+ "eval_recall": 0.5308189511561271,
106
+ "eval_runtime": 49.518,
107
+ "eval_samples_per_second": 606.124,
108
+ "eval_steps_per_second": 1.191,
109
  "step": 80
110
  },
111
  {
 
123
  {
124
  "epoch": 0.3,
125
  "eval_accuracy": 0.5307856333710935,
126
+ "eval_combined_score": 0.48644275806230897,
127
+ "eval_f1": 0.4666072639999521,
128
  "eval_loss": 0.9762536883354187,
129
+ "eval_precision": 0.4175925015070968,
130
+ "eval_recall": 0.5307856333710935,
131
+ "eval_runtime": 49.4629,
132
+ "eval_samples_per_second": 606.798,
133
+ "eval_steps_per_second": 1.193,
134
  "step": 100
135
  },
136
  {
 
148
  {
149
  "epoch": 0.36,
150
  "eval_accuracy": 0.5387819017791697,
151
+ "eval_combined_score": 0.4944622623129249,
152
+ "eval_f1": 0.47105988260831677,
153
  "eval_loss": 0.9713281989097595,
154
+ "eval_precision": 0.4292253630850433,
155
+ "eval_recall": 0.5387819017791697,
156
+ "eval_runtime": 49.0682,
157
+ "eval_samples_per_second": 611.68,
158
+ "eval_steps_per_second": 1.202,
159
  "step": 120
160
  },
161
  {
 
173
  {
174
  "epoch": 0.42,
175
  "eval_accuracy": 0.5312520823615646,
176
+ "eval_combined_score": 0.48712844254868687,
177
+ "eval_f1": 0.4673598197970026,
178
  "eval_loss": 0.9766249656677246,
179
+ "eval_precision": 0.41864978567461564,
180
+ "eval_recall": 0.5312520823615646,
181
+ "eval_runtime": 49.1859,
182
+ "eval_samples_per_second": 610.215,
183
+ "eval_steps_per_second": 1.2,
184
  "step": 140
185
  },
186
  {
 
198
  {
199
  "epoch": 0.48,
200
  "eval_accuracy": 0.5398147531152129,
201
+ "eval_combined_score": 0.49477075635813,
202
+ "eval_f1": 0.4751263005883661,
203
  "eval_loss": 0.9589501619338989,
204
+ "eval_precision": 0.4243272186137281,
205
+ "eval_recall": 0.5398147531152129,
206
+ "eval_runtime": 49.0847,
207
+ "eval_samples_per_second": 611.473,
208
+ "eval_steps_per_second": 1.202,
209
  "step": 160
210
  },
211
  {
 
223
  {
224
  "epoch": 0.54,
225
  "eval_accuracy": 0.5423469047777704,
226
+ "eval_combined_score": 0.49718028497336725,
227
+ "eval_f1": 0.4771728160733735,
228
  "eval_loss": 0.953514814376831,
229
+ "eval_precision": 0.42685451426455484,
230
+ "eval_recall": 0.5423469047777704,
231
+ "eval_runtime": 49.0032,
232
+ "eval_samples_per_second": 612.491,
233
+ "eval_steps_per_second": 1.204,
234
  "step": 180
235
  },
236
  {
 
248
  {
249
  "epoch": 0.6,
250
  "eval_accuracy": 0.567201972412874,
251
+ "eval_combined_score": 0.5200447629299639,
252
+ "eval_f1": 0.49911443945569844,
253
  "eval_loss": 0.9273685812950134,
254
+ "eval_precision": 0.44666066743840943,
255
+ "eval_recall": 0.567201972412874,
256
+ "eval_runtime": 49.1912,
257
+ "eval_samples_per_second": 610.15,
258
+ "eval_steps_per_second": 1.199,
259
  "step": 200
260
  },
261
  {
 
273
  {
274
  "epoch": 0.66,
275
  "eval_accuracy": 0.573598987139335,
276
+ "eval_combined_score": 0.5278568904198743,
277
+ "eval_f1": 0.5026102591641352,
278
  "eval_loss": 0.912590503692627,
279
+ "eval_precision": 0.4616193282366919,
280
+ "eval_recall": 0.573598987139335,
281
+ "eval_runtime": 49.2154,
282
+ "eval_samples_per_second": 609.849,
283
+ "eval_steps_per_second": 1.199,
284
  "step": 220
285
  },
286
  {
 
298
  {
299
  "epoch": 0.72,
300
  "eval_accuracy": 0.5759645498767242,
301
+ "eval_combined_score": 0.5280177627115059,
302
+ "eval_f1": 0.5069205876947673,
303
  "eval_loss": 0.9052607417106628,
304
+ "eval_precision": 0.45322136339780783,
305
+ "eval_recall": 0.5759645498767242,
306
+ "eval_runtime": 49.1399,
307
+ "eval_samples_per_second": 610.787,
308
+ "eval_steps_per_second": 1.201,
309
  "step": 240
310
  },
311
  {
 
323
  {
324
  "epoch": 0.78,
325
  "eval_accuracy": 0.5766975411474645,
326
+ "eval_combined_score": 0.534067292009198,
327
+ "eval_f1": 0.5018221478654629,
328
  "eval_loss": 0.9178985953330994,
329
+ "eval_precision": 0.4810519378764,
330
+ "eval_recall": 0.5766975411474645,
331
+ "eval_runtime": 49.2092,
332
+ "eval_samples_per_second": 609.927,
333
+ "eval_steps_per_second": 1.199,
334
  "step": 260
335
  },
336
  {
 
348
  {
349
  "epoch": 0.84,
350
  "eval_accuracy": 0.58915839275005,
351
+ "eval_combined_score": 0.5407092882328752,
352
+ "eval_f1": 0.5183161389995846,
353
  "eval_loss": 0.8937407732009888,
354
+ "eval_precision": 0.46620422843181647,
355
+ "eval_recall": 0.58915839275005,
356
+ "eval_runtime": 49.2523,
357
+ "eval_samples_per_second": 609.393,
358
+ "eval_steps_per_second": 1.198,
359
  "step": 280
360
  },
361
  {
 
373
  {
374
  "epoch": 0.9,
375
  "eval_accuracy": 0.613013926834144,
376
+ "eval_combined_score": 0.5923759012239074,
377
+ "eval_f1": 0.5579282363395321,
378
  "eval_loss": 0.8468813300132751,
379
+ "eval_precision": 0.5855475148878095,
380
+ "eval_recall": 0.613013926834144,
381
+ "eval_runtime": 49.1896,
382
+ "eval_samples_per_second": 610.17,
383
+ "eval_steps_per_second": 1.199,
384
  "step": 300
385
  },
386
  {
 
398
  {
399
  "epoch": 0.96,
400
  "eval_accuracy": 0.6046511627906976,
401
+ "eval_combined_score": 0.590510828847508,
402
+ "eval_f1": 0.5352474275558012,
403
  "eval_loss": 0.8615403771400452,
404
+ "eval_precision": 0.6174935622528357,
405
+ "eval_recall": 0.6046511627906976,
406
+ "eval_runtime": 49.37,
407
+ "eval_samples_per_second": 607.94,
408
+ "eval_steps_per_second": 1.195,
409
  "step": 320
410
  },
411
  {
 
423
  {
424
  "epoch": 1.02,
425
  "eval_accuracy": 0.6439328313453722,
426
+ "eval_combined_score": 0.6332615383344228,
427
+ "eval_f1": 0.6212200513859825,
428
  "eval_loss": 0.7896137237548828,
429
+ "eval_precision": 0.6239604392609642,
430
+ "eval_recall": 0.6439328313453722,
431
+ "eval_runtime": 49.1146,
432
+ "eval_samples_per_second": 611.102,
433
+ "eval_steps_per_second": 1.201,
434
  "step": 340
435
  },
436
  {
 
448
  {
449
  "epoch": 1.08,
450
  "eval_accuracy": 0.5315852602119011,
451
+ "eval_combined_score": 0.5500800253024862,
452
+ "eval_f1": 0.4943799031015135,
453
  "eval_loss": 1.0123510360717773,
454
+ "eval_precision": 0.642769677684629,
455
+ "eval_recall": 0.5315852602119011,
456
+ "eval_runtime": 49.1941,
457
+ "eval_samples_per_second": 610.114,
458
+ "eval_steps_per_second": 1.199,
459
  "step": 360
460
  },
461
  {
 
473
  {
474
  "epoch": 1.14,
475
  "eval_accuracy": 0.6488638635303525,
476
+ "eval_combined_score": 0.648329789222037,
477
+ "eval_f1": 0.6371420074616733,
478
  "eval_loss": 0.791083574295044,
479
+ "eval_precision": 0.6584494223657698,
480
+ "eval_recall": 0.6488638635303525,
481
+ "eval_runtime": 49.3484,
482
+ "eval_samples_per_second": 608.207,
483
+ "eval_steps_per_second": 1.196,
484
  "step": 380
485
  },
486
  {
 
498
  {
499
  "epoch": 1.2,
500
  "eval_accuracy": 0.6699873392416872,
501
+ "eval_combined_score": 0.6621103162794577,
502
+ "eval_f1": 0.6459453754294268,
503
  "eval_loss": 0.7472424507141113,
504
+ "eval_precision": 0.6625212112050299,
505
+ "eval_recall": 0.6699873392416872,
506
+ "eval_runtime": 49.3153,
507
+ "eval_samples_per_second": 608.614,
508
+ "eval_steps_per_second": 1.196,
509
  "step": 400
510
  },
511
  {
 
523
  {
524
  "epoch": 1.26,
525
  "eval_accuracy": 0.6580595721996402,
526
+ "eval_combined_score": 0.6513132521613654,
527
+ "eval_f1": 0.6095365334747936,
528
  "eval_loss": 0.7663838863372803,
529
+ "eval_precision": 0.6795973307713873,
530
+ "eval_recall": 0.6580595721996402,
531
+ "eval_runtime": 49.2143,
532
+ "eval_samples_per_second": 609.863,
533
+ "eval_steps_per_second": 1.199,
534
  "step": 420
535
  },
536
  {
 
548
  {
549
  "epoch": 1.32,
550
  "eval_accuracy": 0.6991737189311654,
551
+ "eval_combined_score": 0.6905198116505921,
552
+ "eval_f1": 0.653333567856404,
553
  "eval_loss": 0.6993714570999146,
554
+ "eval_precision": 0.7103982408836337,
555
+ "eval_recall": 0.6991737189311654,
556
+ "eval_runtime": 49.1465,
557
+ "eval_samples_per_second": 610.704,
558
+ "eval_steps_per_second": 1.2,
559
  "step": 440
560
  },
561
  {
 
573
  {
574
  "epoch": 1.38,
575
  "eval_accuracy": 0.7283600986206437,
576
+ "eval_combined_score": 0.7200537958840363,
577
+ "eval_f1": 0.6890059409106147,
578
  "eval_loss": 0.6510820984840393,
579
+ "eval_precision": 0.7344890453842432,
580
+ "eval_recall": 0.7283600986206437,
581
+ "eval_runtime": 49.0446,
582
+ "eval_samples_per_second": 611.974,
583
+ "eval_steps_per_second": 1.203,
584
  "step": 460
585
  },
586
  {
 
598
  {
599
  "epoch": 1.44,
600
  "eval_accuracy": 0.7576797494502565,
601
+ "eval_combined_score": 0.7514829004826655,
602
+ "eval_f1": 0.7336051419487455,
603
  "eval_loss": 0.5987845063209534,
604
+ "eval_precision": 0.7569669610814035,
605
+ "eval_recall": 0.7576797494502565,
606
+ "eval_runtime": 49.3939,
607
+ "eval_samples_per_second": 607.646,
608
+ "eval_steps_per_second": 1.194,
609
  "step": 480
610
  },
611
  {
 
623
  {
624
  "epoch": 1.5,
625
  "eval_accuracy": 0.7564136736189778,
626
+ "eval_combined_score": 0.7674838523650971,
627
+ "eval_f1": 0.766902713706533,
628
  "eval_loss": 0.5992804765701294,
629
+ "eval_precision": 0.7902053485158999,
630
+ "eval_recall": 0.7564136736189778,
631
+ "eval_runtime": 49.1622,
632
+ "eval_samples_per_second": 610.509,
633
+ "eval_steps_per_second": 1.2,
634
  "step": 500
635
  },
636
  {
 
648
  {
649
  "epoch": 1.57,
650
  "eval_accuracy": 0.8125874591857133,
651
+ "eval_combined_score": 0.8104607682202668,
652
+ "eval_f1": 0.7941544044601274,
653
  "eval_loss": 0.5067983865737915,
654
+ "eval_precision": 0.8225137500495135,
655
+ "eval_recall": 0.8125874591857133,
656
+ "eval_runtime": 49.0571,
657
+ "eval_samples_per_second": 611.817,
658
+ "eval_steps_per_second": 1.203,
659
  "step": 520
660
  },
661
  {
 
673
  {
674
  "epoch": 1.63,
675
  "eval_accuracy": 0.8520023988805224,
676
+ "eval_combined_score": 0.8500052190439381,
677
+ "eval_f1": 0.8448837390352126,
678
  "eval_loss": 0.42726200819015503,
679
+ "eval_precision": 0.8511323393794948,
680
+ "eval_recall": 0.8520023988805224,
681
+ "eval_runtime": 49.0407,
682
+ "eval_samples_per_second": 612.023,
683
+ "eval_steps_per_second": 1.203,
684
  "step": 540
685
  },
686
  {
 
698
  {
699
  "epoch": 1.69,
700
  "eval_accuracy": 0.8721929766109149,
701
+ "eval_combined_score": 0.8711433017214012,
702
+ "eval_f1": 0.8698654313719106,
703
  "eval_loss": 0.37960025668144226,
704
+ "eval_precision": 0.8703218222918644,
705
+ "eval_recall": 0.8721929766109149,
706
+ "eval_runtime": 49.6519,
707
+ "eval_samples_per_second": 604.489,
708
+ "eval_steps_per_second": 1.188,
709
  "step": 560
710
  },
711
  {
 
723
  {
724
  "epoch": 1.75,
725
  "eval_accuracy": 0.8756580262544146,
726
+ "eval_combined_score": 0.876232023467392,
727
+ "eval_f1": 0.8758457824110393,
728
  "eval_loss": 0.3854842483997345,
729
+ "eval_precision": 0.8777662589496996,
730
+ "eval_recall": 0.8756580262544146,
731
+ "eval_runtime": 49.2229,
732
+ "eval_samples_per_second": 609.757,
733
+ "eval_steps_per_second": 1.199,
734
  "step": 580
735
  },
736
  {
 
748
  {
749
  "epoch": 1.81,
750
  "eval_accuracy": 0.8893849536882789,
751
+ "eval_combined_score": 0.8889004932680924,
752
+ "eval_f1": 0.8883136621852403,
753
  "eval_loss": 0.35855188965797424,
754
+ "eval_precision": 0.8885184035105714,
755
+ "eval_recall": 0.8893849536882789,
756
+ "eval_runtime": 49.2611,
757
+ "eval_samples_per_second": 609.285,
758
+ "eval_steps_per_second": 1.198,
759
  "step": 600
760
  },
761
  {
 
773
  {
774
  "epoch": 1.87,
775
  "eval_accuracy": 0.8971813153861531,
776
+ "eval_combined_score": 0.8972621407363919,
777
+ "eval_f1": 0.8972738562660447,
778
  "eval_loss": 0.321043998003006,
779
+ "eval_precision": 0.8974120759072168,
780
+ "eval_recall": 0.8971813153861531,
781
+ "eval_runtime": 49.4334,
782
+ "eval_samples_per_second": 607.16,
783
+ "eval_steps_per_second": 1.194,
784
  "step": 620
785
  },
786
  {
 
798
  {
799
  "epoch": 1.93,
800
  "eval_accuracy": 0.9035450123275804,
801
+ "eval_combined_score": 0.9033726098964578,
802
+ "eval_f1": 0.9030761902479666,
803
  "eval_loss": 0.3006099760532379,
804
+ "eval_precision": 0.903324224682704,
805
+ "eval_recall": 0.9035450123275804,
806
+ "eval_runtime": 49.1842,
807
+ "eval_samples_per_second": 610.237,
808
+ "eval_steps_per_second": 1.2,
809
  "step": 640
810
  },
811
  {
 
823
  {
824
  "epoch": 1.99,
825
  "eval_accuracy": 0.9014126740854268,
826
+ "eval_combined_score": 0.9026415340332592,
827
+ "eval_f1": 0.9025066760774267,
828
  "eval_loss": 0.30540063977241516,
829
+ "eval_precision": 0.9052341118847564,
830
+ "eval_recall": 0.9014126740854268,
831
+ "eval_runtime": 49.4063,
832
+ "eval_samples_per_second": 607.494,
833
+ "eval_steps_per_second": 1.194,
834
  "step": 660
835
  },
836
  {
 
848
  {
849
  "epoch": 2.05,
850
  "eval_accuracy": 0.8912507496501633,
851
+ "eval_combined_score": 0.8911549520027604,
852
+ "eval_f1": 0.8866402874920362,
853
  "eval_loss": 0.3174073100090027,
854
+ "eval_precision": 0.895478021218679,
855
+ "eval_recall": 0.8912507496501633,
856
+ "eval_runtime": 49.3463,
857
+ "eval_samples_per_second": 608.232,
858
+ "eval_steps_per_second": 1.196,
859
  "step": 680
860
  },
861
  {
 
873
  {
874
  "epoch": 2.11,
875
  "eval_accuracy": 0.9122076364363297,
876
+ "eval_combined_score": 0.9124808443428073,
877
+ "eval_f1": 0.9125293911738805,
878
  "eval_loss": 0.2769884169101715,
879
+ "eval_precision": 0.9129787133246888,
880
+ "eval_recall": 0.9122076364363297,
881
+ "eval_runtime": 49.3634,
882
+ "eval_samples_per_second": 608.021,
883
+ "eval_steps_per_second": 1.195,
884
  "step": 700
885
  },
886
  {
 
898
  {
899
  "epoch": 2.17,
900
  "eval_accuracy": 0.9062437529153062,
901
+ "eval_combined_score": 0.9068222505646787,
902
+ "eval_f1": 0.9054896753410417,
903
  "eval_loss": 0.2979203760623932,
904
+ "eval_precision": 0.9093118210870608,
905
+ "eval_recall": 0.9062437529153062,
906
+ "eval_runtime": 49.0419,
907
+ "eval_samples_per_second": 612.007,
908
+ "eval_steps_per_second": 1.203,
909
  "step": 720
910
  },
911
  {
 
923
  {
924
  "epoch": 2.23,
925
  "eval_accuracy": 0.8997801026187779,
926
+ "eval_combined_score": 0.9003031124352014,
927
+ "eval_f1": 0.8971406374938125,
928
  "eval_loss": 0.29734131693840027,
929
+ "eval_precision": 0.9045116070094374,
930
+ "eval_recall": 0.8997801026187779,
931
+ "eval_runtime": 49.1818,
932
+ "eval_samples_per_second": 610.267,
933
+ "eval_steps_per_second": 1.2,
934
  "step": 740
935
  },
936
  {
 
948
  {
949
  "epoch": 2.29,
950
  "eval_accuracy": 0.9221696541613914,
951
+ "eval_combined_score": 0.9221674217032323,
952
+ "eval_f1": 0.922071694968714,
953
  "eval_loss": 0.24671417474746704,
954
+ "eval_precision": 0.9222586835214323,
955
+ "eval_recall": 0.9221696541613914,
956
+ "eval_runtime": 49.2126,
957
+ "eval_samples_per_second": 609.884,
958
+ "eval_steps_per_second": 1.199,
959
  "step": 760
960
  },
961
  {
 
973
  {
974
  "epoch": 2.35,
975
  "eval_accuracy": 0.9113080562404211,
976
+ "eval_combined_score": 0.9131963179862455,
977
+ "eval_f1": 0.9128361841568111,
978
  "eval_loss": 0.2760636806488037,
979
+ "eval_precision": 0.917332975307328,
980
+ "eval_recall": 0.9113080562404211,
981
+ "eval_runtime": 49.2008,
982
+ "eval_samples_per_second": 610.031,
983
+ "eval_steps_per_second": 1.199,
984
  "step": 780
985
  },
986
  {
 
998
  {
999
  "epoch": 2.41,
1000
  "eval_accuracy": 0.9260011994402612,
1001
+ "eval_combined_score": 0.9259453001449679,
1002
+ "eval_f1": 0.9257439757889774,
1003
  "eval_loss": 0.24102580547332764,
1004
+ "eval_precision": 0.9260348259103722,
1005
+ "eval_recall": 0.9260011994402612,
1006
+ "eval_runtime": 49.2957,
1007
+ "eval_samples_per_second": 608.856,
1008
+ "eval_steps_per_second": 1.197,
1009
  "step": 800
1010
  },
1011
  {
 
1023
  {
1024
  "epoch": 2.47,
1025
  "eval_accuracy": 0.922103018591324,
1026
+ "eval_combined_score": 0.9220769784769673,
1027
+ "eval_f1": 0.9212994658547122,
1028
  "eval_loss": 0.2446586638689041,
1029
+ "eval_precision": 0.9228024108705088,
1030
+ "eval_recall": 0.922103018591324,
1031
+ "eval_runtime": 48.9946,
1032
+ "eval_samples_per_second": 612.598,
1033
+ "eval_steps_per_second": 1.204,
1034
  "step": 820
1035
  },
1036
  {
 
1048
  {
1049
  "epoch": 2.53,
1050
  "eval_accuracy": 0.9237355900579729,
1051
+ "eval_combined_score": 0.9240135925879284,
1052
+ "eval_f1": 0.9231767824872119,
1053
  "eval_loss": 0.2475174069404602,
1054
+ "eval_precision": 0.9254064077485559,
1055
+ "eval_recall": 0.9237355900579729,
1056
+ "eval_runtime": 49.3973,
1057
+ "eval_samples_per_second": 607.604,
1058
+ "eval_steps_per_second": 1.194,
1059
  "step": 840
1060
  },
1061
  {
 
1073
  {
1074
  "epoch": 2.59,
1075
  "eval_accuracy": 0.9265342840007996,
1076
+ "eval_combined_score": 0.9265368337935618,
1077
+ "eval_f1": 0.9258759190762016,
1078
  "eval_loss": 0.2590079605579376,
1079
+ "eval_precision": 0.927202848096446,
1080
+ "eval_recall": 0.9265342840007996,
1081
+ "eval_runtime": 48.961,
1082
+ "eval_samples_per_second": 613.019,
1083
+ "eval_steps_per_second": 1.205,
1084
  "step": 860
1085
  },
1086
  {
 
1098
  {
1099
  "epoch": 2.65,
1100
  "eval_accuracy": 0.930032651429333,
1101
+ "eval_combined_score": 0.9305237768064085,
1102
+ "eval_f1": 0.9305263249699511,
1103
  "eval_loss": 0.22479340434074402,
1104
+ "eval_precision": 0.9315034793970168,
1105
+ "eval_recall": 0.930032651429333,
1106
+ "eval_runtime": 49.2223,
1107
+ "eval_samples_per_second": 609.764,
1108
+ "eval_steps_per_second": 1.199,
1109
  "step": 880
1110
  },
1111
  {
 
1123
  {
1124
  "epoch": 2.71,
1125
  "eval_accuracy": 0.9273005930565736,
1126
+ "eval_combined_score": 0.9281516215920332,
1127
+ "eval_f1": 0.9280796047956419,
1128
  "eval_loss": 0.22854498028755188,
1129
+ "eval_precision": 0.9299256954593439,
1130
+ "eval_recall": 0.9273005930565736,
1131
+ "eval_runtime": 49.2291,
1132
+ "eval_samples_per_second": 609.68,
1133
+ "eval_steps_per_second": 1.198,
1134
  "step": 900
1135
  }
1136
  ],
1137
  "logging_steps": 10,
1138
+ "max_steps": 1660,
1139
+ "num_train_epochs": 5,
1140
  "save_steps": 100,
1141
+ "total_flos": 1.2123845307437875e+17,
1142
  "trial_name": null,
1143
  "trial_params": null
1144
  }
checkpoint-900/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1be80ebe52f6e43af0b8aa087e72fad77310d5998b6e0b8f66a6a1d53be7b7
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae98e7de1b05a570517ae68653e3c31b639a52c739d05197601f467f38b01c66
3
  size 4536