motheecreator commited on
Commit
95ec253
1 Parent(s): d6409de

End of training

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7052103650041794
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the image_folder dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 1.1280
36
- - Accuracy: 0.7052
37
 
38
  ## Model description
39
 
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.7057676232933965
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [google/vit-base-patch16-224-in21k](https://huggingface.co/google/vit-base-patch16-224-in21k) on the image_folder dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.9803
36
+ - Accuracy: 0.7058
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 4.99,
3
- "eval_accuracy": 0.6982446363889663,
4
- "eval_loss": 0.8523625135421753,
5
- "eval_runtime": 60.7812,
6
- "eval_samples_per_second": 118.096,
7
- "eval_steps_per_second": 3.702,
8
- "total_flos": 1.1101379964762415e+19,
9
- "train_loss": 0.7277007077421461,
10
- "train_runtime": 3200.5261,
11
- "train_samples_per_second": 44.85,
12
- "train_steps_per_second": 0.35
13
  }
 
1
  {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.7057676232933965,
4
+ "eval_loss": 0.9803113341331482,
5
+ "eval_runtime": 60.2511,
6
+ "eval_samples_per_second": 119.135,
7
+ "eval_steps_per_second": 3.734,
8
+ "total_flos": 2.2200667552042852e+19,
9
+ "train_loss": 0.27600424638284105,
10
+ "train_runtime": 6453.2956,
11
+ "train_samples_per_second": 44.487,
12
+ "train_steps_per_second": 0.347
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.99,
3
- "eval_accuracy": 0.6982446363889663,
4
- "eval_loss": 0.8523625135421753,
5
- "eval_runtime": 60.7812,
6
- "eval_samples_per_second": 118.096,
7
- "eval_steps_per_second": 3.702
8
  }
 
1
  {
2
+ "epoch": 9.98,
3
+ "eval_accuracy": 0.7057676232933965,
4
+ "eval_loss": 0.9803113341331482,
5
+ "eval_runtime": 60.2511,
6
+ "eval_samples_per_second": 119.135,
7
+ "eval_steps_per_second": 3.734
8
  }
runs/Apr30_00-59-58_786022f41cde/events.out.tfevents.1714445352.786022f41cde.42.6 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf8dc49760c3ee0adee024a5228dc39f7dace6ad559e1699aef2e9652b8975c
3
+ size 411
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 4.99,
3
- "total_flos": 1.1101379964762415e+19,
4
- "train_loss": 0.7277007077421461,
5
- "train_runtime": 3200.5261,
6
- "train_samples_per_second": 44.85,
7
- "train_steps_per_second": 0.35
8
  }
 
1
  {
2
+ "epoch": 9.98,
3
+ "total_flos": 2.2200667552042852e+19,
4
+ "train_loss": 0.27600424638284105,
5
+ "train_runtime": 6453.2956,
6
+ "train_samples_per_second": 44.487,
7
+ "train_steps_per_second": 0.347
8
  }
trainer_state.json CHANGED
@@ -1,746 +1,1463 @@
1
  {
2
- "best_metric": 0.6982446363889663,
3
- "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned/checkpoint-1120",
4
- "epoch": 4.988864142538976,
5
  "eval_steps": 500,
6
- "global_step": 1120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.04,
13
- "learning_rate": 4.464285714285715e-06,
14
- "loss": 1.0215,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.09,
19
- "learning_rate": 8.92857142857143e-06,
20
- "loss": 1.0059,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.13,
25
- "learning_rate": 1.3392857142857144e-05,
26
- "loss": 1.0021,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.18,
31
- "learning_rate": 1.785714285714286e-05,
32
- "loss": 1.0043,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.22,
37
- "learning_rate": 2.2321428571428575e-05,
38
- "loss": 0.975,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.27,
43
- "learning_rate": 2.6785714285714288e-05,
44
- "loss": 0.9459,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.31,
49
- "learning_rate": 3.125e-05,
50
- "loss": 0.9092,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.36,
55
- "learning_rate": 3.571428571428572e-05,
56
- "loss": 0.911,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.4,
61
- "learning_rate": 4.017857142857143e-05,
62
- "loss": 0.961,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.45,
67
- "learning_rate": 4.464285714285715e-05,
68
- "loss": 0.9321,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.49,
73
- "learning_rate": 4.910714285714286e-05,
74
- "loss": 0.9215,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.53,
79
- "learning_rate": 4.960317460317461e-05,
80
- "loss": 0.8922,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.58,
85
- "learning_rate": 4.910714285714286e-05,
86
- "loss": 0.9519,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.62,
91
- "learning_rate": 4.8611111111111115e-05,
92
- "loss": 0.8926,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.67,
97
- "learning_rate": 4.811507936507937e-05,
98
- "loss": 0.8787,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.71,
103
- "learning_rate": 4.761904761904762e-05,
104
- "loss": 0.8783,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.76,
109
- "learning_rate": 4.7123015873015876e-05,
110
- "loss": 0.8611,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.8,
115
- "learning_rate": 4.662698412698413e-05,
116
- "loss": 0.9162,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.85,
121
- "learning_rate": 4.613095238095239e-05,
122
- "loss": 0.8853,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.89,
127
- "learning_rate": 4.563492063492064e-05,
128
- "loss": 0.8687,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.94,
133
- "learning_rate": 4.5138888888888894e-05,
134
- "loss": 0.8379,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.98,
139
- "learning_rate": 4.464285714285715e-05,
140
- "loss": 0.8333,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 1.0,
145
- "eval_accuracy": 0.6473948174979103,
146
- "eval_loss": 0.9670450687408447,
147
- "eval_runtime": 60.2016,
148
- "eval_samples_per_second": 119.233,
149
- "eval_steps_per_second": 3.737,
150
  "step": 224
151
  },
152
  {
153
  "epoch": 1.02,
154
- "learning_rate": 4.41468253968254e-05,
155
- "loss": 0.8652,
156
  "step": 230
157
  },
158
  {
159
  "epoch": 1.07,
160
- "learning_rate": 4.3650793650793655e-05,
161
- "loss": 0.8686,
162
  "step": 240
163
  },
164
  {
165
  "epoch": 1.11,
166
- "learning_rate": 4.315476190476191e-05,
167
- "loss": 0.8604,
168
  "step": 250
169
  },
170
  {
171
  "epoch": 1.16,
172
- "learning_rate": 4.265873015873016e-05,
173
- "loss": 0.8377,
174
  "step": 260
175
  },
176
  {
177
  "epoch": 1.2,
178
- "learning_rate": 4.2162698412698416e-05,
179
- "loss": 0.8151,
180
  "step": 270
181
  },
182
  {
183
  "epoch": 1.25,
184
- "learning_rate": 4.166666666666667e-05,
185
- "loss": 0.8818,
186
  "step": 280
187
  },
188
  {
189
  "epoch": 1.29,
190
- "learning_rate": 4.117063492063492e-05,
191
- "loss": 0.8108,
192
  "step": 290
193
  },
194
  {
195
  "epoch": 1.34,
196
- "learning_rate": 4.067460317460318e-05,
197
- "loss": 0.8408,
198
  "step": 300
199
  },
200
  {
201
  "epoch": 1.38,
202
- "learning_rate": 4.017857142857143e-05,
203
- "loss": 0.8801,
204
  "step": 310
205
  },
206
  {
207
  "epoch": 1.43,
208
- "learning_rate": 3.968253968253968e-05,
209
- "loss": 0.8194,
210
  "step": 320
211
  },
212
  {
213
  "epoch": 1.47,
214
- "learning_rate": 3.918650793650794e-05,
215
- "loss": 0.862,
216
  "step": 330
217
  },
218
  {
219
  "epoch": 1.51,
220
- "learning_rate": 3.8690476190476195e-05,
221
- "loss": 0.8165,
222
  "step": 340
223
  },
224
  {
225
  "epoch": 1.56,
226
- "learning_rate": 3.8194444444444444e-05,
227
- "loss": 0.8342,
228
  "step": 350
229
  },
230
  {
231
  "epoch": 1.6,
232
- "learning_rate": 3.76984126984127e-05,
233
- "loss": 0.8397,
234
  "step": 360
235
  },
236
  {
237
  "epoch": 1.65,
238
- "learning_rate": 3.7202380952380956e-05,
239
- "loss": 0.8318,
240
  "step": 370
241
  },
242
  {
243
  "epoch": 1.69,
244
- "learning_rate": 3.6706349206349205e-05,
245
- "loss": 0.792,
246
  "step": 380
247
  },
248
  {
249
  "epoch": 1.74,
250
- "learning_rate": 3.621031746031746e-05,
251
- "loss": 0.844,
252
  "step": 390
253
  },
254
  {
255
  "epoch": 1.78,
256
- "learning_rate": 3.571428571428572e-05,
257
- "loss": 0.8179,
258
  "step": 400
259
  },
260
  {
261
  "epoch": 1.83,
262
- "learning_rate": 3.521825396825397e-05,
263
- "loss": 0.7968,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.87,
268
- "learning_rate": 3.472222222222222e-05,
269
- "loss": 0.7903,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.92,
274
- "learning_rate": 3.422619047619048e-05,
275
- "loss": 0.7977,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.96,
280
- "learning_rate": 3.3730158730158734e-05,
281
- "loss": 0.7972,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 2.0,
286
- "eval_accuracy": 0.6653663973251602,
287
- "eval_loss": 0.9123018383979797,
288
- "eval_runtime": 59.7062,
289
- "eval_samples_per_second": 120.222,
290
- "eval_steps_per_second": 3.768,
291
  "step": 449
292
  },
293
  {
294
  "epoch": 2.0,
295
- "learning_rate": 3.3234126984126983e-05,
296
- "loss": 0.8381,
297
  "step": 450
298
  },
299
  {
300
  "epoch": 2.05,
301
- "learning_rate": 3.273809523809524e-05,
302
- "loss": 0.7561,
303
  "step": 460
304
  },
305
  {
306
  "epoch": 2.09,
307
- "learning_rate": 3.2242063492063495e-05,
308
- "loss": 0.7124,
309
  "step": 470
310
  },
311
  {
312
  "epoch": 2.14,
313
- "learning_rate": 3.1746031746031745e-05,
314
- "loss": 0.7322,
315
  "step": 480
316
  },
317
  {
318
  "epoch": 2.18,
319
- "learning_rate": 3.125e-05,
320
- "loss": 0.7348,
321
  "step": 490
322
  },
323
  {
324
  "epoch": 2.23,
325
- "learning_rate": 3.075396825396826e-05,
326
- "loss": 0.7468,
327
  "step": 500
328
  },
329
  {
330
  "epoch": 2.27,
331
- "learning_rate": 3.0257936507936506e-05,
332
- "loss": 0.7106,
333
  "step": 510
334
  },
335
  {
336
  "epoch": 2.32,
337
- "learning_rate": 2.9761904761904762e-05,
338
- "loss": 0.7585,
339
  "step": 520
340
  },
341
  {
342
  "epoch": 2.36,
343
- "learning_rate": 2.9265873015873018e-05,
344
- "loss": 0.7383,
345
  "step": 530
346
  },
347
  {
348
  "epoch": 2.41,
349
- "learning_rate": 2.876984126984127e-05,
350
- "loss": 0.7238,
351
  "step": 540
352
  },
353
  {
354
  "epoch": 2.45,
355
- "learning_rate": 2.8273809523809523e-05,
356
- "loss": 0.7042,
357
  "step": 550
358
  },
359
  {
360
  "epoch": 2.49,
361
- "learning_rate": 2.777777777777778e-05,
362
- "loss": 0.7076,
363
  "step": 560
364
  },
365
  {
366
  "epoch": 2.54,
367
- "learning_rate": 2.7281746031746032e-05,
368
- "loss": 0.7441,
369
  "step": 570
370
  },
371
  {
372
  "epoch": 2.58,
373
- "learning_rate": 2.6785714285714288e-05,
374
- "loss": 0.6887,
375
  "step": 580
376
  },
377
  {
378
  "epoch": 2.63,
379
- "learning_rate": 2.628968253968254e-05,
380
- "loss": 0.7216,
381
  "step": 590
382
  },
383
  {
384
  "epoch": 2.67,
385
- "learning_rate": 2.5793650793650796e-05,
386
- "loss": 0.71,
387
  "step": 600
388
  },
389
  {
390
  "epoch": 2.72,
391
- "learning_rate": 2.529761904761905e-05,
392
- "loss": 0.7265,
393
  "step": 610
394
  },
395
  {
396
  "epoch": 2.76,
397
- "learning_rate": 2.4801587301587305e-05,
398
- "loss": 0.697,
399
  "step": 620
400
  },
401
  {
402
  "epoch": 2.81,
403
- "learning_rate": 2.4305555555555558e-05,
404
- "loss": 0.7578,
405
  "step": 630
406
  },
407
  {
408
  "epoch": 2.85,
409
- "learning_rate": 2.380952380952381e-05,
410
- "loss": 0.6838,
411
  "step": 640
412
  },
413
  {
414
  "epoch": 2.9,
415
- "learning_rate": 2.3313492063492066e-05,
416
- "loss": 0.7082,
417
  "step": 650
418
  },
419
  {
420
  "epoch": 2.94,
421
- "learning_rate": 2.281746031746032e-05,
422
- "loss": 0.6824,
423
  "step": 660
424
  },
425
  {
426
  "epoch": 2.98,
427
- "learning_rate": 2.2321428571428575e-05,
428
- "loss": 0.667,
429
  "step": 670
430
  },
431
  {
432
  "epoch": 3.0,
433
- "eval_accuracy": 0.6886319308999721,
434
- "eval_loss": 0.8676984906196594,
435
- "eval_runtime": 59.6434,
436
- "eval_samples_per_second": 120.349,
437
- "eval_steps_per_second": 3.772,
438
  "step": 673
439
  },
440
  {
441
  "epoch": 3.03,
442
- "learning_rate": 2.1825396825396827e-05,
443
- "loss": 0.6546,
444
  "step": 680
445
  },
446
  {
447
  "epoch": 3.07,
448
- "learning_rate": 2.132936507936508e-05,
449
- "loss": 0.6349,
450
  "step": 690
451
  },
452
  {
453
  "epoch": 3.12,
454
- "learning_rate": 2.0833333333333336e-05,
455
- "loss": 0.6229,
456
  "step": 700
457
  },
458
  {
459
  "epoch": 3.16,
460
- "learning_rate": 2.033730158730159e-05,
461
- "loss": 0.653,
462
  "step": 710
463
  },
464
  {
465
  "epoch": 3.21,
466
- "learning_rate": 1.984126984126984e-05,
467
- "loss": 0.6433,
468
  "step": 720
469
  },
470
  {
471
  "epoch": 3.25,
472
- "learning_rate": 1.9345238095238097e-05,
473
- "loss": 0.6291,
474
  "step": 730
475
  },
476
  {
477
  "epoch": 3.3,
478
- "learning_rate": 1.884920634920635e-05,
479
- "loss": 0.6621,
480
  "step": 740
481
  },
482
  {
483
  "epoch": 3.34,
484
- "learning_rate": 1.8353174603174602e-05,
485
- "loss": 0.6116,
486
  "step": 750
487
  },
488
  {
489
  "epoch": 3.39,
490
- "learning_rate": 1.785714285714286e-05,
491
- "loss": 0.6441,
492
  "step": 760
493
  },
494
  {
495
  "epoch": 3.43,
496
- "learning_rate": 1.736111111111111e-05,
497
- "loss": 0.642,
498
  "step": 770
499
  },
500
  {
501
  "epoch": 3.47,
502
- "learning_rate": 1.6865079365079367e-05,
503
- "loss": 0.6129,
504
  "step": 780
505
  },
506
  {
507
  "epoch": 3.52,
508
- "learning_rate": 1.636904761904762e-05,
509
- "loss": 0.5959,
510
  "step": 790
511
  },
512
  {
513
  "epoch": 3.56,
514
- "learning_rate": 1.5873015873015872e-05,
515
- "loss": 0.6257,
516
  "step": 800
517
  },
518
  {
519
  "epoch": 3.61,
520
- "learning_rate": 1.537698412698413e-05,
521
- "loss": 0.5942,
522
  "step": 810
523
  },
524
  {
525
  "epoch": 3.65,
526
- "learning_rate": 1.4880952380952381e-05,
527
- "loss": 0.5848,
528
  "step": 820
529
  },
530
  {
531
  "epoch": 3.7,
532
- "learning_rate": 1.4384920634920635e-05,
533
- "loss": 0.6071,
534
  "step": 830
535
  },
536
  {
537
  "epoch": 3.74,
538
- "learning_rate": 1.388888888888889e-05,
539
- "loss": 0.6009,
540
  "step": 840
541
  },
542
  {
543
  "epoch": 3.79,
544
- "learning_rate": 1.3392857142857144e-05,
545
- "loss": 0.6541,
546
  "step": 850
547
  },
548
  {
549
  "epoch": 3.83,
550
- "learning_rate": 1.2896825396825398e-05,
551
- "loss": 0.6306,
552
  "step": 860
553
  },
554
  {
555
  "epoch": 3.88,
556
- "learning_rate": 1.2400793650793652e-05,
557
- "loss": 0.6398,
558
  "step": 870
559
  },
560
  {
561
  "epoch": 3.92,
562
- "learning_rate": 1.1904761904761905e-05,
563
- "loss": 0.6285,
564
  "step": 880
565
  },
566
  {
567
  "epoch": 3.96,
568
- "learning_rate": 1.140873015873016e-05,
569
- "loss": 0.5729,
570
  "step": 890
571
  },
572
  {
573
  "epoch": 4.0,
574
- "eval_accuracy": 0.6937865700752298,
575
- "eval_loss": 0.8486846685409546,
576
- "eval_runtime": 59.646,
577
- "eval_samples_per_second": 120.343,
578
- "eval_steps_per_second": 3.772,
579
  "step": 898
580
  },
581
  {
582
  "epoch": 4.01,
583
- "learning_rate": 1.0912698412698414e-05,
584
- "loss": 0.5634,
585
  "step": 900
586
  },
587
  {
588
  "epoch": 4.05,
589
- "learning_rate": 1.0416666666666668e-05,
590
- "loss": 0.5553,
591
  "step": 910
592
  },
593
  {
594
  "epoch": 4.1,
595
- "learning_rate": 9.92063492063492e-06,
596
- "loss": 0.5474,
597
  "step": 920
598
  },
599
  {
600
  "epoch": 4.14,
601
- "learning_rate": 9.424603174603175e-06,
602
- "loss": 0.5299,
603
  "step": 930
604
  },
605
  {
606
  "epoch": 4.19,
607
- "learning_rate": 8.92857142857143e-06,
608
- "loss": 0.5541,
609
  "step": 940
610
  },
611
  {
612
  "epoch": 4.23,
613
- "learning_rate": 8.432539682539684e-06,
614
- "loss": 0.5341,
615
  "step": 950
616
  },
617
  {
618
  "epoch": 4.28,
619
- "learning_rate": 7.936507936507936e-06,
620
- "loss": 0.5315,
621
  "step": 960
622
  },
623
  {
624
  "epoch": 4.32,
625
- "learning_rate": 7.4404761904761905e-06,
626
- "loss": 0.5265,
627
  "step": 970
628
  },
629
  {
630
  "epoch": 4.37,
631
- "learning_rate": 6.944444444444445e-06,
632
- "loss": 0.5063,
633
  "step": 980
634
  },
635
  {
636
  "epoch": 4.41,
637
- "learning_rate": 6.448412698412699e-06,
638
- "loss": 0.53,
639
  "step": 990
640
  },
641
  {
642
  "epoch": 4.45,
643
- "learning_rate": 5.9523809523809525e-06,
644
- "loss": 0.5322,
645
  "step": 1000
646
  },
647
  {
648
  "epoch": 4.5,
649
- "learning_rate": 5.456349206349207e-06,
650
- "loss": 0.5397,
651
  "step": 1010
652
  },
653
  {
654
  "epoch": 4.54,
655
- "learning_rate": 4.96031746031746e-06,
656
- "loss": 0.533,
657
  "step": 1020
658
  },
659
  {
660
  "epoch": 4.59,
661
- "learning_rate": 4.464285714285715e-06,
662
- "loss": 0.6067,
663
  "step": 1030
664
  },
665
  {
666
  "epoch": 4.63,
667
- "learning_rate": 3.968253968253968e-06,
668
- "loss": 0.568,
669
  "step": 1040
670
  },
671
  {
672
  "epoch": 4.68,
673
- "learning_rate": 3.4722222222222224e-06,
674
- "loss": 0.5507,
675
  "step": 1050
676
  },
677
  {
678
  "epoch": 4.72,
679
- "learning_rate": 2.9761904761904763e-06,
680
- "loss": 0.5993,
681
  "step": 1060
682
  },
683
  {
684
  "epoch": 4.77,
685
- "learning_rate": 2.48015873015873e-06,
686
- "loss": 0.5343,
687
  "step": 1070
688
  },
689
  {
690
  "epoch": 4.81,
691
- "learning_rate": 1.984126984126984e-06,
692
- "loss": 0.5301,
693
  "step": 1080
694
  },
695
  {
696
  "epoch": 4.86,
697
- "learning_rate": 1.4880952380952381e-06,
698
- "loss": 0.5446,
699
  "step": 1090
700
  },
701
  {
702
  "epoch": 4.9,
703
- "learning_rate": 9.92063492063492e-07,
704
- "loss": 0.5496,
705
  "step": 1100
706
  },
707
  {
708
  "epoch": 4.94,
709
- "learning_rate": 4.96031746031746e-07,
710
- "loss": 0.5198,
711
  "step": 1110
712
  },
713
  {
714
  "epoch": 4.99,
715
- "learning_rate": 0.0,
716
- "loss": 0.5347,
717
  "step": 1120
718
  },
719
  {
720
- "epoch": 4.99,
721
- "eval_accuracy": 0.6982446363889663,
722
- "eval_loss": 0.8523625135421753,
723
- "eval_runtime": 60.8207,
724
- "eval_samples_per_second": 118.019,
725
- "eval_steps_per_second": 3.699,
726
- "step": 1120
727
  },
728
  {
729
- "epoch": 4.99,
730
- "step": 1120,
731
- "total_flos": 1.1101379964762415e+19,
732
- "train_loss": 0.7277007077421461,
733
- "train_runtime": 3200.5261,
734
- "train_samples_per_second": 44.85,
735
- "train_steps_per_second": 0.35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
736
  }
737
  ],
738
  "logging_steps": 10,
739
- "max_steps": 1120,
740
  "num_input_tokens_seen": 0,
741
- "num_train_epochs": 5,
742
  "save_steps": 500,
743
- "total_flos": 1.1101379964762415e+19,
744
  "train_batch_size": 32,
745
  "trial_name": null,
746
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7057676232933965,
3
+ "best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned/checkpoint-1122",
4
+ "epoch": 9.977728285077951,
5
  "eval_steps": 500,
6
+ "global_step": 2240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.04,
13
+ "learning_rate": 2.2321428571428573e-06,
14
+ "loss": 0.5702,
15
  "step": 10
16
  },
17
  {
18
  "epoch": 0.09,
19
+ "learning_rate": 4.464285714285715e-06,
20
+ "loss": 0.5531,
21
  "step": 20
22
  },
23
  {
24
  "epoch": 0.13,
25
+ "learning_rate": 6.696428571428572e-06,
26
+ "loss": 0.5328,
27
  "step": 30
28
  },
29
  {
30
  "epoch": 0.18,
31
+ "learning_rate": 8.92857142857143e-06,
32
+ "loss": 0.5412,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.22,
37
+ "learning_rate": 1.1160714285714287e-05,
38
+ "loss": 0.5131,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.27,
43
+ "learning_rate": 1.3392857142857144e-05,
44
+ "loss": 0.5021,
45
  "step": 60
46
  },
47
  {
48
  "epoch": 0.31,
49
+ "learning_rate": 1.5625e-05,
50
+ "loss": 0.4657,
51
  "step": 70
52
  },
53
  {
54
  "epoch": 0.36,
55
+ "learning_rate": 1.785714285714286e-05,
56
+ "loss": 0.4831,
57
  "step": 80
58
  },
59
  {
60
  "epoch": 0.4,
61
+ "learning_rate": 2.0089285714285717e-05,
62
+ "loss": 0.4942,
63
  "step": 90
64
  },
65
  {
66
  "epoch": 0.45,
67
+ "learning_rate": 2.2321428571428575e-05,
68
+ "loss": 0.531,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 0.49,
73
+ "learning_rate": 2.455357142857143e-05,
74
+ "loss": 0.4907,
75
  "step": 110
76
  },
77
  {
78
  "epoch": 0.53,
79
+ "learning_rate": 2.6785714285714288e-05,
80
+ "loss": 0.4482,
81
  "step": 120
82
  },
83
  {
84
  "epoch": 0.58,
85
+ "learning_rate": 2.9017857142857146e-05,
86
+ "loss": 0.5041,
87
  "step": 130
88
  },
89
  {
90
  "epoch": 0.62,
91
+ "learning_rate": 3.125e-05,
92
+ "loss": 0.4777,
93
  "step": 140
94
  },
95
  {
96
  "epoch": 0.67,
97
+ "learning_rate": 3.348214285714286e-05,
98
+ "loss": 0.4692,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 0.71,
103
+ "learning_rate": 3.571428571428572e-05,
104
+ "loss": 0.4646,
105
  "step": 160
106
  },
107
  {
108
  "epoch": 0.76,
109
+ "learning_rate": 3.794642857142857e-05,
110
+ "loss": 0.4503,
111
  "step": 170
112
  },
113
  {
114
  "epoch": 0.8,
115
+ "learning_rate": 4.017857142857143e-05,
116
+ "loss": 0.5269,
117
  "step": 180
118
  },
119
  {
120
  "epoch": 0.85,
121
+ "learning_rate": 4.2410714285714285e-05,
122
+ "loss": 0.4849,
123
  "step": 190
124
  },
125
  {
126
  "epoch": 0.89,
127
+ "learning_rate": 4.464285714285715e-05,
128
+ "loss": 0.4956,
129
  "step": 200
130
  },
131
  {
132
  "epoch": 0.94,
133
+ "learning_rate": 4.6875e-05,
134
+ "loss": 0.4993,
135
  "step": 210
136
  },
137
  {
138
  "epoch": 0.98,
139
+ "learning_rate": 4.910714285714286e-05,
140
+ "loss": 0.4887,
141
  "step": 220
142
  },
143
  {
144
  "epoch": 1.0,
145
+ "eval_accuracy": 0.6776260796879353,
146
+ "eval_loss": 0.9212561845779419,
147
+ "eval_runtime": 60.2714,
148
+ "eval_samples_per_second": 119.095,
149
+ "eval_steps_per_second": 3.733,
150
  "step": 224
151
  },
152
  {
153
  "epoch": 1.02,
154
+ "learning_rate": 4.985119047619048e-05,
155
+ "loss": 0.5157,
156
  "step": 230
157
  },
158
  {
159
  "epoch": 1.07,
160
+ "learning_rate": 4.960317460317461e-05,
161
+ "loss": 0.5364,
162
  "step": 240
163
  },
164
  {
165
  "epoch": 1.11,
166
+ "learning_rate": 4.9355158730158735e-05,
167
+ "loss": 0.5125,
168
  "step": 250
169
  },
170
  {
171
  "epoch": 1.16,
172
+ "learning_rate": 4.910714285714286e-05,
173
+ "loss": 0.5028,
174
  "step": 260
175
  },
176
  {
177
  "epoch": 1.2,
178
+ "learning_rate": 4.8859126984126984e-05,
179
+ "loss": 0.4801,
180
  "step": 270
181
  },
182
  {
183
  "epoch": 1.25,
184
+ "learning_rate": 4.8611111111111115e-05,
185
+ "loss": 0.5442,
186
  "step": 280
187
  },
188
  {
189
  "epoch": 1.29,
190
+ "learning_rate": 4.836309523809524e-05,
191
+ "loss": 0.4718,
192
  "step": 290
193
  },
194
  {
195
  "epoch": 1.34,
196
+ "learning_rate": 4.811507936507937e-05,
197
+ "loss": 0.5095,
198
  "step": 300
199
  },
200
  {
201
  "epoch": 1.38,
202
+ "learning_rate": 4.7867063492063496e-05,
203
+ "loss": 0.5151,
204
  "step": 310
205
  },
206
  {
207
  "epoch": 1.43,
208
+ "learning_rate": 4.761904761904762e-05,
209
+ "loss": 0.4909,
210
  "step": 320
211
  },
212
  {
213
  "epoch": 1.47,
214
+ "learning_rate": 4.7371031746031745e-05,
215
+ "loss": 0.4797,
216
  "step": 330
217
  },
218
  {
219
  "epoch": 1.51,
220
+ "learning_rate": 4.7123015873015876e-05,
221
+ "loss": 0.4604,
222
  "step": 340
223
  },
224
  {
225
  "epoch": 1.56,
226
+ "learning_rate": 4.6875e-05,
227
+ "loss": 0.5055,
228
  "step": 350
229
  },
230
  {
231
  "epoch": 1.6,
232
+ "learning_rate": 4.662698412698413e-05,
233
+ "loss": 0.5105,
234
  "step": 360
235
  },
236
  {
237
  "epoch": 1.65,
238
+ "learning_rate": 4.637896825396826e-05,
239
+ "loss": 0.4788,
240
  "step": 370
241
  },
242
  {
243
  "epoch": 1.69,
244
+ "learning_rate": 4.613095238095239e-05,
245
+ "loss": 0.4973,
246
  "step": 380
247
  },
248
  {
249
  "epoch": 1.74,
250
+ "learning_rate": 4.5882936507936506e-05,
251
+ "loss": 0.5572,
252
  "step": 390
253
  },
254
  {
255
  "epoch": 1.78,
256
+ "learning_rate": 4.563492063492064e-05,
257
+ "loss": 0.5182,
258
  "step": 400
259
  },
260
  {
261
  "epoch": 1.83,
262
+ "learning_rate": 4.538690476190476e-05,
263
+ "loss": 0.4698,
264
  "step": 410
265
  },
266
  {
267
  "epoch": 1.87,
268
+ "learning_rate": 4.5138888888888894e-05,
269
+ "loss": 0.4881,
270
  "step": 420
271
  },
272
  {
273
  "epoch": 1.92,
274
+ "learning_rate": 4.489087301587302e-05,
275
+ "loss": 0.4835,
276
  "step": 430
277
  },
278
  {
279
  "epoch": 1.96,
280
+ "learning_rate": 4.464285714285715e-05,
281
+ "loss": 0.4969,
282
  "step": 440
283
  },
284
  {
285
  "epoch": 2.0,
286
+ "eval_accuracy": 0.6926720534967957,
287
+ "eval_loss": 0.9037817716598511,
288
+ "eval_runtime": 60.4491,
289
+ "eval_samples_per_second": 118.744,
290
+ "eval_steps_per_second": 3.722,
291
  "step": 449
292
  },
293
  {
294
  "epoch": 2.0,
295
+ "learning_rate": 4.439484126984127e-05,
296
+ "loss": 0.4971,
297
  "step": 450
298
  },
299
  {
300
  "epoch": 2.05,
301
+ "learning_rate": 4.41468253968254e-05,
302
+ "loss": 0.4229,
303
  "step": 460
304
  },
305
  {
306
  "epoch": 2.09,
307
+ "learning_rate": 4.3898809523809523e-05,
308
+ "loss": 0.3694,
309
  "step": 470
310
  },
311
  {
312
  "epoch": 2.14,
313
+ "learning_rate": 4.3650793650793655e-05,
314
+ "loss": 0.4295,
315
  "step": 480
316
  },
317
  {
318
  "epoch": 2.18,
319
+ "learning_rate": 4.340277777777778e-05,
320
+ "loss": 0.3998,
321
  "step": 490
322
  },
323
  {
324
  "epoch": 2.23,
325
+ "learning_rate": 4.315476190476191e-05,
326
+ "loss": 0.4112,
327
  "step": 500
328
  },
329
  {
330
  "epoch": 2.27,
331
+ "learning_rate": 4.290674603174603e-05,
332
+ "loss": 0.3728,
333
  "step": 510
334
  },
335
  {
336
  "epoch": 2.32,
337
+ "learning_rate": 4.265873015873016e-05,
338
+ "loss": 0.4243,
339
  "step": 520
340
  },
341
  {
342
  "epoch": 2.36,
343
+ "learning_rate": 4.2410714285714285e-05,
344
+ "loss": 0.4209,
345
  "step": 530
346
  },
347
  {
348
  "epoch": 2.41,
349
+ "learning_rate": 4.2162698412698416e-05,
350
+ "loss": 0.3886,
351
  "step": 540
352
  },
353
  {
354
  "epoch": 2.45,
355
+ "learning_rate": 4.191468253968254e-05,
356
+ "loss": 0.3707,
357
  "step": 550
358
  },
359
  {
360
  "epoch": 2.49,
361
+ "learning_rate": 4.166666666666667e-05,
362
+ "loss": 0.4203,
363
  "step": 560
364
  },
365
  {
366
  "epoch": 2.54,
367
+ "learning_rate": 4.14186507936508e-05,
368
+ "loss": 0.4213,
369
  "step": 570
370
  },
371
  {
372
  "epoch": 2.58,
373
+ "learning_rate": 4.117063492063492e-05,
374
+ "loss": 0.3677,
375
  "step": 580
376
  },
377
  {
378
  "epoch": 2.63,
379
+ "learning_rate": 4.0922619047619046e-05,
380
+ "loss": 0.4198,
381
  "step": 590
382
  },
383
  {
384
  "epoch": 2.67,
385
+ "learning_rate": 4.067460317460318e-05,
386
+ "loss": 0.4307,
387
  "step": 600
388
  },
389
  {
390
  "epoch": 2.72,
391
+ "learning_rate": 4.04265873015873e-05,
392
+ "loss": 0.4073,
393
  "step": 610
394
  },
395
  {
396
  "epoch": 2.76,
397
+ "learning_rate": 4.017857142857143e-05,
398
+ "loss": 0.3957,
399
  "step": 620
400
  },
401
  {
402
  "epoch": 2.81,
403
+ "learning_rate": 3.993055555555556e-05,
404
+ "loss": 0.479,
405
  "step": 630
406
  },
407
  {
408
  "epoch": 2.85,
409
+ "learning_rate": 3.968253968253968e-05,
410
+ "loss": 0.4069,
411
  "step": 640
412
  },
413
  {
414
  "epoch": 2.9,
415
+ "learning_rate": 3.943452380952381e-05,
416
+ "loss": 0.417,
417
  "step": 650
418
  },
419
  {
420
  "epoch": 2.94,
421
+ "learning_rate": 3.918650793650794e-05,
422
+ "loss": 0.4129,
423
  "step": 660
424
  },
425
  {
426
  "epoch": 2.98,
427
+ "learning_rate": 3.893849206349206e-05,
428
+ "loss": 0.4095,
429
  "step": 670
430
  },
431
  {
432
  "epoch": 3.0,
433
+ "eval_accuracy": 0.6976873780997492,
434
+ "eval_loss": 0.9076758027076721,
435
+ "eval_runtime": 60.0956,
436
+ "eval_samples_per_second": 119.443,
437
+ "eval_steps_per_second": 3.744,
438
  "step": 673
439
  },
440
  {
441
  "epoch": 3.03,
442
+ "learning_rate": 3.8690476190476195e-05,
443
+ "loss": 0.3228,
444
  "step": 680
445
  },
446
  {
447
  "epoch": 3.07,
448
+ "learning_rate": 3.844246031746032e-05,
449
+ "loss": 0.3362,
450
  "step": 690
451
  },
452
  {
453
  "epoch": 3.12,
454
+ "learning_rate": 3.8194444444444444e-05,
455
+ "loss": 0.3026,
456
  "step": 700
457
  },
458
  {
459
  "epoch": 3.16,
460
+ "learning_rate": 3.794642857142857e-05,
461
+ "loss": 0.3237,
462
  "step": 710
463
  },
464
  {
465
  "epoch": 3.21,
466
+ "learning_rate": 3.76984126984127e-05,
467
+ "loss": 0.3192,
468
  "step": 720
469
  },
470
  {
471
  "epoch": 3.25,
472
+ "learning_rate": 3.7450396825396824e-05,
473
+ "loss": 0.3461,
474
  "step": 730
475
  },
476
  {
477
  "epoch": 3.3,
478
+ "learning_rate": 3.7202380952380956e-05,
479
+ "loss": 0.3568,
480
  "step": 740
481
  },
482
  {
483
  "epoch": 3.34,
484
+ "learning_rate": 3.695436507936508e-05,
485
+ "loss": 0.3143,
486
  "step": 750
487
  },
488
  {
489
  "epoch": 3.39,
490
+ "learning_rate": 3.6706349206349205e-05,
491
+ "loss": 0.3381,
492
  "step": 760
493
  },
494
  {
495
  "epoch": 3.43,
496
+ "learning_rate": 3.6458333333333336e-05,
497
+ "loss": 0.3297,
498
  "step": 770
499
  },
500
  {
501
  "epoch": 3.47,
502
+ "learning_rate": 3.621031746031746e-05,
503
+ "loss": 0.3333,
504
  "step": 780
505
  },
506
  {
507
  "epoch": 3.52,
508
+ "learning_rate": 3.5962301587301586e-05,
509
+ "loss": 0.3232,
510
  "step": 790
511
  },
512
  {
513
  "epoch": 3.56,
514
+ "learning_rate": 3.571428571428572e-05,
515
+ "loss": 0.3449,
516
  "step": 800
517
  },
518
  {
519
  "epoch": 3.61,
520
+ "learning_rate": 3.546626984126984e-05,
521
+ "loss": 0.3563,
522
  "step": 810
523
  },
524
  {
525
  "epoch": 3.65,
526
+ "learning_rate": 3.521825396825397e-05,
527
+ "loss": 0.3326,
528
  "step": 820
529
  },
530
  {
531
  "epoch": 3.7,
532
+ "learning_rate": 3.49702380952381e-05,
533
+ "loss": 0.333,
534
  "step": 830
535
  },
536
  {
537
  "epoch": 3.74,
538
+ "learning_rate": 3.472222222222222e-05,
539
+ "loss": 0.3357,
540
  "step": 840
541
  },
542
  {
543
  "epoch": 3.79,
544
+ "learning_rate": 3.4474206349206354e-05,
545
+ "loss": 0.3738,
546
  "step": 850
547
  },
548
  {
549
  "epoch": 3.83,
550
+ "learning_rate": 3.422619047619048e-05,
551
+ "loss": 0.3769,
552
  "step": 860
553
  },
554
  {
555
  "epoch": 3.88,
556
+ "learning_rate": 3.397817460317461e-05,
557
+ "loss": 0.3667,
558
  "step": 870
559
  },
560
  {
561
  "epoch": 3.92,
562
+ "learning_rate": 3.3730158730158734e-05,
563
+ "loss": 0.3459,
564
  "step": 880
565
  },
566
  {
567
  "epoch": 3.96,
568
+ "learning_rate": 3.348214285714286e-05,
569
+ "loss": 0.3344,
570
  "step": 890
571
  },
572
  {
573
  "epoch": 4.0,
574
+ "eval_accuracy": 0.6989412092504876,
575
+ "eval_loss": 0.939809262752533,
576
+ "eval_runtime": 60.4275,
577
+ "eval_samples_per_second": 118.787,
578
+ "eval_steps_per_second": 3.723,
579
  "step": 898
580
  },
581
  {
582
  "epoch": 4.01,
583
+ "learning_rate": 3.3234126984126983e-05,
584
+ "loss": 0.3389,
585
  "step": 900
586
  },
587
  {
588
  "epoch": 4.05,
589
+ "learning_rate": 3.2986111111111115e-05,
590
+ "loss": 0.274,
591
  "step": 910
592
  },
593
  {
594
  "epoch": 4.1,
595
+ "learning_rate": 3.273809523809524e-05,
596
+ "loss": 0.2425,
597
  "step": 920
598
  },
599
  {
600
  "epoch": 4.14,
601
+ "learning_rate": 3.249007936507937e-05,
602
+ "loss": 0.2447,
603
  "step": 930
604
  },
605
  {
606
  "epoch": 4.19,
607
+ "learning_rate": 3.2242063492063495e-05,
608
+ "loss": 0.2604,
609
  "step": 940
610
  },
611
  {
612
  "epoch": 4.23,
613
+ "learning_rate": 3.199404761904762e-05,
614
+ "loss": 0.26,
615
  "step": 950
616
  },
617
  {
618
  "epoch": 4.28,
619
+ "learning_rate": 3.1746031746031745e-05,
620
+ "loss": 0.2547,
621
  "step": 960
622
  },
623
  {
624
  "epoch": 4.32,
625
+ "learning_rate": 3.1498015873015876e-05,
626
+ "loss": 0.2642,
627
  "step": 970
628
  },
629
  {
630
  "epoch": 4.37,
631
+ "learning_rate": 3.125e-05,
632
+ "loss": 0.2564,
633
  "step": 980
634
  },
635
  {
636
  "epoch": 4.41,
637
+ "learning_rate": 3.100198412698413e-05,
638
+ "loss": 0.2667,
639
  "step": 990
640
  },
641
  {
642
  "epoch": 4.45,
643
+ "learning_rate": 3.075396825396826e-05,
644
+ "loss": 0.2687,
645
  "step": 1000
646
  },
647
  {
648
  "epoch": 4.5,
649
+ "learning_rate": 3.0505952380952385e-05,
650
+ "loss": 0.2744,
651
  "step": 1010
652
  },
653
  {
654
  "epoch": 4.54,
655
+ "learning_rate": 3.0257936507936506e-05,
656
+ "loss": 0.2594,
657
  "step": 1020
658
  },
659
  {
660
  "epoch": 4.59,
661
+ "learning_rate": 3.0009920634920634e-05,
662
+ "loss": 0.3404,
663
  "step": 1030
664
  },
665
  {
666
  "epoch": 4.63,
667
+ "learning_rate": 2.9761904761904762e-05,
668
+ "loss": 0.3398,
669
  "step": 1040
670
  },
671
  {
672
  "epoch": 4.68,
673
+ "learning_rate": 2.951388888888889e-05,
674
+ "loss": 0.2875,
675
  "step": 1050
676
  },
677
  {
678
  "epoch": 4.72,
679
+ "learning_rate": 2.9265873015873018e-05,
680
+ "loss": 0.3213,
681
  "step": 1060
682
  },
683
  {
684
  "epoch": 4.77,
685
+ "learning_rate": 2.9017857142857146e-05,
686
+ "loss": 0.3015,
687
  "step": 1070
688
  },
689
  {
690
  "epoch": 4.81,
691
+ "learning_rate": 2.876984126984127e-05,
692
+ "loss": 0.2809,
693
  "step": 1080
694
  },
695
  {
696
  "epoch": 4.86,
697
+ "learning_rate": 2.8521825396825395e-05,
698
+ "loss": 0.3107,
699
  "step": 1090
700
  },
701
  {
702
  "epoch": 4.9,
703
+ "learning_rate": 2.8273809523809523e-05,
704
+ "loss": 0.3181,
705
  "step": 1100
706
  },
707
  {
708
  "epoch": 4.94,
709
+ "learning_rate": 2.802579365079365e-05,
710
+ "loss": 0.2677,
711
  "step": 1110
712
  },
713
  {
714
  "epoch": 4.99,
715
+ "learning_rate": 2.777777777777778e-05,
716
+ "loss": 0.3055,
717
  "step": 1120
718
  },
719
  {
720
+ "epoch": 5.0,
721
+ "eval_accuracy": 0.7057676232933965,
722
+ "eval_loss": 0.9803113341331482,
723
+ "eval_runtime": 60.4436,
724
+ "eval_samples_per_second": 118.755,
725
+ "eval_steps_per_second": 3.722,
726
+ "step": 1122
727
  },
728
  {
729
+ "epoch": 5.03,
730
+ "learning_rate": 2.7529761904761907e-05,
731
+ "loss": 0.2474,
732
+ "step": 1130
733
+ },
734
+ {
735
+ "epoch": 5.08,
736
+ "learning_rate": 2.7281746031746032e-05,
737
+ "loss": 0.2207,
738
+ "step": 1140
739
+ },
740
+ {
741
+ "epoch": 5.12,
742
+ "learning_rate": 2.703373015873016e-05,
743
+ "loss": 0.2116,
744
+ "step": 1150
745
+ },
746
+ {
747
+ "epoch": 5.17,
748
+ "learning_rate": 2.6785714285714288e-05,
749
+ "loss": 0.1894,
750
+ "step": 1160
751
+ },
752
+ {
753
+ "epoch": 5.21,
754
+ "learning_rate": 2.6537698412698416e-05,
755
+ "loss": 0.2606,
756
+ "step": 1170
757
+ },
758
+ {
759
+ "epoch": 5.26,
760
+ "learning_rate": 2.628968253968254e-05,
761
+ "loss": 0.1872,
762
+ "step": 1180
763
+ },
764
+ {
765
+ "epoch": 5.3,
766
+ "learning_rate": 2.604166666666667e-05,
767
+ "loss": 0.215,
768
+ "step": 1190
769
+ },
770
+ {
771
+ "epoch": 5.35,
772
+ "learning_rate": 2.5793650793650796e-05,
773
+ "loss": 0.1991,
774
+ "step": 1200
775
+ },
776
+ {
777
+ "epoch": 5.39,
778
+ "learning_rate": 2.554563492063492e-05,
779
+ "loss": 0.2105,
780
+ "step": 1210
781
+ },
782
+ {
783
+ "epoch": 5.43,
784
+ "learning_rate": 2.529761904761905e-05,
785
+ "loss": 0.2384,
786
+ "step": 1220
787
+ },
788
+ {
789
+ "epoch": 5.48,
790
+ "learning_rate": 2.5049603174603177e-05,
791
+ "loss": 0.222,
792
+ "step": 1230
793
+ },
794
+ {
795
+ "epoch": 5.52,
796
+ "learning_rate": 2.4801587301587305e-05,
797
+ "loss": 0.2175,
798
+ "step": 1240
799
+ },
800
+ {
801
+ "epoch": 5.57,
802
+ "learning_rate": 2.455357142857143e-05,
803
+ "loss": 0.2362,
804
+ "step": 1250
805
+ },
806
+ {
807
+ "epoch": 5.61,
808
+ "learning_rate": 2.4305555555555558e-05,
809
+ "loss": 0.2768,
810
+ "step": 1260
811
+ },
812
+ {
813
+ "epoch": 5.66,
814
+ "learning_rate": 2.4057539682539686e-05,
815
+ "loss": 0.2614,
816
+ "step": 1270
817
+ },
818
+ {
819
+ "epoch": 5.7,
820
+ "learning_rate": 2.380952380952381e-05,
821
+ "loss": 0.2127,
822
+ "step": 1280
823
+ },
824
+ {
825
+ "epoch": 5.75,
826
+ "learning_rate": 2.3561507936507938e-05,
827
+ "loss": 0.2232,
828
+ "step": 1290
829
+ },
830
+ {
831
+ "epoch": 5.79,
832
+ "learning_rate": 2.3313492063492066e-05,
833
+ "loss": 0.2089,
834
+ "step": 1300
835
+ },
836
+ {
837
+ "epoch": 5.84,
838
+ "learning_rate": 2.3065476190476194e-05,
839
+ "loss": 0.2321,
840
+ "step": 1310
841
+ },
842
+ {
843
+ "epoch": 5.88,
844
+ "learning_rate": 2.281746031746032e-05,
845
+ "loss": 0.242,
846
+ "step": 1320
847
+ },
848
+ {
849
+ "epoch": 5.92,
850
+ "learning_rate": 2.2569444444444447e-05,
851
+ "loss": 0.2388,
852
+ "step": 1330
853
+ },
854
+ {
855
+ "epoch": 5.97,
856
+ "learning_rate": 2.2321428571428575e-05,
857
+ "loss": 0.2214,
858
+ "step": 1340
859
+ },
860
+ {
861
+ "epoch": 6.0,
862
+ "eval_accuracy": 0.6953190303705767,
863
+ "eval_loss": 1.0336716175079346,
864
+ "eval_runtime": 60.3923,
865
+ "eval_samples_per_second": 118.856,
866
+ "eval_steps_per_second": 3.726,
867
+ "step": 1347
868
+ },
869
+ {
870
+ "epoch": 6.01,
871
+ "learning_rate": 2.20734126984127e-05,
872
+ "loss": 0.2192,
873
+ "step": 1350
874
+ },
875
+ {
876
+ "epoch": 6.06,
877
+ "learning_rate": 2.1825396825396827e-05,
878
+ "loss": 0.1988,
879
+ "step": 1360
880
+ },
881
+ {
882
+ "epoch": 6.1,
883
+ "learning_rate": 2.1577380952380955e-05,
884
+ "loss": 0.1624,
885
+ "step": 1370
886
+ },
887
+ {
888
+ "epoch": 6.15,
889
+ "learning_rate": 2.132936507936508e-05,
890
+ "loss": 0.1565,
891
+ "step": 1380
892
+ },
893
+ {
894
+ "epoch": 6.19,
895
+ "learning_rate": 2.1081349206349208e-05,
896
+ "loss": 0.1683,
897
+ "step": 1390
898
+ },
899
+ {
900
+ "epoch": 6.24,
901
+ "learning_rate": 2.0833333333333336e-05,
902
+ "loss": 0.181,
903
+ "step": 1400
904
+ },
905
+ {
906
+ "epoch": 6.28,
907
+ "learning_rate": 2.058531746031746e-05,
908
+ "loss": 0.2063,
909
+ "step": 1410
910
+ },
911
+ {
912
+ "epoch": 6.33,
913
+ "learning_rate": 2.033730158730159e-05,
914
+ "loss": 0.1847,
915
+ "step": 1420
916
+ },
917
+ {
918
+ "epoch": 6.37,
919
+ "learning_rate": 2.0089285714285717e-05,
920
+ "loss": 0.1802,
921
+ "step": 1430
922
+ },
923
+ {
924
+ "epoch": 6.41,
925
+ "learning_rate": 1.984126984126984e-05,
926
+ "loss": 0.1626,
927
+ "step": 1440
928
+ },
929
+ {
930
+ "epoch": 6.46,
931
+ "learning_rate": 1.959325396825397e-05,
932
+ "loss": 0.1446,
933
+ "step": 1450
934
+ },
935
+ {
936
+ "epoch": 6.5,
937
+ "learning_rate": 1.9345238095238097e-05,
938
+ "loss": 0.1693,
939
+ "step": 1460
940
+ },
941
+ {
942
+ "epoch": 6.55,
943
+ "learning_rate": 1.9097222222222222e-05,
944
+ "loss": 0.1996,
945
+ "step": 1470
946
+ },
947
+ {
948
+ "epoch": 6.59,
949
+ "learning_rate": 1.884920634920635e-05,
950
+ "loss": 0.1653,
951
+ "step": 1480
952
+ },
953
+ {
954
+ "epoch": 6.64,
955
+ "learning_rate": 1.8601190476190478e-05,
956
+ "loss": 0.1622,
957
+ "step": 1490
958
+ },
959
+ {
960
+ "epoch": 6.68,
961
+ "learning_rate": 1.8353174603174602e-05,
962
+ "loss": 0.1654,
963
+ "step": 1500
964
+ },
965
+ {
966
+ "epoch": 6.73,
967
+ "learning_rate": 1.810515873015873e-05,
968
+ "loss": 0.1843,
969
+ "step": 1510
970
+ },
971
+ {
972
+ "epoch": 6.77,
973
+ "learning_rate": 1.785714285714286e-05,
974
+ "loss": 0.1779,
975
+ "step": 1520
976
+ },
977
+ {
978
+ "epoch": 6.82,
979
+ "learning_rate": 1.7609126984126986e-05,
980
+ "loss": 0.1801,
981
+ "step": 1530
982
+ },
983
+ {
984
+ "epoch": 6.86,
985
+ "learning_rate": 1.736111111111111e-05,
986
+ "loss": 0.1829,
987
+ "step": 1540
988
+ },
989
+ {
990
+ "epoch": 6.9,
991
+ "learning_rate": 1.711309523809524e-05,
992
+ "loss": 0.1789,
993
+ "step": 1550
994
+ },
995
+ {
996
+ "epoch": 6.95,
997
+ "learning_rate": 1.6865079365079367e-05,
998
+ "loss": 0.1894,
999
+ "step": 1560
1000
+ },
1001
+ {
1002
+ "epoch": 6.99,
1003
+ "learning_rate": 1.6617063492063492e-05,
1004
+ "loss": 0.1575,
1005
+ "step": 1570
1006
+ },
1007
+ {
1008
+ "epoch": 7.0,
1009
+ "eval_accuracy": 0.6976873780997492,
1010
+ "eval_loss": 1.0642220973968506,
1011
+ "eval_runtime": 59.9724,
1012
+ "eval_samples_per_second": 119.688,
1013
+ "eval_steps_per_second": 3.752,
1014
+ "step": 1571
1015
+ },
1016
+ {
1017
+ "epoch": 7.04,
1018
+ "learning_rate": 1.636904761904762e-05,
1019
+ "loss": 0.1495,
1020
+ "step": 1580
1021
+ },
1022
+ {
1023
+ "epoch": 7.08,
1024
+ "learning_rate": 1.6121031746031748e-05,
1025
+ "loss": 0.1392,
1026
+ "step": 1590
1027
+ },
1028
+ {
1029
+ "epoch": 7.13,
1030
+ "learning_rate": 1.5873015873015872e-05,
1031
+ "loss": 0.1095,
1032
+ "step": 1600
1033
+ },
1034
+ {
1035
+ "epoch": 7.17,
1036
+ "learning_rate": 1.5625e-05,
1037
+ "loss": 0.1353,
1038
+ "step": 1610
1039
+ },
1040
+ {
1041
+ "epoch": 7.22,
1042
+ "learning_rate": 1.537698412698413e-05,
1043
+ "loss": 0.1332,
1044
+ "step": 1620
1045
+ },
1046
+ {
1047
+ "epoch": 7.26,
1048
+ "learning_rate": 1.5128968253968253e-05,
1049
+ "loss": 0.1227,
1050
+ "step": 1630
1051
+ },
1052
+ {
1053
+ "epoch": 7.31,
1054
+ "learning_rate": 1.4880952380952381e-05,
1055
+ "loss": 0.1459,
1056
+ "step": 1640
1057
+ },
1058
+ {
1059
+ "epoch": 7.35,
1060
+ "learning_rate": 1.4632936507936509e-05,
1061
+ "loss": 0.1221,
1062
+ "step": 1650
1063
+ },
1064
+ {
1065
+ "epoch": 7.39,
1066
+ "learning_rate": 1.4384920634920635e-05,
1067
+ "loss": 0.1167,
1068
+ "step": 1660
1069
+ },
1070
+ {
1071
+ "epoch": 7.44,
1072
+ "learning_rate": 1.4136904761904762e-05,
1073
+ "loss": 0.1597,
1074
+ "step": 1670
1075
+ },
1076
+ {
1077
+ "epoch": 7.48,
1078
+ "learning_rate": 1.388888888888889e-05,
1079
+ "loss": 0.1395,
1080
+ "step": 1680
1081
+ },
1082
+ {
1083
+ "epoch": 7.53,
1084
+ "learning_rate": 1.3640873015873016e-05,
1085
+ "loss": 0.1197,
1086
+ "step": 1690
1087
+ },
1088
+ {
1089
+ "epoch": 7.57,
1090
+ "learning_rate": 1.3392857142857144e-05,
1091
+ "loss": 0.1435,
1092
+ "step": 1700
1093
+ },
1094
+ {
1095
+ "epoch": 7.62,
1096
+ "learning_rate": 1.314484126984127e-05,
1097
+ "loss": 0.1084,
1098
+ "step": 1710
1099
+ },
1100
+ {
1101
+ "epoch": 7.66,
1102
+ "learning_rate": 1.2896825396825398e-05,
1103
+ "loss": 0.1581,
1104
+ "step": 1720
1105
+ },
1106
+ {
1107
+ "epoch": 7.71,
1108
+ "learning_rate": 1.2648809523809524e-05,
1109
+ "loss": 0.1389,
1110
+ "step": 1730
1111
+ },
1112
+ {
1113
+ "epoch": 7.75,
1114
+ "learning_rate": 1.2400793650793652e-05,
1115
+ "loss": 0.1297,
1116
+ "step": 1740
1117
+ },
1118
+ {
1119
+ "epoch": 7.8,
1120
+ "learning_rate": 1.2152777777777779e-05,
1121
+ "loss": 0.1641,
1122
+ "step": 1750
1123
+ },
1124
+ {
1125
+ "epoch": 7.84,
1126
+ "learning_rate": 1.1904761904761905e-05,
1127
+ "loss": 0.127,
1128
+ "step": 1760
1129
+ },
1130
+ {
1131
+ "epoch": 7.88,
1132
+ "learning_rate": 1.1656746031746033e-05,
1133
+ "loss": 0.1252,
1134
+ "step": 1770
1135
+ },
1136
+ {
1137
+ "epoch": 7.93,
1138
+ "learning_rate": 1.140873015873016e-05,
1139
+ "loss": 0.1661,
1140
+ "step": 1780
1141
+ },
1142
+ {
1143
+ "epoch": 7.97,
1144
+ "learning_rate": 1.1160714285714287e-05,
1145
+ "loss": 0.1169,
1146
+ "step": 1790
1147
+ },
1148
+ {
1149
+ "epoch": 8.0,
1150
+ "eval_accuracy": 0.7029813318473113,
1151
+ "eval_loss": 1.0828742980957031,
1152
+ "eval_runtime": 59.6867,
1153
+ "eval_samples_per_second": 120.261,
1154
+ "eval_steps_per_second": 3.77,
1155
+ "step": 1796
1156
+ },
1157
+ {
1158
+ "epoch": 8.02,
1159
+ "learning_rate": 1.0912698412698414e-05,
1160
+ "loss": 0.1218,
1161
+ "step": 1800
1162
+ },
1163
+ {
1164
+ "epoch": 8.06,
1165
+ "learning_rate": 1.066468253968254e-05,
1166
+ "loss": 0.1021,
1167
+ "step": 1810
1168
+ },
1169
+ {
1170
+ "epoch": 8.11,
1171
+ "learning_rate": 1.0416666666666668e-05,
1172
+ "loss": 0.1241,
1173
+ "step": 1820
1174
+ },
1175
+ {
1176
+ "epoch": 8.15,
1177
+ "learning_rate": 1.0168650793650794e-05,
1178
+ "loss": 0.1004,
1179
+ "step": 1830
1180
+ },
1181
+ {
1182
+ "epoch": 8.2,
1183
+ "learning_rate": 9.92063492063492e-06,
1184
+ "loss": 0.1279,
1185
+ "step": 1840
1186
+ },
1187
+ {
1188
+ "epoch": 8.24,
1189
+ "learning_rate": 9.672619047619049e-06,
1190
+ "loss": 0.1132,
1191
+ "step": 1850
1192
+ },
1193
+ {
1194
+ "epoch": 8.29,
1195
+ "learning_rate": 9.424603174603175e-06,
1196
+ "loss": 0.1137,
1197
+ "step": 1860
1198
+ },
1199
+ {
1200
+ "epoch": 8.33,
1201
+ "learning_rate": 9.176587301587301e-06,
1202
+ "loss": 0.0887,
1203
+ "step": 1870
1204
+ },
1205
+ {
1206
+ "epoch": 8.37,
1207
+ "learning_rate": 8.92857142857143e-06,
1208
+ "loss": 0.1115,
1209
+ "step": 1880
1210
+ },
1211
+ {
1212
+ "epoch": 8.42,
1213
+ "learning_rate": 8.680555555555556e-06,
1214
+ "loss": 0.0955,
1215
+ "step": 1890
1216
+ },
1217
+ {
1218
+ "epoch": 8.46,
1219
+ "learning_rate": 8.432539682539684e-06,
1220
+ "loss": 0.1268,
1221
+ "step": 1900
1222
+ },
1223
+ {
1224
+ "epoch": 8.51,
1225
+ "learning_rate": 8.18452380952381e-06,
1226
+ "loss": 0.1007,
1227
+ "step": 1910
1228
+ },
1229
+ {
1230
+ "epoch": 8.55,
1231
+ "learning_rate": 7.936507936507936e-06,
1232
+ "loss": 0.1025,
1233
+ "step": 1920
1234
+ },
1235
+ {
1236
+ "epoch": 8.6,
1237
+ "learning_rate": 7.688492063492064e-06,
1238
+ "loss": 0.1082,
1239
+ "step": 1930
1240
+ },
1241
+ {
1242
+ "epoch": 8.64,
1243
+ "learning_rate": 7.4404761904761905e-06,
1244
+ "loss": 0.0988,
1245
+ "step": 1940
1246
+ },
1247
+ {
1248
+ "epoch": 8.69,
1249
+ "learning_rate": 7.192460317460318e-06,
1250
+ "loss": 0.1012,
1251
+ "step": 1950
1252
+ },
1253
+ {
1254
+ "epoch": 8.73,
1255
+ "learning_rate": 6.944444444444445e-06,
1256
+ "loss": 0.1512,
1257
+ "step": 1960
1258
+ },
1259
+ {
1260
+ "epoch": 8.78,
1261
+ "learning_rate": 6.696428571428572e-06,
1262
+ "loss": 0.0984,
1263
+ "step": 1970
1264
+ },
1265
+ {
1266
+ "epoch": 8.82,
1267
+ "learning_rate": 6.448412698412699e-06,
1268
+ "loss": 0.1254,
1269
+ "step": 1980
1270
+ },
1271
+ {
1272
+ "epoch": 8.86,
1273
+ "learning_rate": 6.200396825396826e-06,
1274
+ "loss": 0.0763,
1275
+ "step": 1990
1276
+ },
1277
+ {
1278
+ "epoch": 8.91,
1279
+ "learning_rate": 5.9523809523809525e-06,
1280
+ "loss": 0.0746,
1281
+ "step": 2000
1282
+ },
1283
+ {
1284
+ "epoch": 8.95,
1285
+ "learning_rate": 5.70436507936508e-06,
1286
+ "loss": 0.0933,
1287
+ "step": 2010
1288
+ },
1289
+ {
1290
+ "epoch": 9.0,
1291
+ "learning_rate": 5.456349206349207e-06,
1292
+ "loss": 0.0917,
1293
+ "step": 2020
1294
+ },
1295
+ {
1296
+ "epoch": 9.0,
1297
+ "eval_accuracy": 0.7047924212872666,
1298
+ "eval_loss": 1.1121457815170288,
1299
+ "eval_runtime": 59.8317,
1300
+ "eval_samples_per_second": 119.97,
1301
+ "eval_steps_per_second": 3.761,
1302
+ "step": 2020
1303
+ },
1304
+ {
1305
+ "epoch": 9.04,
1306
+ "learning_rate": 5.208333333333334e-06,
1307
+ "loss": 0.078,
1308
+ "step": 2030
1309
+ },
1310
+ {
1311
+ "epoch": 9.09,
1312
+ "learning_rate": 4.96031746031746e-06,
1313
+ "loss": 0.0989,
1314
+ "step": 2040
1315
+ },
1316
+ {
1317
+ "epoch": 9.13,
1318
+ "learning_rate": 4.7123015873015875e-06,
1319
+ "loss": 0.0725,
1320
+ "step": 2050
1321
+ },
1322
+ {
1323
+ "epoch": 9.18,
1324
+ "learning_rate": 4.464285714285715e-06,
1325
+ "loss": 0.0887,
1326
+ "step": 2060
1327
+ },
1328
+ {
1329
+ "epoch": 9.22,
1330
+ "learning_rate": 4.216269841269842e-06,
1331
+ "loss": 0.0723,
1332
+ "step": 2070
1333
+ },
1334
+ {
1335
+ "epoch": 9.27,
1336
+ "learning_rate": 3.968253968253968e-06,
1337
+ "loss": 0.0656,
1338
+ "step": 2080
1339
+ },
1340
+ {
1341
+ "epoch": 9.31,
1342
+ "learning_rate": 3.7202380952380952e-06,
1343
+ "loss": 0.078,
1344
+ "step": 2090
1345
+ },
1346
+ {
1347
+ "epoch": 9.35,
1348
+ "learning_rate": 3.4722222222222224e-06,
1349
+ "loss": 0.1117,
1350
+ "step": 2100
1351
+ },
1352
+ {
1353
+ "epoch": 9.4,
1354
+ "learning_rate": 3.2242063492063495e-06,
1355
+ "loss": 0.0865,
1356
+ "step": 2110
1357
+ },
1358
+ {
1359
+ "epoch": 9.44,
1360
+ "learning_rate": 2.9761904761904763e-06,
1361
+ "loss": 0.1098,
1362
+ "step": 2120
1363
+ },
1364
+ {
1365
+ "epoch": 9.49,
1366
+ "learning_rate": 2.7281746031746034e-06,
1367
+ "loss": 0.0955,
1368
+ "step": 2130
1369
+ },
1370
+ {
1371
+ "epoch": 9.53,
1372
+ "learning_rate": 2.48015873015873e-06,
1373
+ "loss": 0.0943,
1374
+ "step": 2140
1375
+ },
1376
+ {
1377
+ "epoch": 9.58,
1378
+ "learning_rate": 2.2321428571428573e-06,
1379
+ "loss": 0.0811,
1380
+ "step": 2150
1381
+ },
1382
+ {
1383
+ "epoch": 9.62,
1384
+ "learning_rate": 1.984126984126984e-06,
1385
+ "loss": 0.0648,
1386
+ "step": 2160
1387
+ },
1388
+ {
1389
+ "epoch": 9.67,
1390
+ "learning_rate": 1.7361111111111112e-06,
1391
+ "loss": 0.1009,
1392
+ "step": 2170
1393
+ },
1394
+ {
1395
+ "epoch": 9.71,
1396
+ "learning_rate": 1.4880952380952381e-06,
1397
+ "loss": 0.082,
1398
+ "step": 2180
1399
+ },
1400
+ {
1401
+ "epoch": 9.76,
1402
+ "learning_rate": 1.240079365079365e-06,
1403
+ "loss": 0.0714,
1404
+ "step": 2190
1405
+ },
1406
+ {
1407
+ "epoch": 9.8,
1408
+ "learning_rate": 9.92063492063492e-07,
1409
+ "loss": 0.0613,
1410
+ "step": 2200
1411
+ },
1412
+ {
1413
+ "epoch": 9.84,
1414
+ "learning_rate": 7.440476190476191e-07,
1415
+ "loss": 0.106,
1416
+ "step": 2210
1417
+ },
1418
+ {
1419
+ "epoch": 9.89,
1420
+ "learning_rate": 4.96031746031746e-07,
1421
+ "loss": 0.0915,
1422
+ "step": 2220
1423
+ },
1424
+ {
1425
+ "epoch": 9.93,
1426
+ "learning_rate": 2.48015873015873e-07,
1427
+ "loss": 0.0756,
1428
+ "step": 2230
1429
+ },
1430
+ {
1431
+ "epoch": 9.98,
1432
+ "learning_rate": 0.0,
1433
+ "loss": 0.0785,
1434
+ "step": 2240
1435
+ },
1436
+ {
1437
+ "epoch": 9.98,
1438
+ "eval_accuracy": 0.7052103650041794,
1439
+ "eval_loss": 1.1279975175857544,
1440
+ "eval_runtime": 66.0405,
1441
+ "eval_samples_per_second": 108.691,
1442
+ "eval_steps_per_second": 3.407,
1443
+ "step": 2240
1444
+ },
1445
+ {
1446
+ "epoch": 9.98,
1447
+ "step": 2240,
1448
+ "total_flos": 2.2200667552042852e+19,
1449
+ "train_loss": 0.27600424638284105,
1450
+ "train_runtime": 6453.2956,
1451
+ "train_samples_per_second": 44.487,
1452
+ "train_steps_per_second": 0.347
1453
  }
1454
  ],
1455
  "logging_steps": 10,
1456
+ "max_steps": 2240,
1457
  "num_input_tokens_seen": 0,
1458
+ "num_train_epochs": 10,
1459
  "save_steps": 500,
1460
+ "total_flos": 2.2200667552042852e+19,
1461
  "train_batch_size": 32,
1462
  "trial_name": null,
1463
  "trial_params": null