sheepy928 commited on
Commit
dbc1227
1 Parent(s): 7d1d31c

Training in progress, step 300, checkpoint

Browse files
checkpoint-300/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d27840db45fa9f67419833b94359281e32f6bdaf5c4aefe20c0976ecee23be1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:690f09f9159427695d39ed774d84663044c4bf791a59678cb51fefc40a846afa
3
  size 14244
checkpoint-300/trainer_state.json CHANGED
@@ -23,10 +23,14 @@
23
  {
24
  "epoch": 0.06,
25
  "eval_accuracy": 0.44479243019924036,
 
 
26
  "eval_loss": 1.065091609954834,
27
- "eval_runtime": 50.348,
28
- "eval_samples_per_second": 596.131,
29
- "eval_steps_per_second": 1.172,
 
 
30
  "step": 20
31
  },
32
  {
@@ -44,10 +48,14 @@
44
  {
45
  "epoch": 0.12,
46
  "eval_accuracy": 0.5033650962883988,
 
 
47
  "eval_loss": 1.0188277959823608,
48
- "eval_runtime": 49.614,
49
- "eval_samples_per_second": 604.95,
50
- "eval_steps_per_second": 1.189,
 
 
51
  "step": 40
52
  },
53
  {
@@ -65,10 +73,14 @@
65
  {
66
  "epoch": 0.18,
67
  "eval_accuracy": 0.5279203038581995,
 
 
68
  "eval_loss": 0.9871189594268799,
69
- "eval_runtime": 49.5201,
70
- "eval_samples_per_second": 606.098,
71
- "eval_steps_per_second": 1.191,
 
 
72
  "step": 60
73
  },
74
  {
@@ -86,10 +98,14 @@
86
  {
87
  "epoch": 0.24,
88
  "eval_accuracy": 0.5308189511561271,
 
 
89
  "eval_loss": 0.9888613224029541,
90
- "eval_runtime": 49.4469,
91
- "eval_samples_per_second": 606.994,
92
- "eval_steps_per_second": 1.193,
 
 
93
  "step": 80
94
  },
95
  {
@@ -107,10 +123,14 @@
107
  {
108
  "epoch": 0.3,
109
  "eval_accuracy": 0.5307856333710935,
 
 
110
  "eval_loss": 0.9762536883354187,
111
- "eval_runtime": 49.4804,
112
- "eval_samples_per_second": 606.583,
113
- "eval_steps_per_second": 1.192,
 
 
114
  "step": 100
115
  },
116
  {
@@ -128,10 +148,14 @@
128
  {
129
  "epoch": 0.36,
130
  "eval_accuracy": 0.5387819017791697,
 
 
131
  "eval_loss": 0.9713281989097595,
132
- "eval_runtime": 49.4983,
133
- "eval_samples_per_second": 606.364,
134
- "eval_steps_per_second": 1.192,
 
 
135
  "step": 120
136
  },
137
  {
@@ -149,10 +173,14 @@
149
  {
150
  "epoch": 0.42,
151
  "eval_accuracy": 0.5312520823615646,
 
 
152
  "eval_loss": 0.9766249656677246,
153
- "eval_runtime": 49.5213,
154
- "eval_samples_per_second": 606.083,
155
- "eval_steps_per_second": 1.191,
 
 
156
  "step": 140
157
  },
158
  {
@@ -170,10 +198,14 @@
170
  {
171
  "epoch": 0.48,
172
  "eval_accuracy": 0.5398147531152129,
 
 
173
  "eval_loss": 0.9589501619338989,
174
- "eval_runtime": 49.4977,
175
- "eval_samples_per_second": 606.372,
176
- "eval_steps_per_second": 1.192,
 
 
177
  "step": 160
178
  },
179
  {
@@ -191,10 +223,14 @@
191
  {
192
  "epoch": 0.54,
193
  "eval_accuracy": 0.5423469047777704,
 
 
194
  "eval_loss": 0.953514814376831,
195
- "eval_runtime": 49.4511,
196
- "eval_samples_per_second": 606.943,
197
- "eval_steps_per_second": 1.193,
 
 
198
  "step": 180
199
  },
200
  {
@@ -212,10 +248,14 @@
212
  {
213
  "epoch": 0.6,
214
  "eval_accuracy": 0.567201972412874,
 
 
215
  "eval_loss": 0.9273685812950134,
216
- "eval_runtime": 49.4122,
217
- "eval_samples_per_second": 607.421,
218
- "eval_steps_per_second": 1.194,
 
 
219
  "step": 200
220
  },
221
  {
@@ -233,10 +273,14 @@
233
  {
234
  "epoch": 0.66,
235
  "eval_accuracy": 0.573598987139335,
 
 
236
  "eval_loss": 0.912590503692627,
237
- "eval_runtime": 49.2827,
238
- "eval_samples_per_second": 609.017,
239
- "eval_steps_per_second": 1.197,
 
 
240
  "step": 220
241
  },
242
  {
@@ -254,10 +298,14 @@
254
  {
255
  "epoch": 0.72,
256
  "eval_accuracy": 0.5759645498767242,
 
 
257
  "eval_loss": 0.9052607417106628,
258
- "eval_runtime": 49.4457,
259
- "eval_samples_per_second": 607.01,
260
- "eval_steps_per_second": 1.193,
 
 
261
  "step": 240
262
  },
263
  {
@@ -275,10 +323,14 @@
275
  {
276
  "epoch": 0.78,
277
  "eval_accuracy": 0.5766975411474645,
 
 
278
  "eval_loss": 0.9178985953330994,
279
- "eval_runtime": 49.3132,
280
- "eval_samples_per_second": 608.641,
281
- "eval_steps_per_second": 1.196,
 
 
282
  "step": 260
283
  },
284
  {
@@ -296,10 +348,14 @@
296
  {
297
  "epoch": 0.84,
298
  "eval_accuracy": 0.58915839275005,
 
 
299
  "eval_loss": 0.8937407732009888,
300
- "eval_runtime": 49.4342,
301
- "eval_samples_per_second": 607.15,
302
- "eval_steps_per_second": 1.194,
 
 
303
  "step": 280
304
  },
305
  {
@@ -317,18 +373,22 @@
317
  {
318
  "epoch": 0.9,
319
  "eval_accuracy": 0.613013926834144,
 
 
320
  "eval_loss": 0.8468813300132751,
321
- "eval_runtime": 49.5046,
322
- "eval_samples_per_second": 606.287,
323
- "eval_steps_per_second": 1.192,
 
 
324
  "step": 300
325
  }
326
  ],
327
  "logging_steps": 10,
328
- "max_steps": 996,
329
- "num_train_epochs": 3,
330
  "save_steps": 100,
331
- "total_flos": 4.04142195474432e+16,
332
  "trial_name": null,
333
  "trial_params": null
334
  }
 
23
  {
24
  "epoch": 0.06,
25
  "eval_accuracy": 0.44479243019924036,
26
+ "eval_combined_score": 0.3403229798934559,
27
+ "eval_f1": 0.2738667532127967,
28
  "eval_loss": 1.065091609954834,
29
+ "eval_precision": 0.19784030596254612,
30
+ "eval_recall": 0.44479243019924036,
31
+ "eval_runtime": 49.6621,
32
+ "eval_samples_per_second": 604.364,
33
+ "eval_steps_per_second": 1.188,
34
  "step": 20
35
  },
36
  {
 
48
  {
49
  "epoch": 0.12,
50
  "eval_accuracy": 0.5033650962883988,
51
+ "eval_combined_score": 0.4633375988688383,
52
+ "eval_f1": 0.4137746858205889,
53
  "eval_loss": 1.0188277959823608,
54
+ "eval_precision": 0.43284551707796665,
55
+ "eval_recall": 0.5033650962883988,
56
+ "eval_runtime": 49.1985,
57
+ "eval_samples_per_second": 610.06,
58
+ "eval_steps_per_second": 1.199,
59
  "step": 40
60
  },
61
  {
 
73
  {
74
  "epoch": 0.18,
75
  "eval_accuracy": 0.5279203038581995,
76
+ "eval_combined_score": 0.48433586826626607,
77
+ "eval_f1": 0.4596383010419703,
78
  "eval_loss": 0.9871189594268799,
79
+ "eval_precision": 0.421864564306695,
80
+ "eval_recall": 0.5279203038581995,
81
+ "eval_runtime": 49.0237,
82
+ "eval_samples_per_second": 612.235,
83
+ "eval_steps_per_second": 1.204,
84
  "step": 60
85
  },
86
  {
 
98
  {
99
  "epoch": 0.24,
100
  "eval_accuracy": 0.5308189511561271,
101
+ "eval_combined_score": 0.4866197693698143,
102
+ "eval_f1": 0.46528335920671143,
103
  "eval_loss": 0.9888613224029541,
104
+ "eval_precision": 0.4195578159602916,
105
+ "eval_recall": 0.5308189511561271,
106
+ "eval_runtime": 49.518,
107
+ "eval_samples_per_second": 606.124,
108
+ "eval_steps_per_second": 1.191,
109
  "step": 80
110
  },
111
  {
 
123
  {
124
  "epoch": 0.3,
125
  "eval_accuracy": 0.5307856333710935,
126
+ "eval_combined_score": 0.48644275806230897,
127
+ "eval_f1": 0.4666072639999521,
128
  "eval_loss": 0.9762536883354187,
129
+ "eval_precision": 0.4175925015070968,
130
+ "eval_recall": 0.5307856333710935,
131
+ "eval_runtime": 49.4629,
132
+ "eval_samples_per_second": 606.798,
133
+ "eval_steps_per_second": 1.193,
134
  "step": 100
135
  },
136
  {
 
148
  {
149
  "epoch": 0.36,
150
  "eval_accuracy": 0.5387819017791697,
151
+ "eval_combined_score": 0.4944622623129249,
152
+ "eval_f1": 0.47105988260831677,
153
  "eval_loss": 0.9713281989097595,
154
+ "eval_precision": 0.4292253630850433,
155
+ "eval_recall": 0.5387819017791697,
156
+ "eval_runtime": 49.0682,
157
+ "eval_samples_per_second": 611.68,
158
+ "eval_steps_per_second": 1.202,
159
  "step": 120
160
  },
161
  {
 
173
  {
174
  "epoch": 0.42,
175
  "eval_accuracy": 0.5312520823615646,
176
+ "eval_combined_score": 0.48712844254868687,
177
+ "eval_f1": 0.4673598197970026,
178
  "eval_loss": 0.9766249656677246,
179
+ "eval_precision": 0.41864978567461564,
180
+ "eval_recall": 0.5312520823615646,
181
+ "eval_runtime": 49.1859,
182
+ "eval_samples_per_second": 610.215,
183
+ "eval_steps_per_second": 1.2,
184
  "step": 140
185
  },
186
  {
 
198
  {
199
  "epoch": 0.48,
200
  "eval_accuracy": 0.5398147531152129,
201
+ "eval_combined_score": 0.49477075635813,
202
+ "eval_f1": 0.4751263005883661,
203
  "eval_loss": 0.9589501619338989,
204
+ "eval_precision": 0.4243272186137281,
205
+ "eval_recall": 0.5398147531152129,
206
+ "eval_runtime": 49.0847,
207
+ "eval_samples_per_second": 611.473,
208
+ "eval_steps_per_second": 1.202,
209
  "step": 160
210
  },
211
  {
 
223
  {
224
  "epoch": 0.54,
225
  "eval_accuracy": 0.5423469047777704,
226
+ "eval_combined_score": 0.49718028497336725,
227
+ "eval_f1": 0.4771728160733735,
228
  "eval_loss": 0.953514814376831,
229
+ "eval_precision": 0.42685451426455484,
230
+ "eval_recall": 0.5423469047777704,
231
+ "eval_runtime": 49.0032,
232
+ "eval_samples_per_second": 612.491,
233
+ "eval_steps_per_second": 1.204,
234
  "step": 180
235
  },
236
  {
 
248
  {
249
  "epoch": 0.6,
250
  "eval_accuracy": 0.567201972412874,
251
+ "eval_combined_score": 0.5200447629299639,
252
+ "eval_f1": 0.49911443945569844,
253
  "eval_loss": 0.9273685812950134,
254
+ "eval_precision": 0.44666066743840943,
255
+ "eval_recall": 0.567201972412874,
256
+ "eval_runtime": 49.1912,
257
+ "eval_samples_per_second": 610.15,
258
+ "eval_steps_per_second": 1.199,
259
  "step": 200
260
  },
261
  {
 
273
  {
274
  "epoch": 0.66,
275
  "eval_accuracy": 0.573598987139335,
276
+ "eval_combined_score": 0.5278568904198743,
277
+ "eval_f1": 0.5026102591641352,
278
  "eval_loss": 0.912590503692627,
279
+ "eval_precision": 0.4616193282366919,
280
+ "eval_recall": 0.573598987139335,
281
+ "eval_runtime": 49.2154,
282
+ "eval_samples_per_second": 609.849,
283
+ "eval_steps_per_second": 1.199,
284
  "step": 220
285
  },
286
  {
 
298
  {
299
  "epoch": 0.72,
300
  "eval_accuracy": 0.5759645498767242,
301
+ "eval_combined_score": 0.5280177627115059,
302
+ "eval_f1": 0.5069205876947673,
303
  "eval_loss": 0.9052607417106628,
304
+ "eval_precision": 0.45322136339780783,
305
+ "eval_recall": 0.5759645498767242,
306
+ "eval_runtime": 49.1399,
307
+ "eval_samples_per_second": 610.787,
308
+ "eval_steps_per_second": 1.201,
309
  "step": 240
310
  },
311
  {
 
323
  {
324
  "epoch": 0.78,
325
  "eval_accuracy": 0.5766975411474645,
326
+ "eval_combined_score": 0.534067292009198,
327
+ "eval_f1": 0.5018221478654629,
328
  "eval_loss": 0.9178985953330994,
329
+ "eval_precision": 0.4810519378764,
330
+ "eval_recall": 0.5766975411474645,
331
+ "eval_runtime": 49.2092,
332
+ "eval_samples_per_second": 609.927,
333
+ "eval_steps_per_second": 1.199,
334
  "step": 260
335
  },
336
  {
 
348
  {
349
  "epoch": 0.84,
350
  "eval_accuracy": 0.58915839275005,
351
+ "eval_combined_score": 0.5407092882328752,
352
+ "eval_f1": 0.5183161389995846,
353
  "eval_loss": 0.8937407732009888,
354
+ "eval_precision": 0.46620422843181647,
355
+ "eval_recall": 0.58915839275005,
356
+ "eval_runtime": 49.2523,
357
+ "eval_samples_per_second": 609.393,
358
+ "eval_steps_per_second": 1.198,
359
  "step": 280
360
  },
361
  {
 
373
  {
374
  "epoch": 0.9,
375
  "eval_accuracy": 0.613013926834144,
376
+ "eval_combined_score": 0.5923759012239074,
377
+ "eval_f1": 0.5579282363395321,
378
  "eval_loss": 0.8468813300132751,
379
+ "eval_precision": 0.5855475148878095,
380
+ "eval_recall": 0.613013926834144,
381
+ "eval_runtime": 49.1896,
382
+ "eval_samples_per_second": 610.17,
383
+ "eval_steps_per_second": 1.199,
384
  "step": 300
385
  }
386
  ],
387
  "logging_steps": 10,
388
+ "max_steps": 1660,
389
+ "num_train_epochs": 5,
390
  "save_steps": 100,
391
+ "total_flos": 4.04142209630208e+16,
392
  "trial_name": null,
393
  "trial_params": null
394
  }
checkpoint-300/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f1be80ebe52f6e43af0b8aa087e72fad77310d5998b6e0b8f66a6a1d53be7b7
3
  size 4536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae98e7de1b05a570517ae68653e3c31b639a52c739d05197601f467f38b01c66
3
  size 4536