GlycerinLOL commited on
Commit
164b184
1 Parent(s): ba686a7

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +222 -22
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 8.0,
3
- "train_loss": 1.7074453571236545,
4
- "train_runtime": 28108.9431,
5
- "train_samples_per_second": 28.461,
6
- "train_steps_per_second": 0.222
7
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.8017544373965294,
4
+ "train_runtime": 29679.9237,
5
+ "train_samples_per_second": 53.908,
6
+ "train_steps_per_second": 0.421
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 8.0,
3
- "train_loss": 1.7074453571236545,
4
- "train_runtime": 28108.9431,
5
- "train_samples_per_second": 28.461,
6
- "train_steps_per_second": 0.222
7
  }
 
1
  {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.8017544373965294,
4
+ "train_runtime": 29679.9237,
5
+ "train_samples_per_second": 53.908,
6
+ "train_steps_per_second": 0.421
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.99744,
5
  "eval_steps": 500,
6
- "global_step": 6248,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -195,35 +195,235 @@
195
  {
196
  "epoch": 8.0,
197
  "eval_f1": 0.9123,
198
- "eval_gen_len": 25.38509090909091,
199
- "eval_loss": 1.540870189666748,
200
  "eval_precision": 0.9131,
201
- "eval_recall": 0.9117,
202
- "eval_rouge1": 0.4869,
203
- "eval_rouge2": 0.2373,
204
- "eval_rougeL": 0.406,
205
- "eval_rougeLsum": 0.4058,
206
- "eval_runtime": 863.7281,
207
- "eval_samples_per_second": 6.368,
208
- "eval_steps_per_second": 0.398,
209
  "step": 6248
210
  },
211
  {
212
- "epoch": 8.0,
213
- "step": 6248,
214
- "total_flos": 1.1554159142381814e+18,
215
- "train_loss": 1.7074453571236545,
216
- "train_runtime": 28108.9431,
217
- "train_samples_per_second": 28.461,
218
- "train_steps_per_second": 0.222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  }
220
  ],
221
  "logging_steps": 500,
222
- "max_steps": 6248,
223
  "num_input_tokens_seen": 0,
224
- "num_train_epochs": 8,
225
  "save_steps": 500,
226
- "total_flos": 1.1554159142381814e+18,
227
  "train_batch_size": 32,
228
  "trial_name": null,
229
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 15.99712,
5
  "eval_steps": 500,
6
+ "global_step": 12496,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
195
  {
196
  "epoch": 8.0,
197
  "eval_f1": 0.9123,
198
+ "eval_gen_len": 25.383636363636363,
199
+ "eval_loss": 1.5380274057388306,
200
  "eval_precision": 0.9131,
201
+ "eval_recall": 0.9118,
202
+ "eval_rouge1": 0.4872,
203
+ "eval_rouge2": 0.2387,
204
+ "eval_rougeL": 0.407,
205
+ "eval_rougeLsum": 0.4071,
206
+ "eval_runtime": 863.1004,
207
+ "eval_samples_per_second": 6.372,
208
+ "eval_steps_per_second": 0.399,
209
  "step": 6248
210
  },
211
  {
212
+ "epoch": 8.32,
213
+ "learning_rate": 9.596670934699104e-06,
214
+ "loss": 1.5886,
215
+ "step": 6500
216
+ },
217
+ {
218
+ "epoch": 8.96,
219
+ "learning_rate": 8.796414852752882e-06,
220
+ "loss": 1.5836,
221
+ "step": 7000
222
+ },
223
+ {
224
+ "epoch": 9.0,
225
+ "eval_f1": 0.9126,
226
+ "eval_gen_len": 25.499454545454544,
227
+ "eval_loss": 1.527321696281433,
228
+ "eval_precision": 0.9133,
229
+ "eval_recall": 0.9122,
230
+ "eval_rouge1": 0.4891,
231
+ "eval_rouge2": 0.2399,
232
+ "eval_rougeL": 0.4088,
233
+ "eval_rougeLsum": 0.4089,
234
+ "eval_runtime": 859.8962,
235
+ "eval_samples_per_second": 6.396,
236
+ "eval_steps_per_second": 0.4,
237
+ "step": 7029
238
+ },
239
+ {
240
+ "epoch": 9.6,
241
+ "learning_rate": 7.996158770806658e-06,
242
+ "loss": 1.5667,
243
+ "step": 7500
244
+ },
245
+ {
246
+ "epoch": 10.0,
247
+ "eval_f1": 0.9127,
248
+ "eval_gen_len": 25.386727272727274,
249
+ "eval_loss": 1.5195879936218262,
250
+ "eval_precision": 0.9135,
251
+ "eval_recall": 0.9123,
252
+ "eval_rouge1": 0.4906,
253
+ "eval_rouge2": 0.2416,
254
+ "eval_rougeL": 0.411,
255
+ "eval_rougeLsum": 0.4112,
256
+ "eval_runtime": 855.4017,
257
+ "eval_samples_per_second": 6.43,
258
+ "eval_steps_per_second": 0.402,
259
+ "step": 7810
260
+ },
261
+ {
262
+ "epoch": 10.24,
263
+ "learning_rate": 7.195902688860435e-06,
264
+ "loss": 1.5577,
265
+ "step": 8000
266
+ },
267
+ {
268
+ "epoch": 10.88,
269
+ "learning_rate": 6.395646606914213e-06,
270
+ "loss": 1.5521,
271
+ "step": 8500
272
+ },
273
+ {
274
+ "epoch": 11.0,
275
+ "eval_f1": 0.9127,
276
+ "eval_gen_len": 25.21909090909091,
277
+ "eval_loss": 1.5124093294143677,
278
+ "eval_precision": 0.9137,
279
+ "eval_recall": 0.912,
280
+ "eval_rouge1": 0.4899,
281
+ "eval_rouge2": 0.2406,
282
+ "eval_rougeL": 0.4102,
283
+ "eval_rougeLsum": 0.4103,
284
+ "eval_runtime": 851.6688,
285
+ "eval_samples_per_second": 6.458,
286
+ "eval_steps_per_second": 0.404,
287
+ "step": 8592
288
+ },
289
+ {
290
+ "epoch": 11.52,
291
+ "learning_rate": 5.595390524967991e-06,
292
+ "loss": 1.5413,
293
+ "step": 9000
294
+ },
295
+ {
296
+ "epoch": 12.0,
297
+ "eval_f1": 0.9128,
298
+ "eval_gen_len": 25.349090909090908,
299
+ "eval_loss": 1.5083255767822266,
300
+ "eval_precision": 0.9137,
301
+ "eval_recall": 0.9123,
302
+ "eval_rouge1": 0.4914,
303
+ "eval_rouge2": 0.2416,
304
+ "eval_rougeL": 0.4118,
305
+ "eval_rougeLsum": 0.412,
306
+ "eval_runtime": 856.2804,
307
+ "eval_samples_per_second": 6.423,
308
+ "eval_steps_per_second": 0.402,
309
+ "step": 9373
310
+ },
311
+ {
312
+ "epoch": 12.16,
313
+ "learning_rate": 4.795134443021768e-06,
314
+ "loss": 1.5354,
315
+ "step": 9500
316
+ },
317
+ {
318
+ "epoch": 12.8,
319
+ "learning_rate": 3.9948783610755446e-06,
320
+ "loss": 1.5291,
321
+ "step": 10000
322
+ },
323
+ {
324
+ "epoch": 13.0,
325
+ "eval_f1": 0.913,
326
+ "eval_gen_len": 25.208181818181817,
327
+ "eval_loss": 1.5044068098068237,
328
+ "eval_precision": 0.914,
329
+ "eval_recall": 0.9123,
330
+ "eval_rouge1": 0.4913,
331
+ "eval_rouge2": 0.2419,
332
+ "eval_rougeL": 0.4118,
333
+ "eval_rougeLsum": 0.4119,
334
+ "eval_runtime": 858.747,
335
+ "eval_samples_per_second": 6.405,
336
+ "eval_steps_per_second": 0.401,
337
+ "step": 10154
338
+ },
339
+ {
340
+ "epoch": 13.44,
341
+ "learning_rate": 3.194622279129322e-06,
342
+ "loss": 1.527,
343
+ "step": 10500
344
+ },
345
+ {
346
+ "epoch": 14.0,
347
+ "eval_f1": 0.913,
348
+ "eval_gen_len": 25.10690909090909,
349
+ "eval_loss": 1.5025616884231567,
350
+ "eval_precision": 0.9141,
351
+ "eval_recall": 0.9123,
352
+ "eval_rouge1": 0.4917,
353
+ "eval_rouge2": 0.2426,
354
+ "eval_rougeL": 0.4126,
355
+ "eval_rougeLsum": 0.4128,
356
+ "eval_runtime": 849.8377,
357
+ "eval_samples_per_second": 6.472,
358
+ "eval_steps_per_second": 0.405,
359
+ "step": 10935
360
+ },
361
+ {
362
+ "epoch": 14.08,
363
+ "learning_rate": 2.3943661971830984e-06,
364
+ "loss": 1.5206,
365
+ "step": 11000
366
+ },
367
+ {
368
+ "epoch": 14.72,
369
+ "learning_rate": 1.594110115236876e-06,
370
+ "loss": 1.5203,
371
+ "step": 11500
372
+ },
373
+ {
374
+ "epoch": 15.0,
375
+ "eval_f1": 0.9131,
376
+ "eval_gen_len": 25.106181818181817,
377
+ "eval_loss": 1.5006238222122192,
378
+ "eval_precision": 0.9143,
379
+ "eval_recall": 0.9123,
380
+ "eval_rouge1": 0.4921,
381
+ "eval_rouge2": 0.243,
382
+ "eval_rougeL": 0.4135,
383
+ "eval_rougeLsum": 0.4136,
384
+ "eval_runtime": 854.2433,
385
+ "eval_samples_per_second": 6.438,
386
+ "eval_steps_per_second": 0.403,
387
+ "step": 11717
388
+ },
389
+ {
390
+ "epoch": 15.36,
391
+ "learning_rate": 7.93854033290653e-07,
392
+ "loss": 1.5126,
393
+ "step": 12000
394
+ },
395
+ {
396
+ "epoch": 16.0,
397
+ "eval_f1": 0.9132,
398
+ "eval_gen_len": 25.133454545454544,
399
+ "eval_loss": 1.5003753900527954,
400
+ "eval_precision": 0.9143,
401
+ "eval_recall": 0.9124,
402
+ "eval_rouge1": 0.4923,
403
+ "eval_rouge2": 0.2429,
404
+ "eval_rougeL": 0.4134,
405
+ "eval_rougeLsum": 0.4134,
406
+ "eval_runtime": 851.4198,
407
+ "eval_samples_per_second": 6.46,
408
+ "eval_steps_per_second": 0.404,
409
+ "step": 12496
410
+ },
411
+ {
412
+ "epoch": 16.0,
413
+ "step": 12496,
414
+ "total_flos": 2.310831828476363e+18,
415
+ "train_loss": 0.8017544373965294,
416
+ "train_runtime": 29679.9237,
417
+ "train_samples_per_second": 53.908,
418
+ "train_steps_per_second": 0.421
419
  }
420
  ],
421
  "logging_steps": 500,
422
+ "max_steps": 12496,
423
  "num_input_tokens_seen": 0,
424
+ "num_train_epochs": 16,
425
  "save_steps": 500,
426
+ "total_flos": 2.310831828476363e+18,
427
  "train_batch_size": 32,
428
  "trial_name": null,
429
  "trial_params": null