GlycerinLOL committed on
Commit
19f8eba
1 Parent(s): a7630b1

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +204 -22
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 16.0,
3
- "train_loss": 1.1567621652086957,
4
- "train_runtime": 40538.4288,
5
- "train_samples_per_second": 19.734,
6
- "train_steps_per_second": 0.206
7
  }
 
1
  {
2
+ "epoch": 24.0,
3
+ "train_loss": 0.22391605226564926,
4
+ "train_runtime": 17118.5241,
5
+ "train_samples_per_second": 70.1,
6
+ "train_steps_per_second": 0.73
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 16.0,
3
- "train_loss": 1.1567621652086957,
4
- "train_runtime": 40538.4288,
5
- "train_samples_per_second": 19.734,
6
- "train_steps_per_second": 0.206
7
  }
 
1
  {
2
+ "epoch": 24.0,
3
+ "train_loss": 0.22391605226564926,
4
+ "train_runtime": 17118.5241,
5
+ "train_samples_per_second": 70.1,
6
+ "train_steps_per_second": 0.73
7
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.0,
5
  "eval_steps": 500,
6
- "global_step": 8336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -346,36 +346,218 @@
346
  },
347
  {
348
  "epoch": 16.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  "eval_f1": 0.9034,
350
- "eval_gen_len": 19.900727272727273,
351
- "eval_loss": 1.5434002876281738,
352
  "eval_precision": 0.9159,
353
  "eval_recall": 0.8916,
354
- "eval_rouge1": 0.4476,
355
- "eval_rouge2": 0.2292,
356
- "eval_rougeL": 0.3868,
357
- "eval_rougeLsum": 0.3865,
358
- "eval_runtime": 313.163,
359
- "eval_samples_per_second": 8.781,
360
- "eval_steps_per_second": 0.549,
361
- "step": 8336
362
  },
363
  {
364
- "epoch": 16.0,
365
- "step": 8336,
366
- "total_flos": 1.7130441774590853e+18,
367
- "train_loss": 1.1567621652086957,
368
- "train_runtime": 40538.4288,
369
- "train_samples_per_second": 19.734,
370
- "train_steps_per_second": 0.206
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
371
  }
372
  ],
373
  "logging_steps": 500,
374
- "max_steps": 8336,
375
  "num_input_tokens_seen": 0,
376
- "num_train_epochs": 16,
377
  "save_steps": 500,
378
- "total_flos": 1.7130441774590853e+18,
379
  "train_batch_size": 24,
380
  "trial_name": null,
381
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 24.0,
5
  "eval_steps": 500,
6
+ "global_step": 12504,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
346
  },
347
  {
348
  "epoch": 16.0,
349
+ "eval_f1": 0.9025,
350
+ "eval_gen_len": 19.942545454545453,
351
+ "eval_loss": 1.5439822673797607,
352
+ "eval_precision": 0.9151,
353
+ "eval_recall": 0.8905,
354
+ "eval_rouge1": 0.4427,
355
+ "eval_rouge2": 0.225,
356
+ "eval_rougeL": 0.382,
357
+ "eval_rougeLsum": 0.382,
358
+ "eval_runtime": 314.8749,
359
+ "eval_samples_per_second": 8.734,
360
+ "eval_steps_per_second": 0.546,
361
+ "step": 8336
362
+ },
363
+ {
364
+ "epoch": 16.31,
365
+ "learning_rate": 6.404350607805503e-06,
366
+ "loss": 0.8806,
367
+ "step": 8500
368
+ },
369
+ {
370
+ "epoch": 17.0,
371
+ "eval_f1": 0.9036,
372
+ "eval_gen_len": 19.88509090909091,
373
+ "eval_loss": 1.5509530305862427,
374
+ "eval_precision": 0.9159,
375
+ "eval_recall": 0.8919,
376
+ "eval_rouge1": 0.4495,
377
+ "eval_rouge2": 0.2279,
378
+ "eval_rougeL": 0.3868,
379
+ "eval_rougeLsum": 0.3869,
380
+ "eval_runtime": 312.7951,
381
+ "eval_samples_per_second": 8.792,
382
+ "eval_steps_per_second": 0.55,
383
+ "step": 8857
384
+ },
385
+ {
386
+ "epoch": 17.27,
387
+ "learning_rate": 5.6046065259117085e-06,
388
+ "loss": 0.8683,
389
+ "step": 9000
390
+ },
391
+ {
392
+ "epoch": 18.0,
393
+ "eval_f1": 0.9038,
394
+ "eval_gen_len": 19.88290909090909,
395
+ "eval_loss": 1.56792151927948,
396
+ "eval_precision": 0.9161,
397
+ "eval_recall": 0.8921,
398
+ "eval_rouge1": 0.4473,
399
+ "eval_rouge2": 0.2282,
400
+ "eval_rougeL": 0.3856,
401
+ "eval_rougeLsum": 0.3857,
402
+ "eval_runtime": 314.8371,
403
+ "eval_samples_per_second": 8.735,
404
+ "eval_steps_per_second": 0.546,
405
+ "step": 9378
406
+ },
407
+ {
408
+ "epoch": 18.23,
409
+ "learning_rate": 4.804862444017915e-06,
410
+ "loss": 0.8413,
411
+ "step": 9500
412
+ },
413
+ {
414
+ "epoch": 19.0,
415
+ "eval_f1": 0.9035,
416
+ "eval_gen_len": 19.913454545454545,
417
+ "eval_loss": 1.574545979499817,
418
+ "eval_precision": 0.9159,
419
+ "eval_recall": 0.8918,
420
+ "eval_rouge1": 0.4492,
421
+ "eval_rouge2": 0.2282,
422
+ "eval_rougeL": 0.3861,
423
+ "eval_rougeLsum": 0.3864,
424
+ "eval_runtime": 311.5846,
425
+ "eval_samples_per_second": 8.826,
426
+ "eval_steps_per_second": 0.552,
427
+ "step": 9899
428
+ },
429
+ {
430
+ "epoch": 19.19,
431
+ "learning_rate": 4.005118362124121e-06,
432
+ "loss": 0.8257,
433
+ "step": 10000
434
+ },
435
+ {
436
+ "epoch": 20.0,
437
+ "eval_f1": 0.9031,
438
+ "eval_gen_len": 19.899636363636365,
439
+ "eval_loss": 1.583512544631958,
440
+ "eval_precision": 0.9153,
441
+ "eval_recall": 0.8915,
442
+ "eval_rouge1": 0.4471,
443
+ "eval_rouge2": 0.2266,
444
+ "eval_rougeL": 0.3852,
445
+ "eval_rougeLsum": 0.3853,
446
+ "eval_runtime": 311.7771,
447
+ "eval_samples_per_second": 8.82,
448
+ "eval_steps_per_second": 0.552,
449
+ "step": 10420
450
+ },
451
+ {
452
+ "epoch": 20.15,
453
+ "learning_rate": 3.2053742802303266e-06,
454
+ "loss": 0.8097,
455
+ "step": 10500
456
+ },
457
+ {
458
+ "epoch": 21.0,
459
+ "eval_f1": 0.9034,
460
+ "eval_gen_len": 19.907272727272726,
461
+ "eval_loss": 1.59569251537323,
462
+ "eval_precision": 0.9156,
463
+ "eval_recall": 0.8919,
464
+ "eval_rouge1": 0.4472,
465
+ "eval_rouge2": 0.2271,
466
+ "eval_rougeL": 0.3856,
467
+ "eval_rougeLsum": 0.3856,
468
+ "eval_runtime": 309.5923,
469
+ "eval_samples_per_second": 8.883,
470
+ "eval_steps_per_second": 0.556,
471
+ "step": 10941
472
+ },
473
+ {
474
+ "epoch": 21.11,
475
+ "learning_rate": 2.4056301983365325e-06,
476
+ "loss": 0.7926,
477
+ "step": 11000
478
+ },
479
+ {
480
+ "epoch": 22.0,
481
  "eval_f1": 0.9034,
482
+ "eval_gen_len": 19.892,
483
+ "eval_loss": 1.595582127571106,
484
  "eval_precision": 0.9159,
485
  "eval_recall": 0.8916,
486
+ "eval_rouge1": 0.4479,
487
+ "eval_rouge2": 0.2282,
488
+ "eval_rougeL": 0.3855,
489
+ "eval_rougeLsum": 0.3857,
490
+ "eval_runtime": 311.5772,
491
+ "eval_samples_per_second": 8.826,
492
+ "eval_steps_per_second": 0.552,
493
+ "step": 11462
494
  },
495
  {
496
+ "epoch": 22.07,
497
+ "learning_rate": 1.6058861164427384e-06,
498
+ "loss": 0.7841,
499
+ "step": 11500
500
+ },
501
+ {
502
+ "epoch": 23.0,
503
+ "eval_f1": 0.9028,
504
+ "eval_gen_len": 19.912,
505
+ "eval_loss": 1.5990447998046875,
506
+ "eval_precision": 0.9155,
507
+ "eval_recall": 0.8908,
508
+ "eval_rouge1": 0.4444,
509
+ "eval_rouge2": 0.2261,
510
+ "eval_rougeL": 0.3833,
511
+ "eval_rougeLsum": 0.3834,
512
+ "eval_runtime": 311.6057,
513
+ "eval_samples_per_second": 8.825,
514
+ "eval_steps_per_second": 0.552,
515
+ "step": 11983
516
+ },
517
+ {
518
+ "epoch": 23.03,
519
+ "learning_rate": 8.061420345489445e-07,
520
+ "loss": 0.7734,
521
+ "step": 12000
522
+ },
523
+ {
524
+ "epoch": 23.99,
525
+ "learning_rate": 6.397952655150352e-09,
526
+ "loss": 0.7669,
527
+ "step": 12500
528
+ },
529
+ {
530
+ "epoch": 24.0,
531
+ "eval_f1": 0.9034,
532
+ "eval_gen_len": 19.90290909090909,
533
+ "eval_loss": 1.6053136587142944,
534
+ "eval_precision": 0.9159,
535
+ "eval_recall": 0.8916,
536
+ "eval_rouge1": 0.4481,
537
+ "eval_rouge2": 0.2283,
538
+ "eval_rougeL": 0.3861,
539
+ "eval_rougeLsum": 0.3863,
540
+ "eval_runtime": 314.5795,
541
+ "eval_samples_per_second": 8.742,
542
+ "eval_steps_per_second": 0.547,
543
+ "step": 12504
544
+ },
545
+ {
546
+ "epoch": 24.0,
547
+ "step": 12504,
548
+ "total_flos": 2.569106349028344e+18,
549
+ "train_loss": 0.22391605226564926,
550
+ "train_runtime": 17118.5241,
551
+ "train_samples_per_second": 70.1,
552
+ "train_steps_per_second": 0.73
553
  }
554
  ],
555
  "logging_steps": 500,
556
+ "max_steps": 12504,
557
  "num_input_tokens_seen": 0,
558
+ "num_train_epochs": 24,
559
  "save_steps": 500,
560
+ "total_flos": 2.569106349028344e+18,
561
  "train_batch_size": 24,
562
  "trial_name": null,
563
  "trial_params": null