ProgramInNonsense committed (verified)
Commit b8d8cf7 · 1 Parent(s): b7d27ba

Training in progress, step 900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7d018a885c48fc0c37b1517b6a917608711216b0c986199a88ed63bfa264bb6f
+oid sha256:47a16b51ed86ef9284b498af46d0376b4111bc98db6e3a93d475a79295e2f251
 size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d0277322814cbfc588f9a257df415c737454820ed7d47488062a26cd5645983
+oid sha256:dcc2335ec30b7353024ca0587baa051275591342c5014a352aee71d8fc65afba
 size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b13fcf3f291144a6cd2c774e108aa3a14da9f19c8ed985937da466add28008e9
+oid sha256:4f9de2f7947115389a7a99bea9a1bbdfa4f757d6c06e8a050b7f2a473acaf2e5
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5d02fc640701307686c6a3396481672cdf8282d493508ee82d1db38b43c52dc7
+oid sha256:844b58ea4caa8e4462bacce55fd7b9bbec4884fc493ed5628b1d32bf13abecce
 size 1256
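
Each binary in this checkpoint (adapter weights, optimizer, RNG and scheduler state) is stored as a Git LFS pointer file rather than raw bytes: a three-line record of version, oid sha256, and size, which is why only the hash changes between commits while the size stays constant. A minimal sketch of checking a downloaded blob against its pointer; the file paths below are hypothetical, not part of this repo:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path):
    """Parse a Git LFS pointer file into a dict with 'version', 'oid', 'size'."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    """True if the blob's sha256 and byte size match the pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected = fields["oid"].split(":", 1)[1]   # "sha256:<hash>" -> "<hash>"
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected and len(data) == int(fields["size"])

# Hypothetical usage: pointer kept in git, blob fetched from LFS storage.
# print(verify_blob("adapter_model.safetensors.pointer", "adapter_model.safetensors"))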
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.5808264017105103,
   "best_model_checkpoint": "./output/checkpoint-600",
-  "epoch": 3.456221198156682,
+  "epoch": 4.147465437788019,
   "eval_steps": 150,
-  "global_step": 750,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -572,6 +572,119 @@
       "eval_samples_per_second": 9.401,
       "eval_steps_per_second": 9.401,
       "step": 750
+    },
+    {
+      "epoch": 3.5023041474654377,
+      "grad_norm": 2.300726890563965,
+      "learning_rate": 5.2574461868845316e-05,
+      "loss": 1.4251,
+      "step": 760
+    },
+    {
+      "epoch": 3.5483870967741935,
+      "grad_norm": 2.2960753440856934,
+      "learning_rate": 5.2501545891838315e-05,
+      "loss": 1.3402,
+      "step": 770
+    },
+    {
+      "epoch": 3.5944700460829493,
+      "grad_norm": 2.15303897857666,
+      "learning_rate": 5.242760219920405e-05,
+      "loss": 1.3517,
+      "step": 780
+    },
+    {
+      "epoch": 3.640552995391705,
+      "grad_norm": 2.8471062183380127,
+      "learning_rate": 5.235263383047812e-05,
+      "loss": 1.3804,
+      "step": 790
+    },
+    {
+      "epoch": 3.686635944700461,
+      "grad_norm": 2.315626859664917,
+      "learning_rate": 5.2276643867316525e-05,
+      "loss": 1.3088,
+      "step": 800
+    },
+    {
+      "epoch": 3.7327188940092166,
+      "grad_norm": 2.1980669498443604,
+      "learning_rate": 5.219963543336902e-05,
+      "loss": 1.3247,
+      "step": 810
+    },
+    {
+      "epoch": 3.7788018433179724,
+      "grad_norm": 2.2485177516937256,
+      "learning_rate": 5.212161169415071e-05,
+      "loss": 1.3587,
+      "step": 820
+    },
+    {
+      "epoch": 3.824884792626728,
+      "grad_norm": 2.9055187702178955,
+      "learning_rate": 5.204257585691191e-05,
+      "loss": 1.304,
+      "step": 830
+    },
+    {
+      "epoch": 3.870967741935484,
+      "grad_norm": 1.9540749788284302,
+      "learning_rate": 5.196253117050633e-05,
+      "loss": 1.5041,
+      "step": 840
+    },
+    {
+      "epoch": 3.9170506912442398,
+      "grad_norm": 2.1801793575286865,
+      "learning_rate": 5.188148092525751e-05,
+      "loss": 1.4353,
+      "step": 850
+    },
+    {
+      "epoch": 3.9631336405529956,
+      "grad_norm": 2.4277756214141846,
+      "learning_rate": 5.179942845282357e-05,
+      "loss": 1.3884,
+      "step": 860
+    },
+    {
+      "epoch": 4.009216589861751,
+      "grad_norm": 1.8367973566055298,
+      "learning_rate": 5.17163771260603e-05,
+      "loss": 1.4612,
+      "step": 870
+    },
+    {
+      "epoch": 4.055299539170507,
+      "grad_norm": 2.414250373840332,
+      "learning_rate": 5.163233035888244e-05,
+      "loss": 1.2588,
+      "step": 880
+    },
+    {
+      "epoch": 4.1013824884792625,
+      "grad_norm": 2.6931984424591064,
+      "learning_rate": 5.154729160612338e-05,
+      "loss": 1.2111,
+      "step": 890
+    },
+    {
+      "epoch": 4.147465437788019,
+      "grad_norm": 2.5944881439208984,
+      "learning_rate": 5.146126436339321e-05,
+      "loss": 1.2324,
+      "step": 900
+    },
+    {
+      "epoch": 4.147465437788019,
+      "eval_loss": 1.6766996383666992,
+      "eval_runtime": 22.1869,
+      "eval_samples_per_second": 8.699,
+      "eval_steps_per_second": 8.699,
+      "step": 900
     }
   ],
   "logging_steps": 10,
@@ -591,7 +704,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3144486839671680.0,
+  "total_flos": 3780963021939840.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null