alicegoesdown committed
Commit 43dab4f · verified · 1 Parent(s): 69f6dca

Training in progress, step 1050, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f859cc22c73b0a6cf946ee2b61f61733a8b83b2ff84f372bfd741792359f6f0b
+ oid sha256:30a19f851e92120b55e4b3e48cb7a19666339f280f7846f0368ed6553229846c
  size 921238736
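The adapter weights themselves live in `adapter_model.safetensors`; only its Git LFS pointer changes in this commit. A minimal sketch for inspecting the tensors once the checkpoint directory has been downloaded locally (the `./last-checkpoint` path and the use of the `safetensors` package are assumptions for illustration, not part of this commit):

```python
# Sketch: list the tensors stored in the updated adapter checkpoint.
# Assumes the repo has been downloaded so the file below is the real
# 921 MB object, not just the LFS pointer shown in the diff.
from safetensors import safe_open

path = "./last-checkpoint/adapter_model.safetensors"  # assumed local path

with safe_open(path, framework="pt") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        print(name, tuple(t.shape), t.dtype)
```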
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a3f0593d4c89fb34e1c63f7c504936e4d9f6a0c336b414a597807ccbe5898bb8
+ oid sha256:f86f420488c8931f87fe380cac8026e362a39fcc956c3cbceb4f6eaa1202966a
  size 1808993594
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4a51199259c5448a07ec8ef2f6da3d50c8fab80223defefc69846f7f0185cb01
+ oid sha256:85a5717b56a12e8c1a74fbcc04f95d33d38ad282e6759eb62471db1fd54435cd
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3617162226dda42321eb60706610eaf575f5dd5b30df0d73eddae3c1ecde0276
+ oid sha256:faf19dc22a4fe1ad09e77bb25c83ade70823bae5378e9f8bd12663aae71b06a4
  size 1256
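Each file above is stored through Git LFS, so the diff only touches the three-line pointer stub: the spec version, the SHA-256 of the real object (`oid`), and its byte size. A small sketch for checking that a locally downloaded file matches its pointer, using the new `adapter_model.safetensors` values from this commit (the local path is an assumption):

```python
# Sketch: verify a downloaded checkpoint file against its Git LFS pointer.
import hashlib
import os

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# oid / size copied from the new adapter_model.safetensors pointer above.
expected_oid = "30a19f851e92120b55e4b3e48cb7a19666339f280f7846f0368ed6553229846c"
expected_size = 921238736

path = "./last-checkpoint/adapter_model.safetensors"  # assumed local path
assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha256_of(path) == expected_oid, "sha256 mismatch"
print("pointer and file agree")
```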
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.13146419823169708,
  "best_model_checkpoint": "./output/checkpoint-150",
- "epoch": 4.433497536945813,
+ "epoch": 5.172413793103448,
  "eval_steps": 150,
- "global_step": 900,
+ "global_step": 1050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -685,6 +685,119 @@
  "eval_samples_per_second": 11.569,
  "eval_steps_per_second": 11.569,
  "step": 900
+ },
+ {
+ "epoch": 4.482758620689655,
+ "grad_norm": 1.3206291198730469,
+ "learning_rate": 6.088655409611797e-05,
+ "loss": 0.0195,
+ "step": 910
+ },
+ {
+ "epoch": 4.532019704433497,
+ "grad_norm": 0.7733320593833923,
+ "learning_rate": 6.07822678974027e-05,
+ "loss": 0.0225,
+ "step": 920
+ },
+ {
+ "epoch": 4.58128078817734,
+ "grad_norm": 0.5630067586898804,
+ "learning_rate": 6.067682289967549e-05,
+ "loss": 0.0209,
+ "step": 930
+ },
+ {
+ "epoch": 4.630541871921182,
+ "grad_norm": 0.7231793403625488,
+ "learning_rate": 6.05702234373672e-05,
+ "loss": 0.0257,
+ "step": 940
+ },
+ {
+ "epoch": 4.679802955665025,
+ "grad_norm": 1.0794564485549927,
+ "learning_rate": 6.04624738923642e-05,
+ "loss": 0.0247,
+ "step": 950
+ },
+ {
+ "epoch": 4.7290640394088665,
+ "grad_norm": 1.263869047164917,
+ "learning_rate": 6.0353578693828246e-05,
+ "loss": 0.0222,
+ "step": 960
+ },
+ {
+ "epoch": 4.778325123152709,
+ "grad_norm": 1.1668565273284912,
+ "learning_rate": 6.0243542318014456e-05,
+ "loss": 0.0291,
+ "step": 970
+ },
+ {
+ "epoch": 4.827586206896552,
+ "grad_norm": 1.756858229637146,
+ "learning_rate": 6.013236928808725e-05,
+ "loss": 0.0263,
+ "step": 980
+ },
+ {
+ "epoch": 4.876847290640394,
+ "grad_norm": 1.017069935798645,
+ "learning_rate": 6.002006417393445e-05,
+ "loss": 0.0241,
+ "step": 990
+ },
+ {
+ "epoch": 4.926108374384237,
+ "grad_norm": 1.153463363647461,
+ "learning_rate": 5.9906631591979426e-05,
+ "loss": 0.0255,
+ "step": 1000
+ },
+ {
+ "epoch": 4.975369458128079,
+ "grad_norm": 0.9330563545227051,
+ "learning_rate": 5.979207620499136e-05,
+ "loss": 0.0289,
+ "step": 1010
+ },
+ {
+ "epoch": 5.024630541871921,
+ "grad_norm": 0.6231732368469238,
+ "learning_rate": 5.96764027218935e-05,
+ "loss": 0.0204,
+ "step": 1020
+ },
+ {
+ "epoch": 5.073891625615763,
+ "grad_norm": 0.6820633411407471,
+ "learning_rate": 5.95596158975697e-05,
+ "loss": 0.0117,
+ "step": 1030
+ },
+ {
+ "epoch": 5.123152709359606,
+ "grad_norm": 0.6919358968734741,
+ "learning_rate": 5.944172053266886e-05,
+ "loss": 0.0142,
+ "step": 1040
+ },
+ {
+ "epoch": 5.172413793103448,
+ "grad_norm": 2.5571327209472656,
+ "learning_rate": 5.932272147340768e-05,
+ "loss": 0.0187,
+ "step": 1050
+ },
+ {
+ "epoch": 5.172413793103448,
+ "eval_loss": 0.20086674392223358,
+ "eval_runtime": 15.4256,
+ "eval_samples_per_second": 11.734,
+ "eval_steps_per_second": 11.734,
+ "step": 1050
  }
  ],
  "logging_steps": 10,
@@ -704,7 +817,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.43855473081303e+16,
+ "total_flos": 6.349714559045222e+16,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null