iamnguyen commited on
Commit
20350cf
·
verified ·
1 Parent(s): fa51acb

Training in progress, step 128, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e4f50b92f698330efa14b9aac94764ac34d6d4509dfa5c1fdfb68575bc943c7
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:723cdbed8d40933997cc8d9e1926fd97533157645b77f78a31a0777a237955b4
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d6680532138f3cf57460c4e0978c71e3e3fc3aff82bf843a16cb5743ee2bad6
3
  size 240728084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb9600dbce4dc797bd243e119f573cc8a6d76ef08a1e8626fb7f2cb94596db2
3
  size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:804a1bb968b56ae5803ba0d79c37a917e42ea8548fe4b81baead068641d70bad
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9c8242be1e407e2d848669b7b5d69ac9796e0548ad1ac9a2c7f9531a4a28b62
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.007240945282778713,
5
  "eval_steps": 500,
6
- "global_step": 112,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -791,6 +791,118 @@
791
  "learning_rate": 7.225806451612903e-06,
792
  "loss": 1.4194,
793
  "step": 112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
794
  }
795
  ],
796
  "logging_steps": 1,
@@ -810,7 +922,7 @@
810
  "attributes": {}
811
  }
812
  },
813
- "total_flos": 7.237605007429632e+16,
814
  "train_batch_size": 2,
815
  "trial_name": null,
816
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.008275366037461386,
5
  "eval_steps": 500,
6
+ "global_step": 128,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
791
  "learning_rate": 7.225806451612903e-06,
792
  "loss": 1.4194,
793
  "step": 112
794
+ },
795
+ {
796
+ "epoch": 0.00730559657994638,
797
+ "grad_norm": 5.50141716003418,
798
+ "learning_rate": 7.290322580645162e-06,
799
+ "loss": 1.4837,
800
+ "step": 113
801
+ },
802
+ {
803
+ "epoch": 0.007370247877114047,
804
+ "grad_norm": 5.740527153015137,
805
+ "learning_rate": 7.35483870967742e-06,
806
+ "loss": 1.5181,
807
+ "step": 114
808
+ },
809
+ {
810
+ "epoch": 0.007434899174281714,
811
+ "grad_norm": 6.510746002197266,
812
+ "learning_rate": 7.4193548387096784e-06,
813
+ "loss": 1.3614,
814
+ "step": 115
815
+ },
816
+ {
817
+ "epoch": 0.007499550471449381,
818
+ "grad_norm": 6.607003211975098,
819
+ "learning_rate": 7.483870967741936e-06,
820
+ "loss": 1.4396,
821
+ "step": 116
822
+ },
823
+ {
824
+ "epoch": 0.007564201768617048,
825
+ "grad_norm": 6.828821182250977,
826
+ "learning_rate": 7.548387096774194e-06,
827
+ "loss": 1.4888,
828
+ "step": 117
829
+ },
830
+ {
831
+ "epoch": 0.007628853065784715,
832
+ "grad_norm": 5.51243782043457,
833
+ "learning_rate": 7.612903225806451e-06,
834
+ "loss": 1.4666,
835
+ "step": 118
836
+ },
837
+ {
838
+ "epoch": 0.007693504362952382,
839
+ "grad_norm": 5.797337532043457,
840
+ "learning_rate": 7.67741935483871e-06,
841
+ "loss": 1.3761,
842
+ "step": 119
843
+ },
844
+ {
845
+ "epoch": 0.007758155660120049,
846
+ "grad_norm": 5.460038185119629,
847
+ "learning_rate": 7.741935483870968e-06,
848
+ "loss": 1.4361,
849
+ "step": 120
850
+ },
851
+ {
852
+ "epoch": 0.007822806957287716,
853
+ "grad_norm": 5.366038799285889,
854
+ "learning_rate": 7.806451612903227e-06,
855
+ "loss": 1.4087,
856
+ "step": 121
857
+ },
858
+ {
859
+ "epoch": 0.007887458254455384,
860
+ "grad_norm": 6.333535194396973,
861
+ "learning_rate": 7.870967741935484e-06,
862
+ "loss": 1.4527,
863
+ "step": 122
864
+ },
865
+ {
866
+ "epoch": 0.00795210955162305,
867
+ "grad_norm": 5.908946514129639,
868
+ "learning_rate": 7.935483870967743e-06,
869
+ "loss": 1.4128,
870
+ "step": 123
871
+ },
872
+ {
873
+ "epoch": 0.008016760848790718,
874
+ "grad_norm": 5.050029754638672,
875
+ "learning_rate": 8.000000000000001e-06,
876
+ "loss": 1.4311,
877
+ "step": 124
878
+ },
879
+ {
880
+ "epoch": 0.008081412145958386,
881
+ "grad_norm": 6.2919816970825195,
882
+ "learning_rate": 8.064516129032258e-06,
883
+ "loss": 1.3892,
884
+ "step": 125
885
+ },
886
+ {
887
+ "epoch": 0.008146063443126052,
888
+ "grad_norm": 5.789970874786377,
889
+ "learning_rate": 8.129032258064517e-06,
890
+ "loss": 1.4024,
891
+ "step": 126
892
+ },
893
+ {
894
+ "epoch": 0.00821071474029372,
895
+ "grad_norm": 5.259674549102783,
896
+ "learning_rate": 8.193548387096774e-06,
897
+ "loss": 1.3323,
898
+ "step": 127
899
+ },
900
+ {
901
+ "epoch": 0.008275366037461386,
902
+ "grad_norm": 5.545688152313232,
903
+ "learning_rate": 8.258064516129033e-06,
904
+ "loss": 1.332,
905
+ "step": 128
906
  }
907
  ],
908
  "logging_steps": 1,
 
922
  "attributes": {}
923
  }
924
  },
925
+ "total_flos": 8.228794259622298e+16,
926
  "train_batch_size": 2,
927
  "trial_name": null,
928
  "trial_params": null