neuralwonderland committed
Commit 8fb5845 · verified · 1 Parent(s): 78b3c7d

Training in progress, step 1200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c95de728f02bd108c427e07914d7a266ac8e219b042c5a31ab5c42cd5ae57f40
+oid sha256:807e2020d4cc18f157e1ef0d1bfd91dd5bf0c37dcea25db0e1a74afaa6d01745
 size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e38d306ee414bf708f6b626142dca0ab4b5969b5cf416730f19f8a0d90de238
+oid sha256:4f4be45d595fa1a9446b71dac7d9916ddea1170c72ac19df309f820d8635447c
 size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:496b0588f97d6d587516f88b79f3f545f508fdd7aeb5db98153fda3c8189db81
+oid sha256:4e13c301efe13091da720b5921b6c06ef2af4b0396bf6dd105a86ca83c2c83de
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2ba6bf3e8e84f8697154a5603399be4e682ed0ecacfbbcbd0b06870559b45041
+oid sha256:94d254686016f56c6ee3bc710fe38ee5c6ec1eb812335e8f222de3fe8edbd01d
 size 1256
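Each pointer file above only records the SHA-256 digest (oid) and byte size of the real payload stored in Git LFS. A minimal sketch, assuming the checkpoint files have already been pulled to a local directory (paths are illustrative), of checking a downloaded file against the oid from its pointer:

```python
import hashlib

# Hypothetical local path; adjust to wherever the checkpoint was downloaded.
path = "last-checkpoint/adapter_model.safetensors"
# oid taken from the new pointer in this commit.
expected_oid = "807e2020d4cc18f157e1ef0d1bfd91dd5bf0c37dcea25db0e1a74afaa6d01745"

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    # Hash in 1 MiB chunks so large checkpoint files need not fit in memory.
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert sha256.hexdigest() == expected_oid, "file does not match the LFS pointer oid"
```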
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2271474599838257,
-  "best_model_checkpoint": "./output/checkpoint-1050",
-  "epoch": 0.04703247480403135,
+  "best_metric": 1.2224195003509521,
+  "best_model_checkpoint": "./output/checkpoint-1200",
+  "epoch": 0.05375139977603583,
   "eval_steps": 150,
-  "global_step": 1050,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -798,6 +798,119 @@
       "eval_samples_per_second": 9.695,
       "eval_steps_per_second": 9.695,
       "step": 1050
+    },
+    {
+      "epoch": 0.04748040313549832,
+      "grad_norm": 4.918002605438232,
+      "learning_rate": 6.811827548134495e-06,
+      "loss": 1.156,
+      "step": 1060
+    },
+    {
+      "epoch": 0.047928331466965284,
+      "grad_norm": 3.533487319946289,
+      "learning_rate": 6.797883281792261e-06,
+      "loss": 1.0533,
+      "step": 1070
+    },
+    {
+      "epoch": 0.04837625979843225,
+      "grad_norm": 4.698348045349121,
+      "learning_rate": 6.783813728906054e-06,
+      "loss": 1.2621,
+      "step": 1080
+    },
+    {
+      "epoch": 0.048824188129899214,
+      "grad_norm": 3.90852427482605,
+      "learning_rate": 6.769619467820086e-06,
+      "loss": 1.0754,
+      "step": 1090
+    },
+    {
+      "epoch": 0.04927211646136618,
+      "grad_norm": 6.924786567687988,
+      "learning_rate": 6.755301082004838e-06,
+      "loss": 1.0617,
+      "step": 1100
+    },
+    {
+      "epoch": 0.049720044792833144,
+      "grad_norm": 5.685960292816162,
+      "learning_rate": 6.740859160033068e-06,
+      "loss": 1.2185,
+      "step": 1110
+    },
+    {
+      "epoch": 0.05016797312430011,
+      "grad_norm": 5.533092975616455,
+      "learning_rate": 6.726294295555623e-06,
+      "loss": 1.0583,
+      "step": 1120
+    },
+    {
+      "epoch": 0.050615901455767075,
+      "grad_norm": 4.5029988288879395,
+      "learning_rate": 6.711607087277034e-06,
+      "loss": 1.1781,
+      "step": 1130
+    },
+    {
+      "epoch": 0.05106382978723404,
+      "grad_norm": 3.2203736305236816,
+      "learning_rate": 6.69679813893091e-06,
+      "loss": 1.151,
+      "step": 1140
+    },
+    {
+      "epoch": 0.051511758118701005,
+      "grad_norm": 6.602795600891113,
+      "learning_rate": 6.681868059255113e-06,
+      "loss": 1.1373,
+      "step": 1150
+    },
+    {
+      "epoch": 0.05195968645016797,
+      "grad_norm": 3.071552038192749,
+      "learning_rate": 6.666817461966741e-06,
+      "loss": 1.1554,
+      "step": 1160
+    },
+    {
+      "epoch": 0.052407614781634936,
+      "grad_norm": 5.886751174926758,
+      "learning_rate": 6.651646965736902e-06,
+      "loss": 1.1328,
+      "step": 1170
+    },
+    {
+      "epoch": 0.0528555431131019,
+      "grad_norm": 4.323307991027832,
+      "learning_rate": 6.636357194165274e-06,
+      "loss": 1.1535,
+      "step": 1180
+    },
+    {
+      "epoch": 0.053303471444568866,
+      "grad_norm": 4.585876941680908,
+      "learning_rate": 6.620948775754481e-06,
+      "loss": 1.1636,
+      "step": 1190
+    },
+    {
+      "epoch": 0.05375139977603583,
+      "grad_norm": 3.9351437091827393,
+      "learning_rate": 6.605422343884255e-06,
+      "loss": 1.2689,
+      "step": 1200
+    },
+    {
+      "epoch": 0.05375139977603583,
+      "eval_loss": 1.2224195003509521,
+      "eval_runtime": 51.5936,
+      "eval_samples_per_second": 9.691,
+      "eval_steps_per_second": 9.691,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
@@ -817,7 +930,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.358086907308032e+17,
+  "total_flos": 1.541998722299904e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null