alicegoesdown committed
Commit a8a18e9 · verified · 1 Parent(s): 4c8dba7

Training in progress, step 1200, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:98f9852468be1d11f79a6afd8e8e8e90ab82b65b6630bfe27bf9d7258aa53760
+ oid sha256:1069acd7dbeb0a53054f21d4748045d3b0cd75d1c7844eea3e3c04cd84a1c1c6
  size 653434568
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:46f5bcafb3391b66bf71dd49e132600d898d53858b6ef128a4473be5f14bbd54
+ oid sha256:ddffa107f91c19c05fdc252823136fe52818fe6d47b0afd3b51276eb3eaaf4c6
  size 1288533754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8baf1c7e69a5025f19d25ee5fab7f4ab7f55b412ae064b6fb683da98fc8d4be9
+ oid sha256:023bcf31226b6f20365e44211b51277c48818151bb707e207a8261688b6af12e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9cf334b45c59ec3117af6235b0cb6b8da2aab8bbc78c388edc1a1925c6731983
+ oid sha256:9700e65a0b3380601a1015680e2c133feb2eaf2c92236b0244cca4087d160954
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 1.6903132200241089,
- "best_model_checkpoint": "./output/checkpoint-1050",
- "epoch": 0.0931016137613052,
+ "best_metric": 1.6847599744796753,
+ "best_model_checkpoint": "./output/checkpoint-1200",
+ "epoch": 0.10640184429863452,
  "eval_steps": 150,
- "global_step": 1050,
+ "global_step": 1200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -798,6 +798,119 @@
  "eval_samples_per_second": 8.421,
  "eval_steps_per_second": 8.421,
  "step": 1050
+ },
+ {
+ "epoch": 0.09398829579712716,
+ "grad_norm": 6.51155948638916,
+ "learning_rate": 9.082436730845996e-05,
+ "loss": 1.5121,
+ "step": 1060
+ },
+ {
+ "epoch": 0.0948749778329491,
+ "grad_norm": 7.315041542053223,
+ "learning_rate": 9.063844375723016e-05,
+ "loss": 1.45,
+ "step": 1070
+ },
+ {
+ "epoch": 0.09576165986877105,
+ "grad_norm": 9.287749290466309,
+ "learning_rate": 9.045084971874741e-05,
+ "loss": 1.6892,
+ "step": 1080
+ },
+ {
+ "epoch": 0.09664834190459301,
+ "grad_norm": 7.6157097816467285,
+ "learning_rate": 9.026159290426783e-05,
+ "loss": 1.832,
+ "step": 1090
+ },
+ {
+ "epoch": 0.09753502394041497,
+ "grad_norm": 6.081124782562256,
+ "learning_rate": 9.007068109339786e-05,
+ "loss": 1.6911,
+ "step": 1100
+ },
+ {
+ "epoch": 0.09842170597623692,
+ "grad_norm": 7.2468671798706055,
+ "learning_rate": 8.987812213377425e-05,
+ "loss": 1.6959,
+ "step": 1110
+ },
+ {
+ "epoch": 0.09930838801205888,
+ "grad_norm": 7.454516887664795,
+ "learning_rate": 8.968392394074165e-05,
+ "loss": 1.5169,
+ "step": 1120
+ },
+ {
+ "epoch": 0.10019507004788084,
+ "grad_norm": 10.253645896911621,
+ "learning_rate": 8.948809449702714e-05,
+ "loss": 1.6779,
+ "step": 1130
+ },
+ {
+ "epoch": 0.10108175208370278,
+ "grad_norm": 8.075345993041992,
+ "learning_rate": 8.929064185241216e-05,
+ "loss": 1.6622,
+ "step": 1140
+ },
+ {
+ "epoch": 0.10196843411952473,
+ "grad_norm": 11.007535934448242,
+ "learning_rate": 8.909157412340152e-05,
+ "loss": 1.7568,
+ "step": 1150
+ },
+ {
+ "epoch": 0.10285511615534669,
+ "grad_norm": 8.019722938537598,
+ "learning_rate": 8.889089949288989e-05,
+ "loss": 1.6177,
+ "step": 1160
+ },
+ {
+ "epoch": 0.10374179819116865,
+ "grad_norm": 8.618474960327148,
+ "learning_rate": 8.868862620982537e-05,
+ "loss": 1.5605,
+ "step": 1170
+ },
+ {
+ "epoch": 0.1046284802269906,
+ "grad_norm": 8.008125305175781,
+ "learning_rate": 8.848476258887034e-05,
+ "loss": 1.5995,
+ "step": 1180
+ },
+ {
+ "epoch": 0.10551516226281256,
+ "grad_norm": 11.63944149017334,
+ "learning_rate": 8.827931701005976e-05,
+ "loss": 1.5778,
+ "step": 1190
+ },
+ {
+ "epoch": 0.10640184429863452,
+ "grad_norm": 9.485556602478027,
+ "learning_rate": 8.807229791845674e-05,
+ "loss": 1.547,
+ "step": 1200
+ },
+ {
+ "epoch": 0.10640184429863452,
+ "eval_loss": 1.6847599744796753,
+ "eval_runtime": 59.4403,
+ "eval_samples_per_second": 8.412,
+ "eval_steps_per_second": 8.412,
+ "step": 1200
  }
  ],
  "logging_steps": 10,
@@ -817,7 +930,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.53861980121088e+17,
+ "total_flos": 4.043639543550935e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null