ToastyPigeon committed · Commit a33a401 · verified · 1 Parent(s): ad62638

Training in progress, step 138, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -23,12 +23,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "gate_proj",
- "q_proj",
+ "down_proj",
+ "up_proj",
  "v_proj",
+ "q_proj",
  "k_proj",
- "up_proj",
- "down_proj",
+ "gate_proj",
  "o_proj"
  ],
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:09d423e94feaf8033b31452667f003cb573a77c4a9f506cbdd460557e56907c6
+ oid sha256:159f4953091dcb60129c0d98dc4d161e4996f1e8228c96c20af61a193789da4e
  size 1101095848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:78f828185d4f5953ab36ae5fff9f265052a5a94558cdbf8e78a79d91fd02babb
- size 839468180
+ oid sha256:fd8ed52e211d25f45761d87e5f8eca279aa74c868fdfda06c6a4b31c3326a96e
+ size 841204242
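
optimizer.pt is the one LFS object whose recorded size changed along with its hash (839468180 to 841204242 bytes). A minimal sketch for checking a locally downloaded file against the pointer recorded here; the oid and size are taken from the diff above, while the local path is an assumption about where the checkout lives.

# Minimal sketch: verify a local copy of the checkpoint file against the
# LFS pointer values shown in this diff.
import hashlib

expected_oid = "fd8ed52e211d25f45761d87e5f8eca279aa74c868fdfda06c6a4b31c3326a96e"
expected_size = 841204242

with open("last-checkpoint/optimizer.pt", "rb") as f:  # assumed local path
    data = f.read()

assert len(data) == expected_size
assert hashlib.sha256(data).hexdigest() == expected_oid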
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:382b0c36b5e9719dde2ab41462be8ffb1ad866ac7375e059dc9959d16c5cf0a8
+ oid sha256:914a6a745fbc0daf993d085d59ff08f982320f1402730c23e1dff1739810252e
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e243468041094465725543ec88af2957e1619e78af2ea16687cb83a4910726b1
+ oid sha256:b269ab7d807492a2a25a83d2f77415556d138cbad7aa38efc35115be1c1f045d
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.5043859649122807,
+ "epoch": 0.6052631578947368,
  "eval_steps": 23,
- "global_step": 115,
+ "global_step": 138,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -860,6 +860,175 @@
  "eval_samples_per_second": 0.36,
  "eval_steps_per_second": 0.36,
  "step": 115
+ },
+ {
+ "epoch": 0.5087719298245614,
+ "grad_norm": 0.041254762560129166,
+ "learning_rate": 2.8472437850188416e-05,
+ "loss": 2.1322,
+ "step": 116
+ },
+ {
+ "epoch": 0.5131578947368421,
+ "grad_norm": 0.04002700001001358,
+ "learning_rate": 2.8148404115607496e-05,
+ "loss": 2.1348,
+ "step": 117
+ },
+ {
+ "epoch": 0.5175438596491229,
+ "grad_norm": 0.04018218815326691,
+ "learning_rate": 2.7824235725245042e-05,
+ "loss": 2.1397,
+ "step": 118
+ },
+ {
+ "epoch": 0.5219298245614035,
+ "grad_norm": 0.03979986160993576,
+ "learning_rate": 2.7500000000000004e-05,
+ "loss": 2.1661,
+ "step": 119
+ },
+ {
+ "epoch": 0.5263157894736842,
+ "grad_norm": 0.03741481155157089,
+ "learning_rate": 2.7175764274754967e-05,
+ "loss": 2.0629,
+ "step": 120
+ },
+ {
+ "epoch": 0.5307017543859649,
+ "grad_norm": 0.04032299295067787,
+ "learning_rate": 2.685159588439251e-05,
+ "loss": 2.2347,
+ "step": 121
+ },
+ {
+ "epoch": 0.5350877192982456,
+ "grad_norm": 0.042785074561834335,
+ "learning_rate": 2.6527562149811586e-05,
+ "loss": 2.1792,
+ "step": 122
+ },
+ {
+ "epoch": 0.5394736842105263,
+ "grad_norm": 0.040340930223464966,
+ "learning_rate": 2.6203730363946855e-05,
+ "loss": 2.4248,
+ "step": 123
+ },
+ {
+ "epoch": 0.543859649122807,
+ "grad_norm": 0.04094316065311432,
+ "learning_rate": 2.5880167777793746e-05,
+ "loss": 2.1152,
+ "step": 124
+ },
+ {
+ "epoch": 0.5482456140350878,
+ "grad_norm": 0.04053365811705589,
+ "learning_rate": 2.5556941586442263e-05,
+ "loss": 2.247,
+ "step": 125
+ },
+ {
+ "epoch": 0.5526315789473685,
+ "grad_norm": 0.04444659873843193,
+ "learning_rate": 2.523411891512244e-05,
+ "loss": 2.0332,
+ "step": 126
+ },
+ {
+ "epoch": 0.5570175438596491,
+ "grad_norm": 0.04211907461285591,
+ "learning_rate": 2.4911766805264246e-05,
+ "loss": 2.2871,
+ "step": 127
+ },
+ {
+ "epoch": 0.5614035087719298,
+ "grad_norm": 0.039295535534620285,
+ "learning_rate": 2.458995220057491e-05,
+ "loss": 2.0771,
+ "step": 128
+ },
+ {
+ "epoch": 0.5657894736842105,
+ "grad_norm": 0.04139047861099243,
+ "learning_rate": 2.426874193313657e-05,
+ "loss": 2.3538,
+ "step": 129
+ },
+ {
+ "epoch": 0.5701754385964912,
+ "grad_norm": 0.04222600907087326,
+ "learning_rate": 2.394820270952704e-05,
+ "loss": 2.2202,
+ "step": 130
+ },
+ {
+ "epoch": 0.5745614035087719,
+ "grad_norm": 0.04055177420377731,
+ "learning_rate": 2.3628401096966717e-05,
+ "loss": 2.1976,
+ "step": 131
+ },
+ {
+ "epoch": 0.5789473684210527,
+ "grad_norm": 0.040084317326545715,
+ "learning_rate": 2.3309403509494393e-05,
+ "loss": 2.1939,
+ "step": 132
+ },
+ {
+ "epoch": 0.5833333333333334,
+ "grad_norm": 0.04429348185658455,
+ "learning_rate": 2.2991276194174838e-05,
+ "loss": 2.367,
+ "step": 133
+ },
+ {
+ "epoch": 0.5877192982456141,
+ "grad_norm": 0.04366152733564377,
+ "learning_rate": 2.267408521734113e-05,
+ "loss": 2.2848,
+ "step": 134
+ },
+ {
+ "epoch": 0.5921052631578947,
+ "grad_norm": 0.04157907888293266,
+ "learning_rate": 2.23578964508745e-05,
+ "loss": 2.3166,
+ "step": 135
+ },
+ {
+ "epoch": 0.5964912280701754,
+ "grad_norm": 0.045395560562610626,
+ "learning_rate": 2.2042775558524503e-05,
+ "loss": 2.2021,
+ "step": 136
+ },
+ {
+ "epoch": 0.6008771929824561,
+ "grad_norm": 0.04491296038031578,
+ "learning_rate": 2.1728787982272493e-05,
+ "loss": 2.1836,
+ "step": 137
+ },
+ {
+ "epoch": 0.6052631578947368,
+ "grad_norm": 0.04048113152384758,
+ "learning_rate": 2.141599892874107e-05,
+ "loss": 2.2104,
+ "step": 138
+ },
+ {
+ "epoch": 0.6052631578947368,
+ "eval_loss": 2.166778087615967,
+ "eval_runtime": 219.3317,
+ "eval_samples_per_second": 0.365,
+ "eval_steps_per_second": 0.365,
+ "step": 138
  }
  ],
  "logging_steps": 1,
@@ -879,7 +1048,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.451386683857306e+17,
+ "total_flos": 7.741664020628767e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:74041c6fe316b2e2ef7886b5ba6646caf901292406a50a5783f0fb2cd3feb66a
+ oid sha256:7486783359d6c84d6c4d68e2a011458cc32e2490c825991a5cf321ef8b99edb5
  size 6712