cwaud committed
Commit e5cad10
1 Parent(s): ef6190d

Training in progress, step 133, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8d9443d3806b3f524104e7410b3e7a5e0e8f8deae600b21085beb3ac0c347fe7
+ oid sha256:d12be65f7e8dd5ac27291117edd1522ed1581db54bb6a06a934a5bd27b67433f
  size 147770496
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:883ac1d2d58af98f5c15e8c1f68fa35b3c5825ab049535194ee9b5e9923331e5
+ oid sha256:311f36c5d5948b26d4bcd744cf495abfea0b2c2b47bbdcbd5358442b98ab6940
  size 295765866
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ea7479aa993184fae8582a8ac09e7b54fd0c69e1659e2cd0f651a189fcfe7ab3
+ oid sha256:053a19bf6ddaa7bbad68aedb481b6fbae1f1ede7b8789c2659555b2f7468d4c2
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7827a0766373e30cb2e50db528dff06a73ccc5e29be7188061afef7b5fe7b104
+ oid sha256:d723020e200376885e5ac296b9cba37ed8d9cf413b9b76c448ebc21c950b5a77
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:06031019adc9147ff97b6a7795fcebe13ec08027167ebbf684d7b589e62e1a23
+ oid sha256:996b5e0855b39aa6eafe4b88cbd8f3449cf57c2b9ab8bd497eea241d840fb88c
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ddce3cd2ce73700e2934bb3f599ad484193bf9109c3b6e0a907e0ff72008c987
+ oid sha256:22498a1c9b3f72e3686290a05844b34240bb18597ef646fc6b0c3b651699e0ff
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1e2d2e9486a02a9d276f7ca10792a086dd994b16cb3a60458cafba5cc010d9e3
+ oid sha256:349b16c1d49b7fd568cd7193488ce7ffb223618fe2fd7726cd63b343cdf98e43
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 1.3449122905731201,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
- "epoch": 0.8609093354856067,
+ "epoch": 1.1450094161958568,
  "eval_steps": 25,
- "global_step": 100,
+ "global_step": 133,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -747,6 +747,245 @@
  "eval_samples_per_second": 39.295,
  "eval_steps_per_second": 10.217,
  "step": 100
+ },
+ {
+ "epoch": 0.8695184288404627,
+ "grad_norm": 0.3012229800224304,
+ "learning_rate": 2.3180194846605367e-05,
+ "loss": 1.176,
+ "step": 101
+ },
+ {
+ "epoch": 0.8781275221953188,
+ "grad_norm": 0.4754287004470825,
+ "learning_rate": 2.2408881267183997e-05,
+ "loss": 1.1958,
+ "step": 102
+ },
+ {
+ "epoch": 0.8867366155501749,
+ "grad_norm": 0.43265655636787415,
+ "learning_rate": 2.165719935902685e-05,
+ "loss": 1.3262,
+ "step": 103
+ },
+ {
+ "epoch": 0.895345708905031,
+ "grad_norm": 0.5260616540908813,
+ "learning_rate": 2.09256019072082e-05,
+ "loss": 1.3721,
+ "step": 104
+ },
+ {
+ "epoch": 0.903954802259887,
+ "grad_norm": 0.5602609515190125,
+ "learning_rate": 2.0214529598676836e-05,
+ "loss": 1.401,
+ "step": 105
+ },
+ {
+ "epoch": 0.912563895614743,
+ "grad_norm": 0.29336562752723694,
+ "learning_rate": 1.952441075680272e-05,
+ "loss": 1.6924,
+ "step": 106
+ },
+ {
+ "epoch": 0.9211729889695991,
+ "grad_norm": 0.9488304853439331,
+ "learning_rate": 1.8855661083370986e-05,
+ "loss": 1.8012,
+ "step": 107
+ },
+ {
+ "epoch": 0.9297820823244553,
+ "grad_norm": 0.3932758867740631,
+ "learning_rate": 1.820868340817874e-05,
+ "loss": 1.6428,
+ "step": 108
+ },
+ {
+ "epoch": 0.9383911756793113,
+ "grad_norm": 0.3379191756248474,
+ "learning_rate": 1.758386744638546e-05,
+ "loss": 1.3678,
+ "step": 109
+ },
+ {
+ "epoch": 0.9470002690341673,
+ "grad_norm": 0.5376018285751343,
+ "learning_rate": 1.698158956376318e-05,
+ "loss": 1.6057,
+ "step": 110
+ },
+ {
+ "epoch": 0.9556093623890234,
+ "grad_norm": 0.6705049872398376,
+ "learning_rate": 1.6402212549987762e-05,
+ "loss": 1.5497,
+ "step": 111
+ },
+ {
+ "epoch": 0.9642184557438794,
+ "grad_norm": 1.5708343982696533,
+ "learning_rate": 1.584608540010799e-05,
+ "loss": 1.4589,
+ "step": 112
+ },
+ {
+ "epoch": 0.9728275490987356,
+ "grad_norm": 2.8929443359375,
+ "learning_rate": 1.531354310432403e-05,
+ "loss": 1.5784,
+ "step": 113
+ },
+ {
+ "epoch": 0.9814366424535916,
+ "grad_norm": 0.3657113313674927,
+ "learning_rate": 1.4804906446201816e-05,
+ "loss": 1.3912,
+ "step": 114
+ },
+ {
+ "epoch": 0.9900457358084477,
+ "grad_norm": 0.3794941306114197,
+ "learning_rate": 1.4320481809445051e-05,
+ "loss": 1.5847,
+ "step": 115
+ },
+ {
+ "epoch": 0.9986548291633037,
+ "grad_norm": 0.7362991571426392,
+ "learning_rate": 1.386056099334112e-05,
+ "loss": 1.399,
+ "step": 116
+ },
+ {
+ "epoch": 1.0072639225181599,
+ "grad_norm": 0.8029009103775024,
+ "learning_rate": 1.3425421036992098e-05,
+ "loss": 1.2215,
+ "step": 117
+ },
+ {
+ "epoch": 1.0158730158730158,
+ "grad_norm": 0.5080808997154236,
+ "learning_rate": 1.3015324052436753e-05,
+ "loss": 1.2015,
+ "step": 118
+ },
+ {
+ "epoch": 1.024482109227872,
+ "grad_norm": 0.44496291875839233,
+ "learning_rate": 1.2630517066764069e-05,
+ "loss": 1.2138,
+ "step": 119
+ },
+ {
+ "epoch": 1.033091202582728,
+ "grad_norm": 0.4348479211330414,
+ "learning_rate": 1.227123187331335e-05,
+ "loss": 1.2767,
+ "step": 120
+ },
+ {
+ "epoch": 1.041700295937584,
+ "grad_norm": 0.37992164492607117,
+ "learning_rate": 1.1937684892050604e-05,
+ "loss": 1.5242,
+ "step": 121
+ },
+ {
+ "epoch": 1.0503093892924402,
+ "grad_norm": 0.32971861958503723,
+ "learning_rate": 1.1630077039205209e-05,
+ "loss": 1.5498,
+ "step": 122
+ },
+ {
+ "epoch": 1.0589184826472962,
+ "grad_norm": 0.5224172472953796,
+ "learning_rate": 1.1348593606245522e-05,
+ "loss": 1.6984,
+ "step": 123
+ },
+ {
+ "epoch": 1.0675275760021523,
+ "grad_norm": 0.43070971965789795,
+ "learning_rate": 1.109340414826622e-05,
+ "loss": 1.5932,
+ "step": 124
+ },
+ {
+ "epoch": 1.0761366693570082,
+ "grad_norm": 0.4774491786956787,
+ "learning_rate": 1.0864662381854632e-05,
+ "loss": 1.4308,
+ "step": 125
+ },
+ {
+ "epoch": 1.0761366693570082,
+ "eval_loss": 1.3410676717758179,
+ "eval_runtime": 1.2741,
+ "eval_samples_per_second": 39.242,
+ "eval_steps_per_second": 10.203,
+ "step": 125
+ },
+ {
+ "epoch": 1.0847457627118644,
+ "grad_norm": 0.5184400677680969,
+ "learning_rate": 1.0662506092497646e-05,
+ "loss": 1.4641,
+ "step": 126
+ },
+ {
+ "epoch": 1.0933548560667206,
+ "grad_norm": 0.5525245666503906,
+ "learning_rate": 1.0487057051584856e-05,
+ "loss": 1.5545,
+ "step": 127
+ },
+ {
+ "epoch": 1.1019639494215765,
+ "grad_norm": 1.609927773475647,
+ "learning_rate": 1.0338420943058053e-05,
+ "loss": 1.3439,
+ "step": 128
+ },
+ {
+ "epoch": 1.1105730427764327,
+ "grad_norm": 2.2938551902770996,
+ "learning_rate": 1.0216687299751144e-05,
+ "loss": 1.4817,
+ "step": 129
+ },
+ {
+ "epoch": 1.1191821361312886,
+ "grad_norm": 0.45292142033576965,
+ "learning_rate": 1.0121929449458941e-05,
+ "loss": 1.1242,
+ "step": 130
+ },
+ {
+ "epoch": 1.1277912294861447,
+ "grad_norm": 0.4423352777957916,
+ "learning_rate": 1.0054204470767243e-05,
+ "loss": 1.1672,
+ "step": 131
+ },
+ {
+ "epoch": 1.136400322841001,
+ "grad_norm": 0.33851832151412964,
+ "learning_rate": 1.0013553158670811e-05,
+ "loss": 1.2433,
+ "step": 132
+ },
+ {
+ "epoch": 1.1450094161958568,
+ "grad_norm": 0.3434777855873108,
+ "learning_rate": 1e-05,
+ "loss": 1.4094,
+ "step": 133
  }
  ],
  "logging_steps": 1,
@@ -770,12 +1009,12 @@
  "should_evaluate": false,
  "should_log": false,
  "should_save": true,
- "should_training_stop": false
+ "should_training_stop": true
  },
  "attributes": {}
  }
  },
- "total_flos": 2.1243703888707584e+17,
+ "total_flos": 2.8256510115053568e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null