besimray committed (verified)
Commit e217616 · Parent: edba1df

Training in progress, step 90, checkpoint

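This commit refreshes the step-90 checkpoint under last-checkpoint/: the Git LFS pointers for the adapter weights, optimizer state, RNG state, and LR scheduler are updated, and trainer_state.json gains the training and evaluation log entries for steps 81 through 90. For orientation only, here is a minimal, hypothetical sketch of a transformers Trainer configuration consistent with the state recorded below; the actual training script is not part of this commit, and values marked as assumed are inferred rather than confirmed.

# Hypothetical sketch, not part of this commit: a TrainingArguments setup
# consistent with the values recorded in last-checkpoint/trainer_state.json.
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="miner_id_24",            # matches "best_model_checkpoint": "miner_id_24/checkpoint-90"
    per_device_train_batch_size=2,       # "train_batch_size": 2
    logging_steps=1,                     # "logging_steps": 1
    eval_strategy="steps",               # named evaluation_strategy on older transformers releases
    eval_steps=5,                        # "eval_steps": 5
    save_strategy="steps",
    save_steps=5,                        # assumed; checkpoints are visible at steps 80 and 90
    metric_for_best_model="eval_loss",   # assumed; best_metric tracks eval_loss (11.0346 at step 90)
    greater_is_better=False,             # lower eval_loss is better
)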
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:819d34c3c14db785e0dd28311c01d0c8f6dcd03cdecfe856b6d00ab306a6259e
+oid sha256:34c32e60c57250c51934f542ff2e75446d1c4cf392e777594b077e17ea6ad239
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf9b6d9e891724bb31db21c88f9512591eca8a4a6ad4e8699987a16aa8a16498
+oid sha256:78cd83f855bd80bf0c0a895685a76b346681eb6199b8673ce02a1764809414f1
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:77d6b80ff98d1964921d012a443cb87b248a1f2b5da6296fdc6b0c8c5f518f22
+oid sha256:820fcfa3250b03ccb2011d1c28382f78231a7cd53b56f3e8494ea4c7f9ea8506
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
+oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
 size 1064
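All four files above are stored through Git LFS, so each diff touches only the three-line pointer (version, oid, size): the oid, which is the SHA-256 digest of the file's contents, changes, while the recorded sizes stay the same. A small hypothetical check, assuming the checkpoint files have been downloaded locally, that a file matches its new pointer:

# Hypothetical verification, not part of this commit; the expected oid is the
# new pointer value for adapter_model.safetensors shown above.
import hashlib

def lfs_oid(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks and return its SHA-256 hex digest,
    # which is exactly what the LFS pointer records after "oid sha256:".
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "34c32e60c57250c51934f542ff2e75446d1c4cf392e777594b077e17ea6ad239"
assert lfs_oid("last-checkpoint/adapter_model.safetensors") == expected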
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.038910865783691,
-  "best_model_checkpoint": "miner_id_24/checkpoint-80",
-  "epoch": 0.0036157374974576844,
+  "best_metric": 11.034589767456055,
+  "best_model_checkpoint": "miner_id_24/checkpoint-90",
+  "epoch": 0.004067704684639895,
   "eval_steps": 5,
-  "global_step": 80,
+  "global_step": 90,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -703,6 +703,92 @@
       "eval_samples_per_second": 52.822,
       "eval_steps_per_second": 26.414,
       "step": 80
+    },
+    {
+      "epoch": 0.0036609342161759055,
+      "grad_norm": 0.7481367588043213,
+      "learning_rate": 0.0001898168561213419,
+      "loss": 44.2182,
+      "step": 81
+    },
+    {
+      "epoch": 0.0037061309348941267,
+      "grad_norm": 0.628414511680603,
+      "learning_rate": 0.0001895331334332753,
+      "loss": 44.1519,
+      "step": 82
+    },
+    {
+      "epoch": 0.003751327653612348,
+      "grad_norm": 0.658549964427948,
+      "learning_rate": 0.0001892457303887706,
+      "loss": 44.1364,
+      "step": 83
+    },
+    {
+      "epoch": 0.0037965243723305686,
+      "grad_norm": 0.5245007276535034,
+      "learning_rate": 0.0001889546588018412,
+      "loss": 44.1079,
+      "step": 84
+    },
+    {
+      "epoch": 0.00384172109104879,
+      "grad_norm": 0.5555324554443359,
+      "learning_rate": 0.00018865993063730004,
+      "loss": 44.1445,
+      "step": 85
+    },
+    {
+      "epoch": 0.00384172109104879,
+      "eval_loss": 11.036417007446289,
+      "eval_runtime": 176.131,
+      "eval_samples_per_second": 52.898,
+      "eval_steps_per_second": 26.452,
+      "step": 85
+    },
+    {
+      "epoch": 0.003886917809767011,
+      "grad_norm": 0.43622660636901855,
+      "learning_rate": 0.00018836155801026753,
+      "loss": 44.1515,
+      "step": 86
+    },
+    {
+      "epoch": 0.003932114528485232,
+      "grad_norm": 0.578544020652771,
+      "learning_rate": 0.0001880595531856738,
+      "loss": 44.0766,
+      "step": 87
+    },
+    {
+      "epoch": 0.003977311247203453,
+      "grad_norm": 0.598685085773468,
+      "learning_rate": 0.00018775392857775432,
+      "loss": 44.1756,
+      "step": 88
+    },
+    {
+      "epoch": 0.004022507965921674,
+      "grad_norm": 0.5733134150505066,
+      "learning_rate": 0.00018744469674953956,
+      "loss": 44.1756,
+      "step": 89
+    },
+    {
+      "epoch": 0.004067704684639895,
+      "grad_norm": 0.5177151560783386,
+      "learning_rate": 0.00018713187041233896,
+      "loss": 44.173,
+      "step": 90
+    },
+    {
+      "epoch": 0.004067704684639895,
+      "eval_loss": 11.034589767456055,
+      "eval_runtime": 176.3402,
+      "eval_samples_per_second": 52.835,
+      "eval_steps_per_second": 26.421,
+      "step": 90
     }
   ],
   "logging_steps": 1,
@@ -731,7 +817,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 840538521600.0,
+  "total_flos": 945605836800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null