Commit fb111b4 (verified) by besimray · 1 parent: 270b649

Training in progress, step 80, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55b3090f899b46d492f32633e4515d9d6ae108238f6d47c3e88688671dccb48c
+oid sha256:819d34c3c14db785e0dd28311c01d0c8f6dcd03cdecfe856b6d00ab306a6259e
 size 125048
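
The adapter weights themselves live behind this Git LFS pointer; only the recorded sha256 changes between checkpoints while the payload size stays at 125048 bytes. A minimal sketch for inspecting the stored tensors once the LFS blob has been pulled locally (assumes the safetensors and torch packages are installed; the path is relative to the repository root):

# Inspect the adapter tensors in the updated checkpoint.
# Assumes `git lfs pull` has materialized the file locally.
from safetensors import safe_open

with safe_open("last-checkpoint/adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        tensor = f.get_tensor(name)
        print(name, tuple(tensor.shape), tensor.dtype)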
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2046aeb674d75035e95a8d14adfeee0462127d510df5daeba02a5d1eab404340
+oid sha256:cf9b6d9e891724bb31db21c88f9512591eca8a4a6ad4e8699987a16aa8a16498
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:670916a94714b472f443269033bde75c99b199370216d2fe1fb8611619905402
+oid sha256:77d6b80ff98d1964921d012a443cb87b248a1f2b5da6296fdc6b0c8c5f518f22
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1074437c4c638775a20a86de2e689d1a14d0ff4e4137df2ddeb45e94a776caef
+oid sha256:7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889
 size 1064
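
All four files above are Git LFS pointers, so each diff only swaps the sha256 of the underlying payload. A minimal sketch for checking that a locally materialized file matches its pointer; the expected hash below is copied from the scheduler.pt diff above, and any other path or hash would be substituted the same way:

# Verify a materialized LFS payload against the sha256 recorded in its pointer.
import hashlib

def file_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with open(path, "rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

expected = "7803647d52fbd7429a283dc695ba7cf653ff890c06d5c50f67d0a09610438889"  # from the diff above
print(file_sha256("last-checkpoint/scheduler.pt") == expected)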
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.046669960021973,
-  "best_model_checkpoint": "miner_id_24/checkpoint-70",
-  "epoch": 0.003163770310275474,
+  "best_metric": 11.038910865783691,
+  "best_model_checkpoint": "miner_id_24/checkpoint-80",
+  "epoch": 0.0036157374974576844,
   "eval_steps": 5,
-  "global_step": 70,
+  "global_step": 80,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -617,6 +617,92 @@
       "eval_samples_per_second": 52.828,
       "eval_steps_per_second": 26.417,
       "step": 70
+    },
+    {
+      "epoch": 0.003208967028993695,
+      "grad_norm": 0.7522275447845459,
+      "learning_rate": 0.00019244917309000817,
+      "loss": 44.2246,
+      "step": 71
+    },
+    {
+      "epoch": 0.0032541637477119163,
+      "grad_norm": 0.7135974168777466,
+      "learning_rate": 0.00019220287022200707,
+      "loss": 44.2111,
+      "step": 72
+    },
+    {
+      "epoch": 0.003299360466430137,
+      "grad_norm": 0.7275662422180176,
+      "learning_rate": 0.0001919527772551451,
+      "loss": 44.1464,
+      "step": 73
+    },
+    {
+      "epoch": 0.003344557185148358,
+      "grad_norm": 0.6742229461669922,
+      "learning_rate": 0.00019169890446976454,
+      "loss": 44.2105,
+      "step": 74
+    },
+    {
+      "epoch": 0.0033897539038665794,
+      "grad_norm": 0.6085646152496338,
+      "learning_rate": 0.00019144126230158127,
+      "loss": 44.0926,
+      "step": 75
+    },
+    {
+      "epoch": 0.0033897539038665794,
+      "eval_loss": 11.042237281799316,
+      "eval_runtime": 176.114,
+      "eval_samples_per_second": 52.903,
+      "eval_steps_per_second": 26.454,
+      "step": 75
+    },
+    {
+      "epoch": 0.0034349506225848005,
+      "grad_norm": 0.7245734333992004,
+      "learning_rate": 0.0001911798613412557,
+      "loss": 44.2154,
+      "step": 76
+    },
+    {
+      "epoch": 0.0034801473413030213,
+      "grad_norm": 0.7311281561851501,
+      "learning_rate": 0.0001909147123339575,
+      "loss": 44.1687,
+      "step": 77
+    },
+    {
+      "epoch": 0.0035253440600212425,
+      "grad_norm": 0.6399495601654053,
+      "learning_rate": 0.0001906458261789238,
+      "loss": 44.1596,
+      "step": 78
+    },
+    {
+      "epoch": 0.0035705407787394636,
+      "grad_norm": 0.5650178790092468,
+      "learning_rate": 0.00019037321392901136,
+      "loss": 44.1466,
+      "step": 79
+    },
+    {
+      "epoch": 0.0036157374974576844,
+      "grad_norm": 0.6039579510688782,
+      "learning_rate": 0.0001900968867902419,
+      "loss": 44.1955,
+      "step": 80
+    },
+    {
+      "epoch": 0.0036157374974576844,
+      "eval_loss": 11.038910865783691,
+      "eval_runtime": 176.3853,
+      "eval_samples_per_second": 52.822,
+      "eval_steps_per_second": 26.414,
+      "step": 80
     }
   ],
   "logging_steps": 1,
@@ -645,7 +731,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 735471206400.0,
+  "total_flos": 840538521600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null