Nexspear commited on
Commit
6c62154
·
verified ·
1 Parent(s): 5f4271b

Training in progress, step 272, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5f938f590c16a2da7e7003b5726decdca766113cb8c6bf1132392eec1032131
3
  size 161533192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3b18860fdd8bcf71debab5f74b6ead522adcbd9c9319c1fd8778303b021cf38
3
  size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:242a26a061ea5d980a98bce60edfbf0494218ef54aba0d781b0dca84f1a1c629
3
- size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8521efcd8a6f8eeefb342b5c7eddb1107b80c2459403feed9249188c27e0879d
3
+ size 82461044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:771fda137b95b0a53343c99caba02f0ddc4fde33ba4274f7584011ab1e463479
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac3a037aee2862ad5a8302e2a82cb10c0b9d1880fba19ac4994d90b3f85cd339
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbcef9424696e41c7961bd91f0570d39d59ef33af28ed19a0eb9e4f50ed1b09a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3520b6e9bfde48b403dd6f4096e526132e910f4d92bd802fb2e831d46f8ad41f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.08024275118004046,
5
  "eval_steps": 34,
6
- "global_step": 238,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -624,6 +624,91 @@
624
  "eval_samples_per_second": 14.626,
625
  "eval_steps_per_second": 1.83,
626
  "step": 238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
627
  }
628
  ],
629
  "logging_steps": 3,
@@ -643,7 +728,7 @@
643
  "attributes": {}
644
  }
645
  },
646
- "total_flos": 3.3274241802357965e+17,
647
  "train_batch_size": 8,
648
  "trial_name": null,
649
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.09170600134861767,
5
  "eval_steps": 34,
6
+ "global_step": 272,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
624
  "eval_samples_per_second": 14.626,
625
  "eval_steps_per_second": 1.83,
626
  "step": 238
627
+ },
628
+ {
629
+ "epoch": 0.08091706001348618,
630
+ "grad_norm": 1.521083116531372,
631
+ "learning_rate": 1.8044563402088684e-05,
632
+ "loss": 0.4579,
633
+ "step": 240
634
+ },
635
+ {
636
+ "epoch": 0.08192852326365475,
637
+ "grad_norm": 1.1534286737442017,
638
+ "learning_rate": 1.746635141803761e-05,
639
+ "loss": 0.3893,
640
+ "step": 243
641
+ },
642
+ {
643
+ "epoch": 0.08293998651382332,
644
+ "grad_norm": 1.179457426071167,
645
+ "learning_rate": 1.6892538872607937e-05,
646
+ "loss": 0.428,
647
+ "step": 246
648
+ },
649
+ {
650
+ "epoch": 0.08395144976399191,
651
+ "grad_norm": 1.498482346534729,
652
+ "learning_rate": 1.6323460856167426e-05,
653
+ "loss": 0.414,
654
+ "step": 249
655
+ },
656
+ {
657
+ "epoch": 0.08496291301416048,
658
+ "grad_norm": 1.3838918209075928,
659
+ "learning_rate": 1.5759449694252226e-05,
660
+ "loss": 0.4113,
661
+ "step": 252
662
+ },
663
+ {
664
+ "epoch": 0.08597437626432906,
665
+ "grad_norm": 1.2871530055999756,
666
+ "learning_rate": 1.5200834753498128e-05,
667
+ "loss": 0.4945,
668
+ "step": 255
669
+ },
670
+ {
671
+ "epoch": 0.08698583951449763,
672
+ "grad_norm": 1.1573866605758667,
673
+ "learning_rate": 1.4647942249299707e-05,
674
+ "loss": 0.4448,
675
+ "step": 258
676
+ },
677
+ {
678
+ "epoch": 0.08799730276466622,
679
+ "grad_norm": 1.2284533977508545,
680
+ "learning_rate": 1.4101095055309746e-05,
681
+ "loss": 0.4248,
682
+ "step": 261
683
+ },
684
+ {
685
+ "epoch": 0.08900876601483479,
686
+ "grad_norm": 1.3865326642990112,
687
+ "learning_rate": 1.356061251489012e-05,
688
+ "loss": 0.5,
689
+ "step": 264
690
+ },
691
+ {
692
+ "epoch": 0.09002022926500337,
693
+ "grad_norm": 1.0498360395431519,
694
+ "learning_rate": 1.302681025462424e-05,
695
+ "loss": 0.3297,
696
+ "step": 267
697
+ },
698
+ {
699
+ "epoch": 0.09103169251517194,
700
+ "grad_norm": 1.1438897848129272,
701
+ "learning_rate": 1.2500000000000006e-05,
702
+ "loss": 0.4251,
703
+ "step": 270
704
+ },
705
+ {
706
+ "epoch": 0.09170600134861767,
707
+ "eval_loss": 0.409618616104126,
708
+ "eval_runtime": 341.4948,
709
+ "eval_samples_per_second": 14.627,
710
+ "eval_steps_per_second": 1.83,
711
+ "step": 272
712
  }
713
  ],
714
  "logging_steps": 3,
 
728
  "attributes": {}
729
  }
730
  },
731
+ "total_flos": 3.802770491698053e+17,
732
  "train_batch_size": 8,
733
  "trial_name": null,
734
  "trial_params": null