eeeebbb2 commited on
Commit
05ad2f1
·
verified ·
1 Parent(s): 8a55d0b

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44ac53318cef16db2c1f3ebf4ebf7492f6dc6fce86073fe04feffb8fb63b9642
3
  size 323014168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4343dd8d1a3cae2275c1b0775c85aafda99ba00210dc9cbfd6d2effd170b7b85
3
  size 323014168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c75c7dea273f524b3f5ffbc10fdbc12e406b3579c12503f7f4c8c4a1731f0da
3
  size 646253418
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd42158c3831bab1671ace88d8baad72e71cdec3f442f9320594be4bf179907c
3
  size 646253418
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7550234f3654f47ef268ddebed5f7ca0593527b2051f8a462946f6efc1354fba
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c172fb92d30a3997e4403c7053678be36d697a289f64f1d5bf73cdcda2e89547
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4674c1ecea3fb20b0e4d1bb402c7a39e33c2fa1f9bb9e7c0c9a279cf7173975a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eefced3cc3821b717603a74ca9084a0add144eee88dcae3a554a7a32cfb5b8c
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9c357a8462dc9b8384b2ba89f44ce3ff70acc445c57c40a1fb4112fd6c0dd94
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f0a25f271ba57cea38506e22ec2fac7b5f3b817d44cfc3220b45decdd324f2
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90cefe94b05ef9a9611cf4d51c2632e86145d2186490af2725ad80cc0784fdf5
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62b20abbb823060076b1a3eb033cb16f6b5fc1c0d26dfa41bcc3cd89efd53e2e
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38b483de1bf1f4ff353f1ec4a8c9df2b2847b745cbf851617486bbe747b1c0c0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b88cf1ca29131bea6a1c678610dbfd2c6c90cc06115bb015c84ed8386046423a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.017774144187569618,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-75",
4
- "epoch": 0.3665241295051924,
5
  "eval_steps": 25,
6
- "global_step": 75,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -564,6 +564,189 @@
564
  "eval_samples_per_second": 15.49,
565
  "eval_steps_per_second": 4.027,
566
  "step": 75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  }
568
  ],
569
  "logging_steps": 1,
@@ -592,7 +775,7 @@
592
  "attributes": {}
593
  }
594
  },
595
- "total_flos": 8.436086715973632e+17,
596
  "train_batch_size": 1,
597
  "trial_name": null,
598
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.012017174623906612,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.48869883934025654,
5
  "eval_steps": 25,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
564
  "eval_samples_per_second": 15.49,
565
  "eval_steps_per_second": 4.027,
566
  "step": 75
567
+ },
568
+ {
569
+ "epoch": 0.371411117898595,
570
+ "grad_norm": 0.3393837809562683,
571
+ "learning_rate": 5.696287243144013e-05,
572
+ "loss": 0.0564,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 0.37629810629199756,
577
+ "grad_norm": 0.19871211051940918,
578
+ "learning_rate": 5.598166982655526e-05,
579
+ "loss": 0.0472,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 0.3811850946854001,
584
+ "grad_norm": 0.20391109585762024,
585
+ "learning_rate": 5.500000000000001e-05,
586
+ "loss": 0.0435,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 0.3860720830788027,
591
+ "grad_norm": 0.18788817524909973,
592
+ "learning_rate": 5.4018330173444754e-05,
593
+ "loss": 0.0379,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 0.39095907147220527,
598
+ "grad_norm": 0.14316879212856293,
599
+ "learning_rate": 5.303712756855988e-05,
600
+ "loss": 0.0283,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 0.39584605986560784,
605
+ "grad_norm": 0.17786382138729095,
606
+ "learning_rate": 5.205685918464356e-05,
607
+ "loss": 0.027,
608
+ "step": 81
609
+ },
610
+ {
611
+ "epoch": 0.4007330482590104,
612
+ "grad_norm": 0.21436955034732819,
613
+ "learning_rate": 5.107799157635538e-05,
614
+ "loss": 0.0309,
615
+ "step": 82
616
+ },
617
+ {
618
+ "epoch": 0.405620036652413,
619
+ "grad_norm": 0.16341635584831238,
620
+ "learning_rate": 5.0100990631661606e-05,
621
+ "loss": 0.0287,
622
+ "step": 83
623
+ },
624
+ {
625
+ "epoch": 0.41050702504581554,
626
+ "grad_norm": 0.19714505970478058,
627
+ "learning_rate": 4.912632135009769e-05,
628
+ "loss": 0.0267,
629
+ "step": 84
630
+ },
631
+ {
632
+ "epoch": 0.41539401343921806,
633
+ "grad_norm": 0.1616361290216446,
634
+ "learning_rate": 4.8154447621453744e-05,
635
+ "loss": 0.0217,
636
+ "step": 85
637
+ },
638
+ {
639
+ "epoch": 0.4202810018326206,
640
+ "grad_norm": 0.11600978672504425,
641
+ "learning_rate": 4.718583200498814e-05,
642
+ "loss": 0.0178,
643
+ "step": 86
644
+ },
645
+ {
646
+ "epoch": 0.4251679902260232,
647
+ "grad_norm": 0.10082818567752838,
648
+ "learning_rate": 4.6220935509274235e-05,
649
+ "loss": 0.0108,
650
+ "step": 87
651
+ },
652
+ {
653
+ "epoch": 0.43005497861942577,
654
+ "grad_norm": 0.21947574615478516,
655
+ "learning_rate": 4.526021737278538e-05,
656
+ "loss": 0.0339,
657
+ "step": 88
658
+ },
659
+ {
660
+ "epoch": 0.43494196701282833,
661
+ "grad_norm": 0.231426402926445,
662
+ "learning_rate": 4.430413484532222e-05,
663
+ "loss": 0.0479,
664
+ "step": 89
665
+ },
666
+ {
667
+ "epoch": 0.4398289554062309,
668
+ "grad_norm": 0.23115426301956177,
669
+ "learning_rate": 4.3353142970386564e-05,
670
+ "loss": 0.0427,
671
+ "step": 90
672
+ },
673
+ {
674
+ "epoch": 0.4447159437996335,
675
+ "grad_norm": 0.19273918867111206,
676
+ "learning_rate": 4.240769436860537e-05,
677
+ "loss": 0.0372,
678
+ "step": 91
679
+ },
680
+ {
681
+ "epoch": 0.44960293219303604,
682
+ "grad_norm": 0.17096419632434845,
683
+ "learning_rate": 4.146823902230772e-05,
684
+ "loss": 0.0293,
685
+ "step": 92
686
+ },
687
+ {
688
+ "epoch": 0.4544899205864386,
689
+ "grad_norm": 0.15599671006202698,
690
+ "learning_rate": 4.053522406135775e-05,
691
+ "loss": 0.0252,
692
+ "step": 93
693
+ },
694
+ {
695
+ "epoch": 0.4593769089798412,
696
+ "grad_norm": 0.14636379480361938,
697
+ "learning_rate": 3.960909355034491e-05,
698
+ "loss": 0.0289,
699
+ "step": 94
700
+ },
701
+ {
702
+ "epoch": 0.46426389737324375,
703
+ "grad_norm": 0.1349724531173706,
704
+ "learning_rate": 3.8690288277233435e-05,
705
+ "loss": 0.021,
706
+ "step": 95
707
+ },
708
+ {
709
+ "epoch": 0.4691508857666463,
710
+ "grad_norm": 0.18591056764125824,
711
+ "learning_rate": 3.777924554357096e-05,
712
+ "loss": 0.0206,
713
+ "step": 96
714
+ },
715
+ {
716
+ "epoch": 0.4740378741600489,
717
+ "grad_norm": 0.1168551817536354,
718
+ "learning_rate": 3.687639895635684e-05,
719
+ "loss": 0.017,
720
+ "step": 97
721
+ },
722
+ {
723
+ "epoch": 0.47892486255345146,
724
+ "grad_norm": 0.15066345036029816,
725
+ "learning_rate": 3.598217822166854e-05,
726
+ "loss": 0.0151,
727
+ "step": 98
728
+ },
729
+ {
730
+ "epoch": 0.483811850946854,
731
+ "grad_norm": 0.10822492092847824,
732
+ "learning_rate": 3.509700894014496e-05,
733
+ "loss": 0.0098,
734
+ "step": 99
735
+ },
736
+ {
737
+ "epoch": 0.48869883934025654,
738
+ "grad_norm": 0.09653550386428833,
739
+ "learning_rate": 3.422131240442349e-05,
740
+ "loss": 0.0064,
741
+ "step": 100
742
+ },
743
+ {
744
+ "epoch": 0.48869883934025654,
745
+ "eval_loss": 0.012017174623906612,
746
+ "eval_runtime": 3.2213,
747
+ "eval_samples_per_second": 15.522,
748
+ "eval_steps_per_second": 4.036,
749
+ "step": 100
750
  }
751
  ],
752
  "logging_steps": 1,
 
775
  "attributes": {}
776
  }
777
  },
778
+ "total_flos": 1.1248115621298176e+18,
779
  "train_batch_size": 1,
780
  "trial_name": null,
781
  "trial_params": null