Commit e53d17f (verified) · committed by neuralwonderland · 1 Parent(s): f38e6d9

Training in progress, step 900, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55dfe9fb7f3128ce900f85873ce9089dda937ea76e143b8837bdd97de02a6640
+oid sha256:7fdfb940693f1c3aafa0189ec7111d3edb4fd9d110f0d9c61ff0cf3745124e39
 size 524363632
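Every file in this checkpoint is stored through Git LFS, so the commit only rewrites the small pointer files (`version` / `oid sha256` / `size`); the adapter weights changed at step 900, which is why the hash changes while the serialized size stays the same. A minimal sketch for checking a pulled file against its pointer, assuming a local clone with the LFS objects fetched (the path and expected values are copied from the adapter pointer above):

```python
# Sketch: confirm a pulled LFS object matches the pointer recorded at step 900.
# The path and expected values are copied from the adapter_model.safetensors
# pointer above; everything else is illustrative.
import hashlib
import os

PATH = "last-checkpoint/adapter_model.safetensors"
EXPECTED_OID = "7fdfb940693f1c3aafa0189ec7111d3edb4fd9d110f0d9c61ff0cf3745124e39"
EXPECTED_SIZE = 524363632  # bytes, per the pointer's "size" line


def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Hash the file in 1 MiB chunks so a 500 MB adapter never sits in RAM."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


assert os.path.getsize(PATH) == EXPECTED_SIZE, "size differs from the LFS pointer"
assert sha256_of(PATH) == EXPECTED_OID, "sha256 differs from the LFS pointer"
print(f"{PATH} matches the step-900 pointer")
```

From the command line, `git lfs fsck` performs a comparable integrity check over the whole clone.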
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1083c00dc43995a6c81f87894859e2f58e4370c70e9000e40ef092dded244fe6
+oid sha256:d37c4809c9aa1ad0c2fe5d47d3b851522e6f0210d0448ba7dd98ba91922ea764
 size 1049049442
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c95ad3b8a642363e32a822abf74fe540f31dd9513c96b7cd9ff2c634d99406f
+oid sha256:953f8e5d71766516f22562bd76f4dfbe7528aeb193338b993e0806f5c5da2d47
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:01d7fef7d29480b2b6b9ce11dd77e0c699d943e8e5d91236651553a4ae0d3870
+oid sha256:f51293ad7eaca2d9fbcb656287d586dbce3d55186d836ffb75f98a82c46d3fdf
 size 1256
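The `optimizer.pt`, `scheduler.pt`, and `rng_state.pth` updates carry the optimizer moments, learning-rate schedule, and RNG streams that `transformers.Trainer` needs to continue a run rather than restart it. A sketch of how such a checkpoint is typically resumed; only the hyperparameters echoed in `trainer_state.json` below come from this commit, while the model and dataset arguments are placeholders for whatever the original run used:

```python
# Sketch: resume training from the saved step-900 state. Only the checkpoint
# layout and the hyperparameters visible in trainer_state.json come from this
# commit; model and datasets are placeholders supplied by the caller.
from transformers import Trainer, TrainingArguments


def resume_from_step_900(model, train_dataset, eval_dataset):
    """Continue the run from last-checkpoint/ instead of restarting at step 0."""
    args = TrainingArguments(
        output_dir="./output",           # matches "best_model_checkpoint": "./output/checkpoint-900"
        per_device_train_batch_size=4,   # matches "train_batch_size": 4
        eval_steps=150,                  # matches "eval_steps": 150
        logging_steps=10,                # matches "logging_steps": 10
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    # resume_from_checkpoint restores optimizer.pt, scheduler.pt, rng_state.pth
    # and trainer_state.json, so training continues from global_step 900.
    trainer.train(resume_from_checkpoint="last-checkpoint")
    return trainer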
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2344202995300293,
-  "best_model_checkpoint": "./output/checkpoint-750",
-  "epoch": 0.0335946248600224,
+  "best_metric": 1.2309150695800781,
+  "best_model_checkpoint": "./output/checkpoint-900",
+  "epoch": 0.040313549832026875,
   "eval_steps": 150,
-  "global_step": 750,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -572,6 +572,119 @@
       "eval_samples_per_second": 9.684,
       "eval_steps_per_second": 9.684,
       "step": 750
+    },
+    {
+      "epoch": 0.03404255319148936,
+      "grad_norm": 4.141931056976318,
+      "learning_rate": 7.169244800297089e-06,
+      "loss": 1.2613,
+      "step": 760
+    },
+    {
+      "epoch": 0.03449048152295633,
+      "grad_norm": 4.191932201385498,
+      "learning_rate": 7.159301712523407e-06,
+      "loss": 1.1802,
+      "step": 770
+    },
+    {
+      "epoch": 0.03493840985442329,
+      "grad_norm": 4.759700775146484,
+      "learning_rate": 7.149218481709644e-06,
+      "loss": 1.0651,
+      "step": 780
+    },
+    {
+      "epoch": 0.03538633818589026,
+      "grad_norm": 3.969430923461914,
+      "learning_rate": 7.1389955223379266e-06,
+      "loss": 0.9129,
+      "step": 790
+    },
+    {
+      "epoch": 0.03583426651735722,
+      "grad_norm": 5.1956467628479,
+      "learning_rate": 7.128633254634072e-06,
+      "loss": 1.2688,
+      "step": 800
+    },
+    {
+      "epoch": 0.03628219484882419,
+      "grad_norm": 3.615705966949463,
+      "learning_rate": 7.118132104550322e-06,
+      "loss": 1.1092,
+      "step": 810
+    },
+    {
+      "epoch": 0.036730123180291153,
+      "grad_norm": 3.635277271270752,
+      "learning_rate": 7.107492503747826e-06,
+      "loss": 1.0265,
+      "step": 820
+    },
+    {
+      "epoch": 0.03717805151175812,
+      "grad_norm": 4.518077373504639,
+      "learning_rate": 7.096714889578898e-06,
+      "loss": 1.0817,
+      "step": 830
+    },
+    {
+      "epoch": 0.037625979843225084,
+      "grad_norm": 6.652565002441406,
+      "learning_rate": 7.085799705069046e-06,
+      "loss": 0.9709,
+      "step": 840
+    },
+    {
+      "epoch": 0.03807390817469205,
+      "grad_norm": 5.337361812591553,
+      "learning_rate": 7.0747473988987515e-06,
+      "loss": 1.0883,
+      "step": 850
+    },
+    {
+      "epoch": 0.038521836506159014,
+      "grad_norm": 5.067249774932861,
+      "learning_rate": 7.063558425385033e-06,
+      "loss": 1.08,
+      "step": 860
+    },
+    {
+      "epoch": 0.03896976483762598,
+      "grad_norm": 3.9859232902526855,
+      "learning_rate": 7.052233244462769e-06,
+      "loss": 1.0063,
+      "step": 870
+    },
+    {
+      "epoch": 0.039417693169092945,
+      "grad_norm": 5.297623634338379,
+      "learning_rate": 7.040772321665788e-06,
+      "loss": 0.9638,
+      "step": 880
+    },
+    {
+      "epoch": 0.03986562150055991,
+      "grad_norm": 6.088709354400635,
+      "learning_rate": 7.029176128107734e-06,
+      "loss": 1.2673,
+      "step": 890
+    },
+    {
+      "epoch": 0.040313549832026875,
+      "grad_norm": 7.997159957885742,
+      "learning_rate": 7.017445140462711e-06,
+      "loss": 0.9986,
+      "step": 900
+    },
+    {
+      "epoch": 0.040313549832026875,
+      "eval_loss": 1.2309150695800781,
+      "eval_runtime": 51.612,
+      "eval_samples_per_second": 9.688,
+      "eval_steps_per_second": 9.688,
+      "step": 900
     }
   ],
   "logging_steps": 10,
@@ -591,7 +704,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9.63620096827392e+16,
+  "total_flos": 1.157850253080576e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null