neuralwonderland committed on
Commit 61d9d2b · verified · 1 Parent(s): 5d6354c

Training in progress, step 900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b521c111bca85508520ccdb324c0b45a8eaa1aa0f18ff1da11a37e8ee1bbc0b6
+oid sha256:4ffaf65277dedfeabee1628efce03e32368cea5326586e5329f36d47cf87bb02
 size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c96ec7482a6bb1114547a8a9e64f11536f8ca54961b0518f980b4d31ab26ece2
+oid sha256:1df32252b9ad9f873764ba736101dc8657b24faf48baafe030b4676270be0b20
 size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4df5a37ab7cfb8a96608f6dd12ed85def2455fd70d691a55a8b5ef56a2a75631
+oid sha256:e365a4d04da08f0e566123029ce881befe006108afd22526e5c54d5326561a75
 size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d9acb300ee3481a499d71232bda2cc6822a3d265890998459ac4ab1f2a7a58f
+oid sha256:19edf8f1957a4ee4a7ad7c15c95d0e40ddbc2262c2a8b3d331d09ae113e648a2
 size 1256
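The four files above are stored as Git LFS pointers: only the sha256 oid and byte size change in the commit, while the blobs themselves live in LFS storage. A minimal sketch (not part of this repository; paths and the helper name are illustrative) of checking a downloaded blob against the oid recorded in its pointer:

```python
# Hypothetical helper: verify a downloaded checkpoint file against the
# sha256 oid in its Git LFS pointer (e.g. adapter_model.safetensors above).
import hashlib
from pathlib import Path

def lfs_oid_matches(pointer_path: str, blob_path: str) -> bool:
    """Return True if the blob's sha256 digest equals the pointer's oid."""
    pointer_text = Path(pointer_path).read_text()
    oid = next(
        line.split("sha256:", 1)[1].strip()
        for line in pointer_text.splitlines()
        if line.startswith("oid sha256:")
    )
    digest = hashlib.sha256(Path(blob_path).read_bytes()).hexdigest()
    return digest == oid

# Example (illustrative paths):
# lfs_oid_matches("adapter_model.safetensors.pointer", "adapter_model.safetensors")
```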
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9952675700187683,
-  "best_model_checkpoint": "./output/checkpoint-750",
-  "epoch": 0.09323719542516161,
+  "best_metric": 0.9783537983894348,
+  "best_model_checkpoint": "./output/checkpoint-900",
+  "epoch": 0.11188463451019394,
   "eval_steps": 150,
-  "global_step": 750,
+  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -572,6 +572,119 @@
       "eval_samples_per_second": 8.986,
       "eval_steps_per_second": 8.986,
       "step": 750
+    },
+    {
+      "epoch": 0.09448035803083044,
+      "grad_norm": 1.6342475414276123,
+      "learning_rate": 0.00011948741333828481,
+      "loss": 0.7924,
+      "step": 760
+    },
+    {
+      "epoch": 0.09572352063649925,
+      "grad_norm": 1.773032307624817,
+      "learning_rate": 0.00011932169520872344,
+      "loss": 0.7737,
+      "step": 770
+    },
+    {
+      "epoch": 0.09696668324216808,
+      "grad_norm": 1.3042093515396118,
+      "learning_rate": 0.00011915364136182738,
+      "loss": 0.7554,
+      "step": 780
+    },
+    {
+      "epoch": 0.0982098458478369,
+      "grad_norm": 1.9383268356323242,
+      "learning_rate": 0.0001189832587056321,
+      "loss": 0.793,
+      "step": 790
+    },
+    {
+      "epoch": 0.09945300845350571,
+      "grad_norm": 0.9794479608535767,
+      "learning_rate": 0.00011881055424390119,
+      "loss": 0.7059,
+      "step": 800
+    },
+    {
+      "epoch": 0.10069617105917454,
+      "grad_norm": 1.8000845909118652,
+      "learning_rate": 0.00011863553507583869,
+      "loss": 0.7037,
+      "step": 810
+    },
+    {
+      "epoch": 0.10193933366484337,
+      "grad_norm": 1.6130144596099854,
+      "learning_rate": 0.00011845820839579708,
+      "loss": 0.9073,
+      "step": 820
+    },
+    {
+      "epoch": 0.10318249627051218,
+      "grad_norm": 1.7219104766845703,
+      "learning_rate": 0.00011827858149298162,
+      "loss": 0.7089,
+      "step": 830
+    },
+    {
+      "epoch": 0.10442565887618101,
+      "grad_norm": 1.8444559574127197,
+      "learning_rate": 0.00011809666175115075,
+      "loss": 0.6576,
+      "step": 840
+    },
+    {
+      "epoch": 0.10566882148184982,
+      "grad_norm": 2.564314126968384,
+      "learning_rate": 0.00011791245664831251,
+      "loss": 0.7686,
+      "step": 850
+    },
+    {
+      "epoch": 0.10691198408751865,
+      "grad_norm": 1.1209467649459839,
+      "learning_rate": 0.0001177259737564172,
+      "loss": 0.7148,
+      "step": 860
+    },
+    {
+      "epoch": 0.10815514669318747,
+      "grad_norm": 1.5384615659713745,
+      "learning_rate": 0.00011753722074104613,
+      "loss": 0.7689,
+      "step": 870
+    },
+    {
+      "epoch": 0.10939830929885629,
+      "grad_norm": 1.2957892417907715,
+      "learning_rate": 0.00011734620536109644,
+      "loss": 0.7372,
+      "step": 880
+    },
+    {
+      "epoch": 0.11064147190452511,
+      "grad_norm": 31.243797302246094,
+      "learning_rate": 0.00011715293546846223,
+      "loss": 0.725,
+      "step": 890
+    },
+    {
+      "epoch": 0.11188463451019394,
+      "grad_norm": 1.338306188583374,
+      "learning_rate": 0.00011695741900771184,
+      "loss": 0.6768,
+      "step": 900
+    },
+    {
+      "epoch": 0.11188463451019394,
+      "eval_loss": 0.9783537983894348,
+      "eval_runtime": 57.0411,
+      "eval_samples_per_second": 8.766,
+      "eval_steps_per_second": 8.766,
+      "step": 900
     }
   ],
   "logging_steps": 10,
@@ -591,7 +704,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.600106617969664e+16,
+  "total_flos": 3.123967906031616e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
  "trial_params": null