yuweiiizz commited on
Commit
ab3dc7b
·
verified ·
1 Parent(s): da1d747

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:703a6ac7121d730d9c0e42b1a06b45963892be5944000d1f0185ce7044bc3c9e
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f8f2b474fa2b6284d59b3ee18ee12fa7507a2433ee4f0ba97cf2ecdc3c35c1
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddfbaef04fe7f922a8f6475913402834b772a6863710273bbd173e2839db0fd8
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0ef1069b97fed6b2293215f8c989c7a0784d96706a1ee18b58bab201b4fc6d0
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1877997138fd6c00a4ddcc0ec7e9c019b9f4ccb7a15031d5cedb28ccb7a2c96c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27cdf2edd39f57a70573d9ff0027b58248741fcc4a77b968063bd6a9c61fd866
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29fb9e79fa30fbb431af919246a50a3118e2599b8f861d1f7ece53767b613869
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17af6a83bb1cb19cd0edadcdd8667775ae13ecbc6438dd8bbc5fbd929a74874b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 51.77491557370612,
3
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-2000",
4
- "epoch": 0.8,
5
  "eval_steps": 1000,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -585,6 +585,295 @@
585
  "eval_samples_per_second": 2.264,
586
  "eval_steps_per_second": 0.283,
587
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  }
589
  ],
590
  "logging_steps": 25,
@@ -592,7 +881,7 @@
592
  "num_input_tokens_seen": 0,
593
  "num_train_epochs": 2,
594
  "save_steps": 1000,
595
- "total_flos": 9.23473281024e+18,
596
  "train_batch_size": 8,
597
  "trial_name": null,
598
  "trial_params": null
 
1
  {
2
  "best_metric": 51.77491557370612,
3
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-2000",
4
+ "epoch": 1.2,
5
  "eval_steps": 1000,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
585
  "eval_samples_per_second": 2.264,
586
  "eval_steps_per_second": 0.283,
587
  "step": 2000
588
+ },
589
+ {
590
+ "epoch": 0.81,
591
+ "grad_norm": 15.01921272277832,
592
+ "learning_rate": 6.6111111111111115e-06,
593
+ "loss": 1.0203,
594
+ "step": 2025
595
+ },
596
+ {
597
+ "epoch": 0.82,
598
+ "grad_norm": 12.810806274414062,
599
+ "learning_rate": 6.555555555555556e-06,
600
+ "loss": 1.0426,
601
+ "step": 2050
602
+ },
603
+ {
604
+ "epoch": 0.83,
605
+ "grad_norm": 12.653311729431152,
606
+ "learning_rate": 6.5000000000000004e-06,
607
+ "loss": 1.0451,
608
+ "step": 2075
609
+ },
610
+ {
611
+ "epoch": 0.84,
612
+ "grad_norm": 14.353548049926758,
613
+ "learning_rate": 6.444444444444445e-06,
614
+ "loss": 1.0853,
615
+ "step": 2100
616
+ },
617
+ {
618
+ "epoch": 0.85,
619
+ "grad_norm": 12.075662612915039,
620
+ "learning_rate": 6.3888888888888885e-06,
621
+ "loss": 1.0124,
622
+ "step": 2125
623
+ },
624
+ {
625
+ "epoch": 0.86,
626
+ "grad_norm": 14.976198196411133,
627
+ "learning_rate": 6.333333333333333e-06,
628
+ "loss": 1.0461,
629
+ "step": 2150
630
+ },
631
+ {
632
+ "epoch": 0.87,
633
+ "grad_norm": 12.702341079711914,
634
+ "learning_rate": 6.277777777777778e-06,
635
+ "loss": 1.0319,
636
+ "step": 2175
637
+ },
638
+ {
639
+ "epoch": 0.88,
640
+ "grad_norm": 14.232624053955078,
641
+ "learning_rate": 6.222222222222223e-06,
642
+ "loss": 1.0174,
643
+ "step": 2200
644
+ },
645
+ {
646
+ "epoch": 0.89,
647
+ "grad_norm": 12.84585189819336,
648
+ "learning_rate": 6.166666666666667e-06,
649
+ "loss": 1.0275,
650
+ "step": 2225
651
+ },
652
+ {
653
+ "epoch": 0.9,
654
+ "grad_norm": 17.438783645629883,
655
+ "learning_rate": 6.111111111111112e-06,
656
+ "loss": 0.9808,
657
+ "step": 2250
658
+ },
659
+ {
660
+ "epoch": 0.91,
661
+ "grad_norm": 13.450716018676758,
662
+ "learning_rate": 6.055555555555555e-06,
663
+ "loss": 1.0239,
664
+ "step": 2275
665
+ },
666
+ {
667
+ "epoch": 0.92,
668
+ "grad_norm": 14.077709197998047,
669
+ "learning_rate": 6e-06,
670
+ "loss": 1.0151,
671
+ "step": 2300
672
+ },
673
+ {
674
+ "epoch": 0.93,
675
+ "grad_norm": 10.755902290344238,
676
+ "learning_rate": 5.944444444444445e-06,
677
+ "loss": 0.9994,
678
+ "step": 2325
679
+ },
680
+ {
681
+ "epoch": 0.94,
682
+ "grad_norm": 13.025961875915527,
683
+ "learning_rate": 5.88888888888889e-06,
684
+ "loss": 1.0136,
685
+ "step": 2350
686
+ },
687
+ {
688
+ "epoch": 0.95,
689
+ "grad_norm": 13.124420166015625,
690
+ "learning_rate": 5.833333333333334e-06,
691
+ "loss": 0.9732,
692
+ "step": 2375
693
+ },
694
+ {
695
+ "epoch": 0.96,
696
+ "grad_norm": 13.470574378967285,
697
+ "learning_rate": 5.777777777777778e-06,
698
+ "loss": 0.9919,
699
+ "step": 2400
700
+ },
701
+ {
702
+ "epoch": 0.97,
703
+ "grad_norm": 13.073882102966309,
704
+ "learning_rate": 5.722222222222222e-06,
705
+ "loss": 0.9791,
706
+ "step": 2425
707
+ },
708
+ {
709
+ "epoch": 0.98,
710
+ "grad_norm": 12.156904220581055,
711
+ "learning_rate": 5.666666666666667e-06,
712
+ "loss": 0.9761,
713
+ "step": 2450
714
+ },
715
+ {
716
+ "epoch": 0.99,
717
+ "grad_norm": 14.560174942016602,
718
+ "learning_rate": 5.611111111111112e-06,
719
+ "loss": 0.9478,
720
+ "step": 2475
721
+ },
722
+ {
723
+ "epoch": 1.0,
724
+ "grad_norm": 13.365988731384277,
725
+ "learning_rate": 5.555555555555557e-06,
726
+ "loss": 0.9439,
727
+ "step": 2500
728
+ },
729
+ {
730
+ "epoch": 1.01,
731
+ "grad_norm": 11.116703987121582,
732
+ "learning_rate": 5.500000000000001e-06,
733
+ "loss": 0.8144,
734
+ "step": 2525
735
+ },
736
+ {
737
+ "epoch": 1.02,
738
+ "grad_norm": 11.22291088104248,
739
+ "learning_rate": 5.444444444444445e-06,
740
+ "loss": 0.7626,
741
+ "step": 2550
742
+ },
743
+ {
744
+ "epoch": 1.03,
745
+ "grad_norm": 10.895951271057129,
746
+ "learning_rate": 5.388888888888889e-06,
747
+ "loss": 0.7443,
748
+ "step": 2575
749
+ },
750
+ {
751
+ "epoch": 1.04,
752
+ "grad_norm": 10.497587203979492,
753
+ "learning_rate": 5.333333333333334e-06,
754
+ "loss": 0.7348,
755
+ "step": 2600
756
+ },
757
+ {
758
+ "epoch": 1.05,
759
+ "grad_norm": 10.018071174621582,
760
+ "learning_rate": 5.2777777777777785e-06,
761
+ "loss": 0.7096,
762
+ "step": 2625
763
+ },
764
+ {
765
+ "epoch": 1.06,
766
+ "grad_norm": 11.595548629760742,
767
+ "learning_rate": 5.2222222222222226e-06,
768
+ "loss": 0.7527,
769
+ "step": 2650
770
+ },
771
+ {
772
+ "epoch": 1.07,
773
+ "grad_norm": 9.90001106262207,
774
+ "learning_rate": 5.1666666666666675e-06,
775
+ "loss": 0.7502,
776
+ "step": 2675
777
+ },
778
+ {
779
+ "epoch": 1.08,
780
+ "grad_norm": 14.49976921081543,
781
+ "learning_rate": 5.1111111111111115e-06,
782
+ "loss": 0.7253,
783
+ "step": 2700
784
+ },
785
+ {
786
+ "epoch": 1.09,
787
+ "grad_norm": 10.992387771606445,
788
+ "learning_rate": 5.0555555555555555e-06,
789
+ "loss": 0.759,
790
+ "step": 2725
791
+ },
792
+ {
793
+ "epoch": 1.1,
794
+ "grad_norm": 11.267956733703613,
795
+ "learning_rate": 5e-06,
796
+ "loss": 0.7174,
797
+ "step": 2750
798
+ },
799
+ {
800
+ "epoch": 1.11,
801
+ "grad_norm": 12.558463096618652,
802
+ "learning_rate": 4.944444444444445e-06,
803
+ "loss": 0.7936,
804
+ "step": 2775
805
+ },
806
+ {
807
+ "epoch": 1.12,
808
+ "grad_norm": 10.136383056640625,
809
+ "learning_rate": 4.888888888888889e-06,
810
+ "loss": 0.7573,
811
+ "step": 2800
812
+ },
813
+ {
814
+ "epoch": 1.13,
815
+ "grad_norm": 10.183345794677734,
816
+ "learning_rate": 4.833333333333333e-06,
817
+ "loss": 0.7704,
818
+ "step": 2825
819
+ },
820
+ {
821
+ "epoch": 1.1400000000000001,
822
+ "grad_norm": 10.488049507141113,
823
+ "learning_rate": 4.777777777777778e-06,
824
+ "loss": 0.7343,
825
+ "step": 2850
826
+ },
827
+ {
828
+ "epoch": 1.15,
829
+ "grad_norm": 12.607866287231445,
830
+ "learning_rate": 4.722222222222222e-06,
831
+ "loss": 0.7247,
832
+ "step": 2875
833
+ },
834
+ {
835
+ "epoch": 1.16,
836
+ "grad_norm": 10.342514991760254,
837
+ "learning_rate": 4.666666666666667e-06,
838
+ "loss": 0.7618,
839
+ "step": 2900
840
+ },
841
+ {
842
+ "epoch": 1.17,
843
+ "grad_norm": 10.273557662963867,
844
+ "learning_rate": 4.611111111111112e-06,
845
+ "loss": 0.805,
846
+ "step": 2925
847
+ },
848
+ {
849
+ "epoch": 1.18,
850
+ "grad_norm": 11.328307151794434,
851
+ "learning_rate": 4.555555555555556e-06,
852
+ "loss": 0.6998,
853
+ "step": 2950
854
+ },
855
+ {
856
+ "epoch": 1.19,
857
+ "grad_norm": 13.699926376342773,
858
+ "learning_rate": 4.5e-06,
859
+ "loss": 0.7924,
860
+ "step": 2975
861
+ },
862
+ {
863
+ "epoch": 1.2,
864
+ "grad_norm": 10.797248840332031,
865
+ "learning_rate": 4.444444444444444e-06,
866
+ "loss": 0.7837,
867
+ "step": 3000
868
+ },
869
+ {
870
+ "epoch": 1.2,
871
+ "eval_cer": 54.1977538679023,
872
+ "eval_loss": 0.9572013020515442,
873
+ "eval_runtime": 1756.4904,
874
+ "eval_samples_per_second": 2.241,
875
+ "eval_steps_per_second": 0.28,
876
+ "step": 3000
877
  }
878
  ],
879
  "logging_steps": 25,
 
881
  "num_input_tokens_seen": 0,
882
  "num_train_epochs": 2,
883
  "save_steps": 1000,
884
+ "total_flos": 1.385209921536e+19,
885
  "train_batch_size": 8,
886
  "trial_name": null,
887
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04e771610fc099e5b30e7cffca86636d05f6b8c26a942dacf865f2937b7d763e
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c3e5a033637a03b3c288e7f28b5f4eb2b2c22389eb467e77f64839f3948fe5f
3
  size 5176