yuweiiizz commited on
Commit
01c34fc
·
verified ·
1 Parent(s): e70febd

Training in progress, step 4000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb4723176030943ebde72d95a5b430beb31e429b8f07ab63805e0a19400ce394
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b35281e57f38653ec64c5bc5be60610b3bddf528283e52b17f94b101f0da1e3
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27774a1c9324137368765ab64656e4cba5fe09dd0b9dba44b6e5f26d0df4f7af
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7dd151efea5d7680180e1189346e1df7c2dcb8e0a5fa46b8d2fff36f69f4ca9
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27cdf2edd39f57a70573d9ff0027b58248741fcc4a77b968063bd6a9c61fd866
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4a887c0679a244fb3578da62fc4230274c5d38de547b25494a50298ffcd112e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8286671647e935c888143a0e7f6f5af2bd3055ba02d389f94e0f162c96f1d80e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b52ab14c4ce453c2751c4f86099cc9afc680226047a17acc9c5c9e00c75b7d9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 51.82938759538251,
3
- "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-3000",
4
- "epoch": 1.2007204322593557,
5
  "eval_steps": 1000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -874,6 +874,295 @@
874
  "eval_samples_per_second": 2.202,
875
  "eval_steps_per_second": 0.275,
876
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
877
  }
878
  ],
879
  "logging_steps": 25,
@@ -881,7 +1170,7 @@
881
  "num_input_tokens_seen": 0,
882
  "num_train_epochs": 2,
883
  "save_steps": 1000,
884
- "total_flos": 1.385007911755776e+19,
885
  "train_batch_size": 8,
886
  "trial_name": null,
887
  "trial_params": null
 
1
  {
2
+ "best_metric": 51.82156133828997,
3
+ "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
4
+ "epoch": 1.6009605763458075,
5
  "eval_steps": 1000,
6
+ "global_step": 4000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
874
  "eval_samples_per_second": 2.202,
875
  "eval_steps_per_second": 0.275,
876
  "step": 3000
877
+ },
878
+ {
879
+ "epoch": 1.2107264358615168,
880
+ "grad_norm": 10.6748628616333,
881
+ "learning_rate": 4.383896797153026e-06,
882
+ "loss": 0.7877,
883
+ "step": 3025
884
+ },
885
+ {
886
+ "epoch": 1.2207324394636783,
887
+ "grad_norm": 11.652464866638184,
888
+ "learning_rate": 4.328291814946619e-06,
889
+ "loss": 0.7794,
890
+ "step": 3050
891
+ },
892
+ {
893
+ "epoch": 1.2307384430658395,
894
+ "grad_norm": 11.998939514160156,
895
+ "learning_rate": 4.272686832740214e-06,
896
+ "loss": 0.8183,
897
+ "step": 3075
898
+ },
899
+ {
900
+ "epoch": 1.2407444466680009,
901
+ "grad_norm": 13.142699241638184,
902
+ "learning_rate": 4.217081850533808e-06,
903
+ "loss": 0.7834,
904
+ "step": 3100
905
+ },
906
+ {
907
+ "epoch": 1.250750450270162,
908
+ "grad_norm": 11.67496395111084,
909
+ "learning_rate": 4.161476868327402e-06,
910
+ "loss": 0.7594,
911
+ "step": 3125
912
+ },
913
+ {
914
+ "epoch": 1.2607564538723235,
915
+ "grad_norm": 11.429244995117188,
916
+ "learning_rate": 4.105871886120997e-06,
917
+ "loss": 0.7963,
918
+ "step": 3150
919
+ },
920
+ {
921
+ "epoch": 1.2707624574744847,
922
+ "grad_norm": 12.160046577453613,
923
+ "learning_rate": 4.0502669039145905e-06,
924
+ "loss": 0.7441,
925
+ "step": 3175
926
+ },
927
+ {
928
+ "epoch": 1.2807684610766459,
929
+ "grad_norm": 12.606410026550293,
930
+ "learning_rate": 3.994661921708186e-06,
931
+ "loss": 0.8733,
932
+ "step": 3200
933
+ },
934
+ {
935
+ "epoch": 1.2907744646788073,
936
+ "grad_norm": 14.263989448547363,
937
+ "learning_rate": 3.93905693950178e-06,
938
+ "loss": 0.7433,
939
+ "step": 3225
940
+ },
941
+ {
942
+ "epoch": 1.3007804682809687,
943
+ "grad_norm": 11.299212455749512,
944
+ "learning_rate": 3.883451957295374e-06,
945
+ "loss": 0.789,
946
+ "step": 3250
947
+ },
948
+ {
949
+ "epoch": 1.31078647188313,
950
+ "grad_norm": 12.999605178833008,
951
+ "learning_rate": 3.827846975088969e-06,
952
+ "loss": 0.7868,
953
+ "step": 3275
954
+ },
955
+ {
956
+ "epoch": 1.320792475485291,
957
+ "grad_norm": 10.38305950164795,
958
+ "learning_rate": 3.7722419928825625e-06,
959
+ "loss": 0.7433,
960
+ "step": 3300
961
+ },
962
+ {
963
+ "epoch": 1.3307984790874525,
964
+ "grad_norm": 13.05246639251709,
965
+ "learning_rate": 3.7166370106761567e-06,
966
+ "loss": 0.7547,
967
+ "step": 3325
968
+ },
969
+ {
970
+ "epoch": 1.3408044826896137,
971
+ "grad_norm": 11.195088386535645,
972
+ "learning_rate": 3.661032028469751e-06,
973
+ "loss": 0.8126,
974
+ "step": 3350
975
+ },
976
+ {
977
+ "epoch": 1.3508104862917751,
978
+ "grad_norm": 11.096240997314453,
979
+ "learning_rate": 3.605427046263346e-06,
980
+ "loss": 0.743,
981
+ "step": 3375
982
+ },
983
+ {
984
+ "epoch": 1.3608164898939363,
985
+ "grad_norm": 13.594226837158203,
986
+ "learning_rate": 3.5498220640569395e-06,
987
+ "loss": 0.7965,
988
+ "step": 3400
989
+ },
990
+ {
991
+ "epoch": 1.3708224934960977,
992
+ "grad_norm": 15.316413879394531,
993
+ "learning_rate": 3.4942170818505337e-06,
994
+ "loss": 0.7956,
995
+ "step": 3425
996
+ },
997
+ {
998
+ "epoch": 1.380828497098259,
999
+ "grad_norm": 12.78977108001709,
1000
+ "learning_rate": 3.4386120996441287e-06,
1001
+ "loss": 0.7693,
1002
+ "step": 3450
1003
+ },
1004
+ {
1005
+ "epoch": 1.3908345007004201,
1006
+ "grad_norm": 12.662712097167969,
1007
+ "learning_rate": 3.383007117437723e-06,
1008
+ "loss": 0.7768,
1009
+ "step": 3475
1010
+ },
1011
+ {
1012
+ "epoch": 1.4008405043025816,
1013
+ "grad_norm": 8.803949356079102,
1014
+ "learning_rate": 3.327402135231317e-06,
1015
+ "loss": 0.7622,
1016
+ "step": 3500
1017
+ },
1018
+ {
1019
+ "epoch": 1.410846507904743,
1020
+ "grad_norm": 13.736053466796875,
1021
+ "learning_rate": 3.2717971530249116e-06,
1022
+ "loss": 0.8152,
1023
+ "step": 3525
1024
+ },
1025
+ {
1026
+ "epoch": 1.4208525115069042,
1027
+ "grad_norm": 12.255024909973145,
1028
+ "learning_rate": 3.2161921708185057e-06,
1029
+ "loss": 0.8149,
1030
+ "step": 3550
1031
+ },
1032
+ {
1033
+ "epoch": 1.4308585151090654,
1034
+ "grad_norm": 12.75201416015625,
1035
+ "learning_rate": 3.1605871886121e-06,
1036
+ "loss": 0.7471,
1037
+ "step": 3575
1038
+ },
1039
+ {
1040
+ "epoch": 1.4408645187112268,
1041
+ "grad_norm": 13.30036449432373,
1042
+ "learning_rate": 3.1049822064056944e-06,
1043
+ "loss": 0.6892,
1044
+ "step": 3600
1045
+ },
1046
+ {
1047
+ "epoch": 1.450870522313388,
1048
+ "grad_norm": 10.946511268615723,
1049
+ "learning_rate": 3.0493772241992886e-06,
1050
+ "loss": 0.7916,
1051
+ "step": 3625
1052
+ },
1053
+ {
1054
+ "epoch": 1.4608765259155494,
1055
+ "grad_norm": 10.852522850036621,
1056
+ "learning_rate": 2.9937722419928827e-06,
1057
+ "loss": 0.7329,
1058
+ "step": 3650
1059
+ },
1060
+ {
1061
+ "epoch": 1.4708825295177106,
1062
+ "grad_norm": 11.466883659362793,
1063
+ "learning_rate": 2.938167259786477e-06,
1064
+ "loss": 0.7588,
1065
+ "step": 3675
1066
+ },
1067
+ {
1068
+ "epoch": 1.480888533119872,
1069
+ "grad_norm": 12.728093147277832,
1070
+ "learning_rate": 2.8825622775800715e-06,
1071
+ "loss": 0.7527,
1072
+ "step": 3700
1073
+ },
1074
+ {
1075
+ "epoch": 1.4908945367220332,
1076
+ "grad_norm": 9.343868255615234,
1077
+ "learning_rate": 2.8269572953736656e-06,
1078
+ "loss": 0.7451,
1079
+ "step": 3725
1080
+ },
1081
+ {
1082
+ "epoch": 1.5009005403241944,
1083
+ "grad_norm": 14.047112464904785,
1084
+ "learning_rate": 2.7713523131672598e-06,
1085
+ "loss": 0.7905,
1086
+ "step": 3750
1087
+ },
1088
+ {
1089
+ "epoch": 1.5109065439263558,
1090
+ "grad_norm": 10.599916458129883,
1091
+ "learning_rate": 2.7157473309608543e-06,
1092
+ "loss": 0.7819,
1093
+ "step": 3775
1094
+ },
1095
+ {
1096
+ "epoch": 1.5209125475285172,
1097
+ "grad_norm": 14.341135025024414,
1098
+ "learning_rate": 2.6601423487544485e-06,
1099
+ "loss": 0.7166,
1100
+ "step": 3800
1101
+ },
1102
+ {
1103
+ "epoch": 1.5309185511306784,
1104
+ "grad_norm": 11.71387767791748,
1105
+ "learning_rate": 2.6045373665480426e-06,
1106
+ "loss": 0.7386,
1107
+ "step": 3825
1108
+ },
1109
+ {
1110
+ "epoch": 1.5409245547328396,
1111
+ "grad_norm": 12.604011535644531,
1112
+ "learning_rate": 2.5489323843416376e-06,
1113
+ "loss": 0.6969,
1114
+ "step": 3850
1115
+ },
1116
+ {
1117
+ "epoch": 1.550930558335001,
1118
+ "grad_norm": 9.848773002624512,
1119
+ "learning_rate": 2.4933274021352318e-06,
1120
+ "loss": 0.8356,
1121
+ "step": 3875
1122
+ },
1123
+ {
1124
+ "epoch": 1.5609365619371625,
1125
+ "grad_norm": 14.05534553527832,
1126
+ "learning_rate": 2.437722419928826e-06,
1127
+ "loss": 0.7282,
1128
+ "step": 3900
1129
+ },
1130
+ {
1131
+ "epoch": 1.5709425655393234,
1132
+ "grad_norm": 12.9791259765625,
1133
+ "learning_rate": 2.38211743772242e-06,
1134
+ "loss": 0.7059,
1135
+ "step": 3925
1136
+ },
1137
+ {
1138
+ "epoch": 1.5809485691414848,
1139
+ "grad_norm": 10.708452224731445,
1140
+ "learning_rate": 2.3265124555160142e-06,
1141
+ "loss": 0.7712,
1142
+ "step": 3950
1143
+ },
1144
+ {
1145
+ "epoch": 1.5909545727436463,
1146
+ "grad_norm": 13.361218452453613,
1147
+ "learning_rate": 2.270907473309609e-06,
1148
+ "loss": 0.719,
1149
+ "step": 3975
1150
+ },
1151
+ {
1152
+ "epoch": 1.6009605763458075,
1153
+ "grad_norm": 11.740647315979004,
1154
+ "learning_rate": 2.215302491103203e-06,
1155
+ "loss": 0.8262,
1156
+ "step": 4000
1157
+ },
1158
+ {
1159
+ "epoch": 1.6009605763458075,
1160
+ "eval_cer": 51.82156133828997,
1161
+ "eval_loss": 0.9109482169151306,
1162
+ "eval_runtime": 1918.0204,
1163
+ "eval_samples_per_second": 2.068,
1164
+ "eval_steps_per_second": 0.259,
1165
+ "step": 4000
1166
  }
1167
  ],
1168
  "logging_steps": 25,
 
1170
  "num_input_tokens_seen": 0,
1171
  "num_train_epochs": 2,
1172
  "save_steps": 1000,
1173
+ "total_flos": 1.846744552267776e+19,
1174
  "train_batch_size": 8,
1175
  "trial_name": null,
1176
  "trial_params": null