Training in progress, step 1000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +174 -1041
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3bfea1f8ccff1d3d104539ef9c86c38d6670980c839e6047b65be8f2eae783c8
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:3df59285835e0cc93eed8a07997106068a6025a1eecf29ca1883050640082bf5
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ca089eb0a1d5699b01f559f18d4bede6fbd50e2cda9b1cb1676c3c5548889ceb
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba8b8aca45c706933df0ac865eb25d0a5f5734f04a35d51d8b5f659916db1f8f
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7c93a397e9322e49f4ed50d18f810eaf2c39ecdb2985c95d248cd7a2fa2aa47
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d1f09b1f1f9b06ad2afb12e89fc8695073b76afcf9ea0b3552c7069932117824
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd54311344b834087a4b1c20d06544579c7f43d33908960b6b3b61734dbde46d
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c2e7a67c9c301b36183def727305bd60ef4c597b197ad54cdb0001ffc36e45a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,1177 +1,310 @@
 {
-  "best_metric": 48.63818252226668,
-  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-4000",
-  "epoch": 2.5806451612903225,
   "eval_steps": 1000,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.016129032258064516,
-      "grad_norm": 241.39755249023438,
-      "learning_rate": 5.376344086021506e-07,
-      "loss": 8.0646,
       "step": 25
     },
     {
-      "epoch": 0.03225806451612903,
-      "grad_norm": 52.91600799560547,
-      "learning_rate": 1.0752688172043011e-06,
-      "loss": 5.6903,
       "step": 50
     },
     {
-      "epoch": 0.04838709677419355,
-      "grad_norm": 32.09747314453125,
-      "learning_rate": 1.6129032258064516e-06,
-      "loss": 3.6353,
       "step": 75
     },
     {
-      "epoch": 0.06451612903225806,
-      "grad_norm": 31.451000213623047,
-      "learning_rate": 2.1505376344086023e-06,
-      "loss": 2.6364,
       "step": 100
     },
     {
-      "epoch": 0.08064516129032258,
-      "grad_norm": 29.471986770629883,
-      "learning_rate": 2.688172043010753e-06,
-      "loss": 2.3125,
       "step": 125
     },
     {
-      "epoch": 0.0967741935483871,
-      "grad_norm": 28.64345932006836,
-      "learning_rate": 3.225806451612903e-06,
-      "loss": 2.1281,
       "step": 150
     },
     {
-      "epoch": 0.11290322580645161,
-      "grad_norm": 28.750173568725586,
-      "learning_rate": 3.763440860215054e-06,
-      "loss": 1.9073,
       "step": 175
     },
     {
-      "epoch": 0.12903225806451613,
-      "grad_norm": 23.051420211791992,
-      "learning_rate": 4.3010752688172045e-06,
-      "loss": 1.5977,
       "step": 200
     },
     {
-      "epoch": 0.14516129032258066,
-      "grad_norm": 18.67135238647461,
-      "learning_rate": 4.838709677419355e-06,
-      "loss": 1.5081,
       "step": 225
     },
     {
-      "epoch": 0.16129032258064516,
-      "grad_norm": 15.335652351379395,
-      "learning_rate": 5.376344086021506e-06,
-      "loss": 1.4169,
       "step": 250
     },
     {
-      "epoch": 0.1774193548387097,
-      "grad_norm": 16.2917537689209,
-      "learning_rate": 5.9139784946236566e-06,
-      "loss": 1.3469,
       "step": 275
     },
     {
-      "epoch": 0.1935483870967742,
-      "grad_norm": 15.212031364440918,
-      "learning_rate": 6.451612903225806e-06,
-      "loss": 1.4059,
       "step": 300
     },
     {
-      "epoch": 0.20967741935483872,
-      "grad_norm": 15.661399841308594,
-      "learning_rate": 6.989247311827958e-06,
-      "loss": 1.333,
       "step": 325
     },
     {
-      "epoch": 0.22580645161290322,
-      "grad_norm": 16.841798782348633,
-      "learning_rate": 7.526881720430108e-06,
-      "loss": 1.2252,
       "step": 350
     },
     {
-      "epoch": 0.24193548387096775,
-      "grad_norm": 17.468032836914062,
-      "learning_rate": 8.064516129032258e-06,
-      "loss": 1.2996,
       "step": 375
     },
     {
-      "epoch": 0.25806451612903225,
-      "grad_norm": 16.684844970703125,
-      "learning_rate": 8.602150537634409e-06,
-      "loss": 1.2653,
       "step": 400
     },
     {
-      "epoch": 0.27419354838709675,
-      "grad_norm": 14.749136924743652,
-      "learning_rate": 9.13978494623656e-06,
-      "loss": 1.1967,
       "step": 425
     },
     {
-      "epoch": 0.2903225806451613,
-      "grad_norm": 13.751141548156738,
-      "learning_rate": 9.67741935483871e-06,
-      "loss": 1.1865,
       "step": 450
     },
     {
-      "epoch": 0.3064516129032258,
-      "grad_norm": 16.48873519897461,
-      "learning_rate": 9.97610513739546e-06,
-      "loss": 1.1636,
       "step": 475
     },
     {
-      "epoch": 0.3225806451612903,
-      "grad_norm": 14.694608688354492,
-      "learning_rate": 9.916367980884111e-06,
-      "loss": 1.1796,
       "step": 500
     },
     {
-      "epoch": 0.3387096774193548,
-      "grad_norm": 15.619414329528809,
-      "learning_rate": 9.856630824372761e-06,
-      "loss": 1.1655,
       "step": 525
     },
     {
-      "epoch": 0.3548387096774194,
-      "grad_norm": 13.177242279052734,
-      "learning_rate": 9.79689366786141e-06,
-      "loss": 1.143,
       "step": 550
     },
     {
-      "epoch": 0.3709677419354839,
-      "grad_norm": 15.957605361938477,
-      "learning_rate": 9.737156511350062e-06,
-      "loss": 1.1414,
       "step": 575
     },
     {
-      "epoch": 0.3870967741935484,
-      "grad_norm": 12.467620849609375,
-      "learning_rate": 9.67741935483871e-06,
-      "loss": 1.0964,
       "step": 600
     },
     {
-      "epoch": 0.4032258064516129,
-      "grad_norm": 15.435978889465332,
-      "learning_rate": 9.61768219832736e-06,
-      "loss": 1.1512,
       "step": 625
     },
     {
-      "epoch": 0.41935483870967744,
-      "grad_norm": 13.087624549865723,
-      "learning_rate": 9.557945041816011e-06,
-      "loss": 1.1338,
       "step": 650
     },
     {
-      "epoch": 0.43548387096774194,
-      "grad_norm": 15.716456413269043,
-      "learning_rate": 9.49820788530466e-06,
-      "loss": 1.0783,
       "step": 675
     },
     {
-      "epoch": 0.45161290322580644,
-      "grad_norm": 14.517507553100586,
-      "learning_rate": 9.43847072879331e-06,
-      "loss": 1.0728,
       "step": 700
     },
     {
-      "epoch": 0.46774193548387094,
-      "grad_norm": 17.37009620666504,
-      "learning_rate": 9.37873357228196e-06,
-      "loss": 1.0317,
       "step": 725
     },
     {
-      "epoch": 0.4838709677419355,
-      "grad_norm": 14.03701400756836,
-      "learning_rate": 9.31899641577061e-06,
-      "loss": 1.0347,
       "step": 750
     },
     {
-      "epoch": 0.5,
-      "grad_norm": 12.431659698486328,
-      "learning_rate": 9.25925925925926e-06,
-      "loss": 1.0524,
       "step": 775
     },
     {
-      "epoch": 0.5161290322580645,
-      "grad_norm": 12.746413230895996,
-      "learning_rate": 9.19952210274791e-06,
-      "loss": 1.0826,
       "step": 800
     },
     {
-      "epoch": 0.532258064516129,
-      "grad_norm": 15.521408081054688,
-      "learning_rate": 9.13978494623656e-06,
-      "loss": 1.0377,
       "step": 825
     },
     {
-      "epoch": 0.5483870967741935,
-      "grad_norm": 15.342901229858398,
-      "learning_rate": 9.08004778972521e-06,
-      "loss": 0.9762,
       "step": 850
     },
     {
-      "epoch": 0.5645161290322581,
-      "grad_norm": 16.137371063232422,
-      "learning_rate": 9.02031063321386e-06,
-      "loss": 1.0725,
       "step": 875
     },
     {
-      "epoch": 0.5806451612903226,
-      "grad_norm": 14.61146068572998,
-      "learning_rate": 8.96057347670251e-06,
-      "loss": 0.9554,
       "step": 900
     },
     {
-      "epoch": 0.5967741935483871,
-      "grad_norm": 13.561723709106445,
-      "learning_rate": 8.90083632019116e-06,
-      "loss": 1.0127,
       "step": 925
     },
     {
-      "epoch": 0.6129032258064516,
-      "grad_norm": 16.037729263305664,
-      "learning_rate": 8.84109916367981e-06,
-      "loss": 0.9621,
       "step": 950
     },
     {
-      "epoch": 0.6290322580645161,
-      "grad_norm": 13.945268630981445,
-      "learning_rate": 8.78136200716846e-06,
-      "loss": 0.9479,
       "step": 975
     },
     {
-      "epoch": 0.6451612903225806,
-      "grad_norm": 15.826567649841309,
-      "learning_rate": 8.72162485065711e-06,
-      "loss": 0.9789,
       "step": 1000
     },
     {
-      "epoch": 0.6451612903225806,
-      "eval_cer": 60.21685813863431,
-      "eval_loss": 0.9020848870277405,
-      "eval_runtime": 953.7359,
-      "eval_samples_per_second": 2.392,
-      "eval_steps_per_second": 0.3,
       "step": 1000
-    },
-    {
-      "epoch": 0.6612903225806451,
-      "grad_norm": 11.495616912841797,
-      "learning_rate": 8.66188769414576e-06,
-      "loss": 0.9695,
-      "step": 1025
-    },
-    {
-      "epoch": 0.6774193548387096,
-      "grad_norm": 15.224388122558594,
-      "learning_rate": 8.602150537634409e-06,
-      "loss": 0.9488,
-      "step": 1050
-    },
-    {
-      "epoch": 0.6935483870967742,
-      "grad_norm": 13.824469566345215,
-      "learning_rate": 8.54241338112306e-06,
-      "loss": 1.0474,
-      "step": 1075
-    },
-    {
-      "epoch": 0.7096774193548387,
-      "grad_norm": 14.53409194946289,
-      "learning_rate": 8.48267622461171e-06,
-      "loss": 0.9866,
-      "step": 1100
-    },
-    {
-      "epoch": 0.7258064516129032,
-      "grad_norm": 12.956225395202637,
-      "learning_rate": 8.422939068100358e-06,
-      "loss": 0.9072,
-      "step": 1125
-    },
-    {
-      "epoch": 0.7419354838709677,
-      "grad_norm": 13.533162117004395,
-      "learning_rate": 8.36320191158901e-06,
-      "loss": 0.9428,
-      "step": 1150
-    },
-    {
-      "epoch": 0.7580645161290323,
-      "grad_norm": 14.72665023803711,
-      "learning_rate": 8.303464755077659e-06,
-      "loss": 0.9387,
-      "step": 1175
-    },
-    {
-      "epoch": 0.7741935483870968,
-      "grad_norm": 12.921445846557617,
-      "learning_rate": 8.24372759856631e-06,
-      "loss": 0.9442,
-      "step": 1200
-    },
-    {
-      "epoch": 0.7903225806451613,
-      "grad_norm": 11.652874946594238,
-      "learning_rate": 8.18399044205496e-06,
-      "loss": 0.9359,
-      "step": 1225
-    },
-    {
-      "epoch": 0.8064516129032258,
-      "grad_norm": 15.415846824645996,
-      "learning_rate": 8.124253285543608e-06,
-      "loss": 0.916,
-      "step": 1250
-    },
-    {
-      "epoch": 0.8225806451612904,
-      "grad_norm": 18.422143936157227,
-      "learning_rate": 8.064516129032258e-06,
-      "loss": 0.9608,
-      "step": 1275
-    },
-    {
-      "epoch": 0.8387096774193549,
-      "grad_norm": 11.93355941772461,
-      "learning_rate": 8.004778972520909e-06,
-      "loss": 0.9297,
-      "step": 1300
-    },
-    {
-      "epoch": 0.8548387096774194,
-      "grad_norm": 16.42209243774414,
-      "learning_rate": 7.945041816009559e-06,
-      "loss": 0.8933,
-      "step": 1325
-    },
-    {
-      "epoch": 0.8709677419354839,
-      "grad_norm": 14.272250175476074,
-      "learning_rate": 7.88530465949821e-06,
-      "loss": 0.9185,
-      "step": 1350
-    },
-    {
-      "epoch": 0.8870967741935484,
-      "grad_norm": 12.172361373901367,
-      "learning_rate": 7.825567502986858e-06,
-      "loss": 0.8476,
-      "step": 1375
-    },
-    {
-      "epoch": 0.9032258064516129,
-      "grad_norm": 14.475882530212402,
-      "learning_rate": 7.765830346475508e-06,
-      "loss": 0.925,
-      "step": 1400
-    },
-    {
-      "epoch": 0.9193548387096774,
-      "grad_norm": 14.247998237609863,
-      "learning_rate": 7.706093189964159e-06,
-      "loss": 0.888,
-      "step": 1425
-    },
-    {
-      "epoch": 0.9354838709677419,
-      "grad_norm": 12.855352401733398,
-      "learning_rate": 7.646356033452809e-06,
-      "loss": 0.888,
-      "step": 1450
-    },
-    {
-      "epoch": 0.9516129032258065,
-      "grad_norm": 14.016806602478027,
-      "learning_rate": 7.586618876941458e-06,
-      "loss": 0.9237,
-      "step": 1475
-    },
-    {
-      "epoch": 0.967741935483871,
-      "grad_norm": 13.113448143005371,
-      "learning_rate": 7.526881720430108e-06,
-      "loss": 0.8767,
-      "step": 1500
-    },
-    {
-      "epoch": 0.9838709677419355,
-      "grad_norm": 15.823156356811523,
-      "learning_rate": 7.467144563918758e-06,
-      "loss": 0.8561,
-      "step": 1525
-    },
-    {
-      "epoch": 1.0,
-      "grad_norm": 16.72173309326172,
-      "learning_rate": 7.4074074074074075e-06,
-      "loss": 0.853,
-      "step": 1550
-    },
-    {
-      "epoch": 1.0161290322580645,
-      "grad_norm": 11.821678161621094,
-      "learning_rate": 7.347670250896059e-06,
-      "loss": 0.6325,
-      "step": 1575
-    },
-    {
-      "epoch": 1.032258064516129,
-      "grad_norm": 10.003717422485352,
-      "learning_rate": 7.287933094384708e-06,
-      "loss": 0.5999,
-      "step": 1600
-    },
-    {
-      "epoch": 1.0483870967741935,
-      "grad_norm": 11.098932266235352,
-      "learning_rate": 7.2281959378733575e-06,
-      "loss": 0.6439,
-      "step": 1625
-    },
-    {
-      "epoch": 1.064516129032258,
-      "grad_norm": 13.39173412322998,
-      "learning_rate": 7.168458781362008e-06,
-      "loss": 0.6171,
-      "step": 1650
-    },
-    {
-      "epoch": 1.0806451612903225,
-      "grad_norm": 12.478330612182617,
-      "learning_rate": 7.108721624850657e-06,
-      "loss": 0.6296,
-      "step": 1675
-    },
-    {
-      "epoch": 1.096774193548387,
-      "grad_norm": 11.143562316894531,
-      "learning_rate": 7.048984468339307e-06,
-      "loss": 0.6252,
-      "step": 1700
-    },
-    {
-      "epoch": 1.1129032258064515,
-      "grad_norm": 9.06653118133545,
-      "learning_rate": 6.989247311827958e-06,
-      "loss": 0.627,
-      "step": 1725
-    },
-    {
-      "epoch": 1.129032258064516,
-      "grad_norm": 12.985542297363281,
-      "learning_rate": 6.929510155316607e-06,
-      "loss": 0.6145,
-      "step": 1750
-    },
-    {
-      "epoch": 1.1451612903225807,
-      "grad_norm": 12.124594688415527,
-      "learning_rate": 6.869772998805258e-06,
-      "loss": 0.601,
-      "step": 1775
-    },
-    {
-      "epoch": 1.1612903225806452,
-      "grad_norm": 11.50346851348877,
-      "learning_rate": 6.810035842293907e-06,
-      "loss": 0.5787,
-      "step": 1800
-    },
-    {
-      "epoch": 1.1774193548387097,
-      "grad_norm": 11.256744384765625,
-      "learning_rate": 6.7502986857825566e-06,
-      "loss": 0.5949,
-      "step": 1825
-    },
-    {
-      "epoch": 1.1935483870967742,
-      "grad_norm": 12.568142890930176,
-      "learning_rate": 6.690561529271207e-06,
-      "loss": 0.6396,
-      "step": 1850
-    },
-    {
-      "epoch": 1.2096774193548387,
-      "grad_norm": 11.688636779785156,
-      "learning_rate": 6.630824372759857e-06,
-      "loss": 0.6106,
-      "step": 1875
-    },
-    {
-      "epoch": 1.2258064516129032,
-      "grad_norm": 13.135574340820312,
-      "learning_rate": 6.5710872162485075e-06,
-      "loss": 0.6197,
-      "step": 1900
-    },
-    {
-      "epoch": 1.2419354838709677,
-      "grad_norm": 14.128840446472168,
-      "learning_rate": 6.511350059737157e-06,
-      "loss": 0.6474,
-      "step": 1925
-    },
-    {
-      "epoch": 1.2580645161290323,
-      "grad_norm": 11.889117240905762,
-      "learning_rate": 6.451612903225806e-06,
-      "loss": 0.5966,
-      "step": 1950
-    },
-    {
-      "epoch": 1.2741935483870968,
-      "grad_norm": 12.298087120056152,
-      "learning_rate": 6.391875746714457e-06,
-      "loss": 0.6007,
-      "step": 1975
-    },
-    {
-      "epoch": 1.2903225806451613,
-      "grad_norm": 13.969961166381836,
-      "learning_rate": 6.332138590203107e-06,
-      "loss": 0.61,
-      "step": 2000
-    },
-    {
-      "epoch": 1.2903225806451613,
-      "eval_cer": 53.38840841616109,
-      "eval_loss": 0.753625750541687,
-      "eval_runtime": 951.328,
-      "eval_samples_per_second": 2.398,
-      "eval_steps_per_second": 0.301,
-      "step": 2000
-    },
-    {
-      "epoch": 1.3064516129032258,
-      "grad_norm": 9.99063777923584,
-      "learning_rate": 6.272401433691757e-06,
-      "loss": 0.588,
-      "step": 2025
-    },
-    {
-      "epoch": 1.3225806451612903,
-      "grad_norm": 13.123091697692871,
-      "learning_rate": 6.212664277180407e-06,
-      "loss": 0.5886,
-      "step": 2050
-    },
-    {
-      "epoch": 1.3387096774193548,
-      "grad_norm": 10.930394172668457,
-      "learning_rate": 6.152927120669057e-06,
-      "loss": 0.6117,
-      "step": 2075
-    },
-    {
-      "epoch": 1.3548387096774195,
-      "grad_norm": 12.531543731689453,
-      "learning_rate": 6.0931899641577065e-06,
-      "loss": 0.5931,
-      "step": 2100
-    },
-    {
-      "epoch": 1.370967741935484,
-      "grad_norm": 13.16308307647705,
-      "learning_rate": 6.033452807646356e-06,
-      "loss": 0.598,
-      "step": 2125
-    },
-    {
-      "epoch": 1.3870967741935485,
-      "grad_norm": 11.17799186706543,
-      "learning_rate": 5.973715651135007e-06,
-      "loss": 0.6141,
-      "step": 2150
-    },
-    {
-      "epoch": 1.403225806451613,
-      "grad_norm": 10.640506744384766,
-      "learning_rate": 5.9139784946236566e-06,
-      "loss": 0.5682,
-      "step": 2175
-    },
-    {
-      "epoch": 1.4193548387096775,
-      "grad_norm": 11.789594650268555,
-      "learning_rate": 5.854241338112307e-06,
-      "loss": 0.5598,
-      "step": 2200
-    },
-    {
-      "epoch": 1.435483870967742,
-      "grad_norm": 11.937474250793457,
-      "learning_rate": 5.794504181600956e-06,
-      "loss": 0.6344,
-      "step": 2225
-    },
-    {
-      "epoch": 1.4516129032258065,
-      "grad_norm": 14.106030464172363,
-      "learning_rate": 5.734767025089606e-06,
-      "loss": 0.5783,
-      "step": 2250
-    },
-    {
-      "epoch": 1.467741935483871,
-      "grad_norm": 12.365781784057617,
-      "learning_rate": 5.675029868578256e-06,
-      "loss": 0.6335,
-      "step": 2275
-    },
-    {
-      "epoch": 1.4838709677419355,
-      "grad_norm": 14.670917510986328,
-      "learning_rate": 5.615292712066906e-06,
-      "loss": 0.5988,
-      "step": 2300
-    },
-    {
-      "epoch": 1.5,
-      "grad_norm": 10.45535659790039,
-      "learning_rate": 5.555555555555557e-06,
-      "loss": 0.5912,
-      "step": 2325
-    },
-    {
-      "epoch": 1.5161290322580645,
-      "grad_norm": 15.059216499328613,
-      "learning_rate": 5.495818399044206e-06,
-      "loss": 0.5405,
-      "step": 2350
-    },
-    {
-      "epoch": 1.532258064516129,
-      "grad_norm": 12.705628395080566,
-      "learning_rate": 5.436081242532856e-06,
-      "loss": 0.5816,
-      "step": 2375
-    },
-    {
-      "epoch": 1.5483870967741935,
-      "grad_norm": 14.382452964782715,
-      "learning_rate": 5.376344086021506e-06,
-      "loss": 0.5437,
-      "step": 2400
-    },
-    {
-      "epoch": 1.564516129032258,
-      "grad_norm": 10.80752944946289,
-      "learning_rate": 5.316606929510155e-06,
-      "loss": 0.5975,
-      "step": 2425
-    },
-    {
-      "epoch": 1.5806451612903225,
-      "grad_norm": 12.146509170532227,
-      "learning_rate": 5.2568697729988065e-06,
-      "loss": 0.599,
-      "step": 2450
-    },
-    {
-      "epoch": 1.596774193548387,
-      "grad_norm": 12.145088195800781,
-      "learning_rate": 5.197132616487456e-06,
-      "loss": 0.6506,
-      "step": 2475
-    },
-    {
-      "epoch": 1.6129032258064515,
-      "grad_norm": 13.103174209594727,
-      "learning_rate": 5.137395459976105e-06,
-      "loss": 0.5649,
-      "step": 2500
-    },
-    {
-      "epoch": 1.629032258064516,
-      "grad_norm": 13.602423667907715,
-      "learning_rate": 5.077658303464756e-06,
-      "loss": 0.5424,
-      "step": 2525
-    },
-    {
-      "epoch": 1.6451612903225805,
-      "grad_norm": 14.787790298461914,
-      "learning_rate": 5.017921146953405e-06,
-      "loss": 0.5628,
-      "step": 2550
-    },
-    {
-      "epoch": 1.661290322580645,
-      "grad_norm": 11.559283256530762,
-      "learning_rate": 4.9581839904420555e-06,
-      "loss": 0.6216,
-      "step": 2575
-    },
-    {
-      "epoch": 1.6774193548387095,
-      "grad_norm": 13.20376968383789,
-      "learning_rate": 4.898446833930705e-06,
-      "loss": 0.5694,
-      "step": 2600
-    },
-    {
-      "epoch": 1.6935483870967742,
-      "grad_norm": 9.632781982421875,
-      "learning_rate": 4.838709677419355e-06,
-      "loss": 0.5808,
-      "step": 2625
-    },
-    {
-      "epoch": 1.7096774193548387,
-      "grad_norm": 12.304398536682129,
-      "learning_rate": 4.7789725209080055e-06,
-      "loss": 0.5777,
-      "step": 2650
-    },
-    {
-      "epoch": 1.7258064516129032,
-      "grad_norm": 11.025238990783691,
-      "learning_rate": 4.719235364396655e-06,
-      "loss": 0.5964,
-      "step": 2675
-    },
-    {
-      "epoch": 1.7419354838709677,
-      "grad_norm": 13.640275955200195,
-      "learning_rate": 4.659498207885305e-06,
-      "loss": 0.5936,
-      "step": 2700
-    },
-    {
-      "epoch": 1.7580645161290323,
-      "grad_norm": 14.28750991821289,
-      "learning_rate": 4.599761051373955e-06,
-      "loss": 0.5814,
-      "step": 2725
-    },
-    {
-      "epoch": 1.7741935483870968,
-      "grad_norm": 14.228248596191406,
-      "learning_rate": 4.540023894862605e-06,
-      "loss": 0.5881,
-      "step": 2750
-    },
-    {
-      "epoch": 1.7903225806451613,
-      "grad_norm": 12.126937866210938,
-      "learning_rate": 4.480286738351255e-06,
-      "loss": 0.5568,
-      "step": 2775
-    },
-    {
-      "epoch": 1.8064516129032258,
-      "grad_norm": 12.653525352478027,
-      "learning_rate": 4.420549581839905e-06,
-      "loss": 0.5988,
-      "step": 2800
-    },
-    {
-      "epoch": 1.8225806451612905,
-      "grad_norm": 10.851930618286133,
-      "learning_rate": 4.360812425328555e-06,
-      "loss": 0.6073,
-      "step": 2825
-    },
-    {
-      "epoch": 1.838709677419355,
-      "grad_norm": 12.00724983215332,
-      "learning_rate": 4.3010752688172045e-06,
-      "loss": 0.5739,
-      "step": 2850
-    },
-    {
-      "epoch": 1.8548387096774195,
-      "grad_norm": 10.997614860534668,
-      "learning_rate": 4.241338112305855e-06,
-      "loss": 0.5663,
-      "step": 2875
-    },
-    {
-      "epoch": 1.870967741935484,
-      "grad_norm": 12.384391784667969,
-      "learning_rate": 4.181600955794505e-06,
-      "loss": 0.5325,
-      "step": 2900
-    },
-    {
-      "epoch": 1.8870967741935485,
-      "grad_norm": 10.200772285461426,
-      "learning_rate": 4.121863799283155e-06,
-      "loss": 0.5918,
-      "step": 2925
-    },
-    {
-      "epoch": 1.903225806451613,
-      "grad_norm": 13.224651336669922,
-      "learning_rate": 4.062126642771804e-06,
-      "loss": 0.5399,
-      "step": 2950
-    },
-    {
-      "epoch": 1.9193548387096775,
-      "grad_norm": 10.611023902893066,
-      "learning_rate": 4.002389486260454e-06,
-      "loss": 0.5593,
-      "step": 2975
-    },
-    {
-      "epoch": 1.935483870967742,
-      "grad_norm": 10.110644340515137,
-      "learning_rate": 3.942652329749105e-06,
-      "loss": 0.5611,
-      "step": 3000
-    },
-    {
-      "epoch": 1.935483870967742,
-      "eval_cer": 51.336001032657805,
-      "eval_loss": 0.6702780723571777,
-      "eval_runtime": 963.0475,
-      "eval_samples_per_second": 2.369,
-      "eval_steps_per_second": 0.297,
-      "step": 3000
-    },
-    {
-      "epoch": 1.9516129032258065,
-      "grad_norm": 10.685456275939941,
-      "learning_rate": 3.882915173237754e-06,
-      "loss": 0.5326,
-      "step": 3025
-    },
-    {
-      "epoch": 1.967741935483871,
-      "grad_norm": 14.404354095458984,
-      "learning_rate": 3.823178016726404e-06,
-      "loss": 0.5828,
-      "step": 3050
-    },
-    {
-      "epoch": 1.9838709677419355,
-      "grad_norm": 13.997696876525879,
-      "learning_rate": 3.763440860215054e-06,
-      "loss": 0.5394,
-      "step": 3075
-    },
-    {
-      "epoch": 2.0,
-      "grad_norm": 16.121444702148438,
-      "learning_rate": 3.7037037037037037e-06,
-      "loss": 0.5635,
-      "step": 3100
-    },
-    {
-      "epoch": 2.0161290322580645,
-      "grad_norm": 9.237725257873535,
-      "learning_rate": 3.643966547192354e-06,
-      "loss": 0.3737,
-      "step": 3125
-    },
-    {
-      "epoch": 2.032258064516129,
-      "grad_norm": 11.313372611999512,
-      "learning_rate": 3.584229390681004e-06,
-      "loss": 0.3934,
-      "step": 3150
-    },
-    {
-      "epoch": 2.0483870967741935,
-      "grad_norm": 9.819090843200684,
-      "learning_rate": 3.5244922341696534e-06,
-      "loss": 0.3494,
-      "step": 3175
-    },
-    {
-      "epoch": 2.064516129032258,
-      "grad_norm": 9.302324295043945,
-      "learning_rate": 3.4647550776583037e-06,
-      "loss": 0.3691,
-      "step": 3200
-    },
-    {
-      "epoch": 2.0806451612903225,
-      "grad_norm": 11.517475128173828,
-      "learning_rate": 3.4050179211469536e-06,
-      "loss": 0.3652,
-      "step": 3225
-    },
-    {
-      "epoch": 2.096774193548387,
-      "grad_norm": 7.707530975341797,
-      "learning_rate": 3.3452807646356034e-06,
-      "loss": 0.3566,
-      "step": 3250
-    },
-    {
-      "epoch": 2.1129032258064515,
-      "grad_norm": 9.121161460876465,
-      "learning_rate": 3.2855436081242537e-06,
-      "loss": 0.3409,
-      "step": 3275
-    },
-    {
-      "epoch": 2.129032258064516,
-      "grad_norm": 10.464853286743164,
-      "learning_rate": 3.225806451612903e-06,
-      "loss": 0.33,
-      "step": 3300
-    },
-    {
-      "epoch": 2.1451612903225805,
-      "grad_norm": 8.300515174865723,
-      "learning_rate": 3.1660692951015535e-06,
-      "loss": 0.3436,
-      "step": 3325
-    },
-    {
-      "epoch": 2.161290322580645,
-      "grad_norm": 7.577033519744873,
-      "learning_rate": 3.1063321385902034e-06,
-      "loss": 0.3441,
-      "step": 3350
-    },
-    {
-      "epoch": 2.1774193548387095,
-      "grad_norm": 12.314337730407715,
-      "learning_rate": 3.0465949820788532e-06,
-      "loss": 0.387,
-      "step": 3375
-    },
-    {
-      "epoch": 2.193548387096774,
-      "grad_norm": 8.03864860534668,
-      "learning_rate": 2.9868578255675035e-06,
-      "loss": 0.3533,
-      "step": 3400
-    },
-    {
-      "epoch": 2.2096774193548385,
-      "grad_norm": 10.326530456542969,
-      "learning_rate": 2.9271206690561534e-06,
-      "loss": 0.351,
-      "step": 3425
-    },
-    {
-      "epoch": 2.225806451612903,
-      "grad_norm": 8.268649101257324,
-      "learning_rate": 2.867383512544803e-06,
-      "loss": 0.3437,
-      "step": 3450
-    },
-    {
-      "epoch": 2.241935483870968,
-      "grad_norm": 9.62258529663086,
-      "learning_rate": 2.807646356033453e-06,
-      "loss": 0.3254,
-      "step": 3475
-    },
-    {
-      "epoch": 2.258064516129032,
-      "grad_norm": 8.58535099029541,
-      "learning_rate": 2.747909199522103e-06,
-      "loss": 0.3592,
-      "step": 3500
-    },
-    {
-      "epoch": 2.274193548387097,
-      "grad_norm": 10.211243629455566,
-      "learning_rate": 2.688172043010753e-06,
-      "loss": 0.3334,
-      "step": 3525
-    },
-    {
-      "epoch": 2.2903225806451615,
-      "grad_norm": 9.174546241760254,
-      "learning_rate": 2.6284348864994032e-06,
-      "loss": 0.3533,
-      "step": 3550
-    },
-    {
-      "epoch": 2.306451612903226,
-      "grad_norm": 9.889862060546875,
-      "learning_rate": 2.5686977299880527e-06,
-      "loss": 0.3263,
-      "step": 3575
-    },
-    {
-      "epoch": 2.3225806451612905,
-      "grad_norm": 10.23873519897461,
-      "learning_rate": 2.5089605734767026e-06,
-      "loss": 0.3601,
-      "step": 3600
-    },
-    {
-      "epoch": 2.338709677419355,
-      "grad_norm": 8.46229076385498,
-      "learning_rate": 2.4492234169653525e-06,
-      "loss": 0.335,
-      "step": 3625
-    },
-    {
-      "epoch": 2.3548387096774195,
-      "grad_norm": 8.364771842956543,
-      "learning_rate": 2.3894862604540028e-06,
-      "loss": 0.3447,
-      "step": 3650
-    },
-    {
-      "epoch": 2.370967741935484,
-      "grad_norm": 11.249506950378418,
-      "learning_rate": 2.3297491039426526e-06,
-      "loss": 0.3544,
-      "step": 3675
-    },
-    {
-      "epoch": 2.3870967741935485,
-      "grad_norm": 8.8016996383667,
-      "learning_rate": 2.2700119474313025e-06,
-      "loss": 0.3277,
-      "step": 3700
-    },
-    {
-      "epoch": 2.403225806451613,
-      "grad_norm": 9.774581909179688,
-      "learning_rate": 2.2102747909199524e-06,
-      "loss": 0.3346,
-      "step": 3725
-    },
-    {
-      "epoch": 2.4193548387096775,
-      "grad_norm": 8.027830123901367,
-      "learning_rate": 2.1505376344086023e-06,
-      "loss": 0.3291,
-      "step": 3750
-    },
-    {
-      "epoch": 2.435483870967742,
-      "grad_norm": 10.107059478759766,
-      "learning_rate": 2.0908004778972526e-06,
-      "loss": 0.3366,
-      "step": 3775
-    },
-    {
-      "epoch": 2.4516129032258065,
-      "grad_norm": 8.280789375305176,
-      "learning_rate": 2.031063321385902e-06,
-      "loss": 0.3473,
-      "step": 3800
-    },
-    {
-      "epoch": 2.467741935483871,
-      "grad_norm": 9.160382270812988,
-      "learning_rate": 1.9713261648745523e-06,
-      "loss": 0.3215,
-      "step": 3825
-    },
-    {
-      "epoch": 2.4838709677419355,
-      "grad_norm": 7.922098636627197,
-      "learning_rate": 1.911589008363202e-06,
-      "loss": 0.3285,
-      "step": 3850
-    },
-    {
-      "epoch": 2.5,
-      "grad_norm": 9.239423751831055,
-      "learning_rate": 1.8518518518518519e-06,
-      "loss": 0.3608,
-      "step": 3875
-    },
-    {
-      "epoch": 2.5161290322580645,
-      "grad_norm": 8.667262077331543,
-      "learning_rate": 1.792114695340502e-06,
-      "loss": 0.3389,
-      "step": 3900
-    },
-    {
-      "epoch": 2.532258064516129,
-      "grad_norm": 10.475480079650879,
-      "learning_rate": 1.7323775388291518e-06,
-      "loss": 0.3226,
-      "step": 3925
-    },
-    {
-      "epoch": 2.5483870967741935,
-      "grad_norm": 11.079362869262695,
-      "learning_rate": 1.6726403823178017e-06,
-      "loss": 0.3559,
-      "step": 3950
-    },
-    {
-      "epoch": 2.564516129032258,
-      "grad_norm": 10.680990219116211,
-      "learning_rate": 1.6129032258064516e-06,
-      "loss": 0.297,
-      "step": 3975
-    },
-    {
-      "epoch": 2.5806451612903225,
-      "grad_norm": 13.101299285888672,
-      "learning_rate": 1.5531660692951017e-06,
-      "loss": 0.3359,
-      "step": 4000
-    },
-    {
-      "epoch": 2.5806451612903225,
-      "eval_cer": 48.63818252226668,
-      "eval_loss": 0.6473900675773621,
-      "eval_runtime": 969.6918,
-      "eval_samples_per_second": 2.352,
-      "eval_steps_per_second": 0.295,
-      "step": 4000
     }
   ],
   "logging_steps": 25,
-  "max_steps": 4650,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 1000,
-  "total_flos": 1.84665797664768e+19,
-  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 61.346116219917825,
+  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-1000",
+  "epoch": 0.40024014408645187,
   "eval_steps": 1000,
+  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.010006003602161296,
+      "grad_norm": 197.02195739746094,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 7.3864,
       "step": 25
     },
     {
+      "epoch": 0.020012007204322592,
+      "grad_norm": 41.90484619140625,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 5.5537,
       "step": 50
     },
     {
+      "epoch": 0.03001801080648389,
+      "grad_norm": 32.08101272583008,
+      "learning_rate": 1.5e-06,
+      "loss": 3.7704,
       "step": 75
     },
     {
+      "epoch": 0.040024014408645184,
+      "grad_norm": 28.216585159301758,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 2.6296,
       "step": 100
     },
     {
+      "epoch": 0.05003001801080648,
+      "grad_norm": 23.683828353881836,
+      "learning_rate": 2.5e-06,
+      "loss": 2.4474,
       "step": 125
     },
     {
+      "epoch": 0.06003602161296778,
+      "grad_norm": 21.883520126342773,
+      "learning_rate": 3e-06,
+      "loss": 2.2054,
       "step": 150
     },
     {
+      "epoch": 0.07004202521512908,
+      "grad_norm": 24.772098541259766,
+      "learning_rate": 3.5e-06,
+      "loss": 2.0695,
       "step": 175
     },
     {
+      "epoch": 0.08004802881729037,
+      "grad_norm": 24.105548858642578,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 1.9116,
       "step": 200
     },
     {
+      "epoch": 0.09005403241945167,
+      "grad_norm": 18.805519104003906,
+      "learning_rate": 4.5e-06,
+      "loss": 1.7643,
       "step": 225
     },
     {
+      "epoch": 0.10006003602161297,
+      "grad_norm": 15.599541664123535,
+      "learning_rate": 5e-06,
+      "loss": 1.6394,
       "step": 250
     },
     {
+      "epoch": 0.11006603962377426,
+      "grad_norm": 15.514196395874023,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 1.6016,
       "step": 275
     },
     {
+      "epoch": 0.12007204322593557,
+      "grad_norm": 15.5431547164917,
+      "learning_rate": 6e-06,
+      "loss": 1.5851,
       "step": 300
     },
     {
+      "epoch": 0.13007804682809687,
+      "grad_norm": 16.450502395629883,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 1.5076,
       "step": 325
     },
     {
+      "epoch": 0.14008405043025815,
+      "grad_norm": 16.393997192382812,
+      "learning_rate": 7e-06,
+      "loss": 1.5487,
       "step": 350
     },
     {
+      "epoch": 0.15009005403241946,
+      "grad_norm": 14.165709495544434,
+      "learning_rate": 7.500000000000001e-06,
+      "loss": 1.5365,
       "step": 375
     },
     {
+      "epoch": 0.16009605763458074,
+      "grad_norm": 15.929381370544434,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 1.5023,
       "step": 400
     },
     {
+      "epoch": 0.17010206123674204,
+      "grad_norm": 14.422001838684082,
+      "learning_rate": 8.5e-06,
+      "loss": 1.3558,
       "step": 425
     },
     {
+      "epoch": 0.18010806483890335,
+      "grad_norm": 13.510339736938477,
+      "learning_rate": 9e-06,
+      "loss": 1.3898,
       "step": 450
     },
     {
+      "epoch": 0.19011406844106463,
+      "grad_norm": 14.485660552978516,
+      "learning_rate": 9.5e-06,
+      "loss": 1.4279,
       "step": 475
     },
     {
+      "epoch": 0.20012007204322593,
+      "grad_norm": 14.117327690124512,
+      "learning_rate": 1e-05,
+      "loss": 1.3455,
       "step": 500
     },
     {
+      "epoch": 0.21012607564538724,
+      "grad_norm": 16.12464714050293,
+      "learning_rate": 9.944395017793596e-06,
+      "loss": 1.4,
       "step": 525
     },
     {
+      "epoch": 0.22013207924754852,
+      "grad_norm": 15.304022789001465,
+      "learning_rate": 9.888790035587188e-06,
+      "loss": 1.4159,
       "step": 550
     },
     {
+      "epoch": 0.23013808284970982,
+      "grad_norm": 14.668664932250977,
+      "learning_rate": 9.833185053380784e-06,
+      "loss": 1.3445,
       "step": 575
     },
     {
+      "epoch": 0.24014408645187113,
+      "grad_norm": 13.041420936584473,
+      "learning_rate": 9.777580071174379e-06,
+      "loss": 1.3622,
       "step": 600
     },
     {
+      "epoch": 0.25015009005403244,
+      "grad_norm": 15.908055305480957,
+      "learning_rate": 9.721975088967973e-06,
+      "loss": 1.3234,
       "step": 625
     },
     {
+      "epoch": 0.26015609365619374,
+      "grad_norm": 13.73078727722168,
+      "learning_rate": 9.666370106761567e-06,
+      "loss": 1.2332,
       "step": 650
     },
     {
+      "epoch": 0.270162097258355,
+      "grad_norm": 14.327301979064941,
+      "learning_rate": 9.610765124555162e-06,
+      "loss": 1.3042,
       "step": 675
     },
     {
+      "epoch": 0.2801681008605163,
+      "grad_norm": 14.390907287597656,
+      "learning_rate": 9.555160142348756e-06,
+      "loss": 1.3216,
       "step": 700
     },
     {
+      "epoch": 0.2901741044626776,
+      "grad_norm": 13.917515754699707,
+      "learning_rate": 9.49955516014235e-06,
+      "loss": 1.2931,
       "step": 725
     },
     {
+      "epoch": 0.3001801080648389,
+      "grad_norm": 15.108023643493652,
+      "learning_rate": 9.443950177935945e-06,
+      "loss": 1.3286,
       "step": 750
     },
     {
+      "epoch": 0.3101861116670002,
+      "grad_norm": 13.692678451538086,
+      "learning_rate": 9.388345195729539e-06,
+      "loss": 1.3057,
       "step": 775
     },
     {
+      "epoch": 0.32019211526916147,
+      "grad_norm": 13.685354232788086,
+      "learning_rate": 9.332740213523132e-06,
+      "loss": 1.2402,
       "step": 800
     },
     {
+      "epoch": 0.3301981188713228,
+      "grad_norm": 14.591761589050293,
+      "learning_rate": 9.277135231316726e-06,
+      "loss": 1.2688,
       "step": 825
     },
     {
+      "epoch": 0.3402041224734841,
+      "grad_norm": 15.677751541137695,
+      "learning_rate": 9.221530249110321e-06,
+      "loss": 1.3076,
       "step": 850
     },
     {
+      "epoch": 0.3502101260756454,
+      "grad_norm": 15.109577178955078,
+      "learning_rate": 9.165925266903915e-06,
+      "loss": 1.2141,
       "step": 875
     },
     {
+      "epoch": 0.3602161296778067,
+      "grad_norm": 10.552845001220703,
+      "learning_rate": 9.110320284697509e-06,
+      "loss": 1.2393,
       "step": 900
     },
     {
+      "epoch": 0.370222133279968,
+      "grad_norm": 12.321894645690918,
+      "learning_rate": 9.054715302491104e-06,
+      "loss": 1.2417,
       "step": 925
     },
     {
+      "epoch": 0.38022813688212925,
+      "grad_norm": 13.729790687561035,
+      "learning_rate": 8.999110320284698e-06,
+      "loss": 1.2082,
       "step": 950
     },
     {
+      "epoch": 0.39023414048429056,
+      "grad_norm": 13.137016296386719,
+      "learning_rate": 8.943505338078292e-06,
+      "loss": 1.2048,
       "step": 975
     },
     {
+      "epoch": 0.40024014408645187,
+      "grad_norm": 12.194613456726074,
+      "learning_rate": 8.887900355871887e-06,
+      "loss": 1.2739,
       "step": 1000
     },
     {
+      "epoch": 0.40024014408645187,
+      "eval_cer": 61.346116219917825,
+      "eval_loss": 1.169872522354126,
+      "eval_runtime": 1744.6409,
+      "eval_samples_per_second": 2.274,
+      "eval_steps_per_second": 0.284,
       "step": 1000
     }
   ],
   "logging_steps": 25,
+  "max_steps": 4996,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
   "save_steps": 1000,
+  "total_flos": 4.61736640512e+18,
+  "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5780b3fe6cf6a2b7abc711d493a9d31fc1181c9fff73c0fc0a79ae423a23e2fb
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc814e8346759fc832d3d40fb2efbd9f7f5bf91489499603abb8463206368d6b
 size 5176