Training in progress, step 8000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +294 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7557404827630580061cacbdd8f26d0d44a02e5df077ae8459b8a8446eb057a
 size 966995080

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac71f6104740cd4c6f5b2f0a4f1d312da1f27f2b28f81745260c3b9565e2c13f
 size 966995080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40065ff3d994eb26a33f8c24383aff7f13d2afaa2a976d0aaefce0b59d43194b
 size 1925064044

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa70ddee520870b9de82d68cf42fd025e0d52691a3ae84d400a9cf6b5eefc95d
 size 1925064044

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ada5f6f7cb1b6a49d79d11cd5642321498733c76d6eb8ca5030fe74fa4bc331
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:5bcd75decc8ec809bdd000c1a023eecd569d9a9775fe640822926fa2ab60021b
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:38c7bcc6d095cc304fbc9e04d83745ba25161601bac81ac7ac1f4f775235d730
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0d64c6a32c80af160202850f4c0903e8541ea45c1c0ea5e8f7b7cbe62296d013
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 16.889332833895203,
-  "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-7000",
-  "epoch": 2.8,
   "eval_steps": 1000,
-  "global_step": 7000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2030,6 +2030,295 @@
       "eval_samples_per_second": 2.241,
       "eval_steps_per_second": 0.28,
       "step": 7000
     }
   ],
   "logging_steps": 25,
@@ -2037,7 +2326,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
-  "total_flos": 3.232156483584e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 16.252549215465933,
+  "best_model_checkpoint": "./whisper-small-taiwanese-hanzi/checkpoint-8000",
+  "epoch": 3.2,
   "eval_steps": 1000,
+  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 2.241,
       "eval_steps_per_second": 0.28,
       "step": 7000
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 4.931020736694336,
+      "learning_rate": 1.3541666666666667e-06,
+      "loss": 0.1069,
+      "step": 7025
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 4.510303497314453,
+      "learning_rate": 1.3194444444444446e-06,
+      "loss": 0.1088,
+      "step": 7050
+    },
+    {
+      "epoch": 2.83,
+      "grad_norm": 6.164842128753662,
+      "learning_rate": 1.2847222222222222e-06,
+      "loss": 0.1168,
+      "step": 7075
+    },
+    {
+      "epoch": 2.84,
+      "grad_norm": 4.769392490386963,
+      "learning_rate": 1.25e-06,
+      "loss": 0.1235,
+      "step": 7100
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 5.1608710289001465,
+      "learning_rate": 1.2152777777777778e-06,
+      "loss": 0.1068,
+      "step": 7125
+    },
+    {
+      "epoch": 2.86,
+      "grad_norm": 6.107107639312744,
+      "learning_rate": 1.1805555555555556e-06,
+      "loss": 0.1213,
+      "step": 7150
+    },
+    {
+      "epoch": 2.87,
+      "grad_norm": 5.863134860992432,
+      "learning_rate": 1.1458333333333333e-06,
+      "loss": 0.1087,
+      "step": 7175
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 4.510376453399658,
+      "learning_rate": 1.111111111111111e-06,
+      "loss": 0.1246,
+      "step": 7200
+    },
+    {
+      "epoch": 2.89,
+      "grad_norm": 6.96931791305542,
+      "learning_rate": 1.076388888888889e-06,
+      "loss": 0.1113,
+      "step": 7225
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 6.812278747558594,
+      "learning_rate": 1.0416666666666667e-06,
+      "loss": 0.1392,
+      "step": 7250
+    },
+    {
+      "epoch": 2.91,
+      "grad_norm": 5.756344318389893,
+      "learning_rate": 1.0069444444444447e-06,
+      "loss": 0.0846,
+      "step": 7275
+    },
+    {
+      "epoch": 2.92,
+      "grad_norm": 3.710134506225586,
+      "learning_rate": 9.722222222222224e-07,
+      "loss": 0.101,
+      "step": 7300
+    },
+    {
+      "epoch": 2.93,
+      "grad_norm": 6.572783946990967,
+      "learning_rate": 9.375000000000001e-07,
+      "loss": 0.1141,
+      "step": 7325
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 8.185981750488281,
+      "learning_rate": 9.027777777777779e-07,
+      "loss": 0.1207,
+      "step": 7350
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 4.434142589569092,
+      "learning_rate": 8.680555555555556e-07,
+      "loss": 0.1206,
+      "step": 7375
+    },
+    {
+      "epoch": 2.96,
+      "grad_norm": 4.396365165710449,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 0.1235,
+      "step": 7400
+    },
+    {
+      "epoch": 2.9699999999999998,
+      "grad_norm": 8.427244186401367,
+      "learning_rate": 7.986111111111111e-07,
+      "loss": 0.1131,
+      "step": 7425
+    },
+    {
+      "epoch": 2.98,
+      "grad_norm": 5.311284065246582,
+      "learning_rate": 7.63888888888889e-07,
+      "loss": 0.1141,
+      "step": 7450
+    },
+    {
+      "epoch": 2.99,
+      "grad_norm": 3.4007761478424072,
+      "learning_rate": 7.291666666666667e-07,
+      "loss": 0.1031,
+      "step": 7475
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 4.035045146942139,
+      "learning_rate": 6.944444444444446e-07,
+      "loss": 0.0908,
+      "step": 7500
+    },
+    {
+      "epoch": 3.01,
+      "grad_norm": 2.659799337387085,
+      "learning_rate": 6.597222222222223e-07,
+      "loss": 0.061,
+      "step": 7525
+    },
+    {
+      "epoch": 3.02,
+      "grad_norm": 3.8670284748077393,
+      "learning_rate": 6.25e-07,
+      "loss": 0.0687,
+      "step": 7550
+    },
+    {
+      "epoch": 3.03,
+      "grad_norm": 3.3453280925750732,
+      "learning_rate": 5.902777777777778e-07,
+      "loss": 0.0619,
+      "step": 7575
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 4.7413249015808105,
+      "learning_rate": 5.555555555555555e-07,
+      "loss": 0.0701,
+      "step": 7600
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 3.685469627380371,
+      "learning_rate": 5.208333333333334e-07,
+      "loss": 0.0712,
+      "step": 7625
+    },
+    {
+      "epoch": 3.06,
+      "grad_norm": 1.9484667778015137,
+      "learning_rate": 4.861111111111112e-07,
+      "loss": 0.0699,
+      "step": 7650
+    },
+    {
+      "epoch": 3.07,
+      "grad_norm": 4.542367458343506,
+      "learning_rate": 4.5138888888888893e-07,
+      "loss": 0.0608,
+      "step": 7675
+    },
+    {
+      "epoch": 3.08,
+      "grad_norm": 3.3884780406951904,
+      "learning_rate": 4.1666666666666667e-07,
+      "loss": 0.0629,
+      "step": 7700
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 3.8800745010375977,
+      "learning_rate": 3.819444444444445e-07,
+      "loss": 0.0627,
+      "step": 7725
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 3.6111207008361816,
+      "learning_rate": 3.472222222222223e-07,
+      "loss": 0.0616,
+      "step": 7750
+    },
+    {
+      "epoch": 3.11,
+      "grad_norm": 4.225738525390625,
+      "learning_rate": 3.125e-07,
+      "loss": 0.0701,
+      "step": 7775
+    },
+    {
+      "epoch": 3.12,
+      "grad_norm": 4.790248394012451,
+      "learning_rate": 2.7777777777777776e-07,
+      "loss": 0.0761,
+      "step": 7800
+    },
+    {
+      "epoch": 3.13,
+      "grad_norm": 5.802876949310303,
+      "learning_rate": 2.430555555555556e-07,
+      "loss": 0.0647,
+      "step": 7825
+    },
+    {
+      "epoch": 3.14,
+      "grad_norm": 4.658420085906982,
+      "learning_rate": 2.0833333333333333e-07,
+      "loss": 0.0605,
+      "step": 7850
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 3.0659847259521484,
+      "learning_rate": 1.7361111111111115e-07,
+      "loss": 0.062,
+      "step": 7875
+    },
+    {
+      "epoch": 3.16,
+      "grad_norm": 2.227428436279297,
+      "learning_rate": 1.3888888888888888e-07,
+      "loss": 0.0572,
+      "step": 7900
+    },
+    {
+      "epoch": 3.17,
+      "grad_norm": 2.0126748085021973,
+      "learning_rate": 1.0416666666666667e-07,
+      "loss": 0.056,
+      "step": 7925
+    },
+    {
+      "epoch": 3.18,
+      "grad_norm": 5.308594703674316,
+      "learning_rate": 6.944444444444444e-08,
+      "loss": 0.0728,
+      "step": 7950
+    },
+    {
+      "epoch": 3.19,
+      "grad_norm": 2.8052618503570557,
+      "learning_rate": 3.472222222222222e-08,
+      "loss": 0.0744,
+      "step": 7975
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 3.2474498748779297,
+      "learning_rate": 0.0,
+      "loss": 0.0678,
+      "step": 8000
+    },
+    {
+      "epoch": 3.2,
+      "eval_cer": 16.252549215465933,
+      "eval_loss": 0.2705218493938446,
+      "eval_runtime": 1747.8851,
+      "eval_samples_per_second": 2.252,
+      "eval_steps_per_second": 0.281,
+      "step": 8000
     }
   ],
   "logging_steps": 25,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
+  "total_flos": 3.693893124096e+19,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null