{ "best_metric": 6.049679487179487, "best_model_checkpoint": "./exp/whisper-small-taiwanese-asr-v2/checkpoint-7000", "epoch": 22.675736961451246, "eval_steps": 1000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05668934240362812, "grad_norm": 39.12731170654297, "learning_rate": 4.2000000000000006e-07, "loss": 3.459, "step": 25 }, { "epoch": 0.11337868480725624, "grad_norm": 27.20041847229004, "learning_rate": 9.200000000000001e-07, "loss": 3.0795, "step": 50 }, { "epoch": 0.17006802721088435, "grad_norm": 23.331146240234375, "learning_rate": 1.42e-06, "loss": 2.4576, "step": 75 }, { "epoch": 0.22675736961451248, "grad_norm": 18.951128005981445, "learning_rate": 1.9200000000000003e-06, "loss": 1.8809, "step": 100 }, { "epoch": 0.2834467120181406, "grad_norm": 18.83504295349121, "learning_rate": 2.42e-06, "loss": 1.3831, "step": 125 }, { "epoch": 0.3401360544217687, "grad_norm": 16.38509750366211, "learning_rate": 2.92e-06, "loss": 1.049, "step": 150 }, { "epoch": 0.3968253968253968, "grad_norm": 17.442344665527344, "learning_rate": 3.4200000000000007e-06, "loss": 0.8571, "step": 175 }, { "epoch": 0.45351473922902497, "grad_norm": 15.140103340148926, "learning_rate": 3.920000000000001e-06, "loss": 0.6812, "step": 200 }, { "epoch": 0.5102040816326531, "grad_norm": 16.342971801757812, "learning_rate": 4.42e-06, "loss": 0.5756, "step": 225 }, { "epoch": 0.5668934240362812, "grad_norm": 18.02117347717285, "learning_rate": 4.92e-06, "loss": 0.5135, "step": 250 }, { "epoch": 0.6235827664399093, "grad_norm": 19.521883010864258, "learning_rate": 5.420000000000001e-06, "loss": 0.4338, "step": 275 }, { "epoch": 0.6802721088435374, "grad_norm": 14.71985149383545, "learning_rate": 5.92e-06, "loss": 0.3755, "step": 300 }, { "epoch": 0.7369614512471655, "grad_norm": 6.9398722648620605, "learning_rate": 6.42e-06, "loss": 0.3662, "step": 325 }, { "epoch": 0.7936507936507936, "grad_norm": 8.429058074951172, "learning_rate": 6.92e-06, "loss": 0.3196, "step": 350 }, { "epoch": 0.8503401360544217, "grad_norm": 7.899880409240723, "learning_rate": 7.420000000000001e-06, "loss": 0.3186, "step": 375 }, { "epoch": 0.9070294784580499, "grad_norm": 9.854070663452148, "learning_rate": 7.92e-06, "loss": 0.2849, "step": 400 }, { "epoch": 0.963718820861678, "grad_norm": 9.135157585144043, "learning_rate": 8.42e-06, "loss": 0.3387, "step": 425 }, { "epoch": 1.0204081632653061, "grad_norm": 7.550724983215332, "learning_rate": 8.920000000000001e-06, "loss": 0.2661, "step": 450 }, { "epoch": 1.0770975056689343, "grad_norm": 6.407596111297607, "learning_rate": 9.42e-06, "loss": 0.1618, "step": 475 }, { "epoch": 1.1337868480725624, "grad_norm": 8.470088958740234, "learning_rate": 9.920000000000002e-06, "loss": 0.2531, "step": 500 }, { "epoch": 1.1904761904761905, "grad_norm": 14.254158020019531, "learning_rate": 9.977894736842106e-06, "loss": 0.2415, "step": 525 }, { "epoch": 1.2471655328798186, "grad_norm": 5.5717339515686035, "learning_rate": 9.951578947368423e-06, "loss": 0.1863, "step": 550 }, { "epoch": 1.3038548752834467, "grad_norm": 9.035225868225098, "learning_rate": 9.925263157894738e-06, "loss": 0.1772, "step": 575 }, { "epoch": 1.3605442176870748, "grad_norm": 12.706698417663574, "learning_rate": 9.898947368421054e-06, "loss": 0.1589, "step": 600 }, { "epoch": 1.417233560090703, "grad_norm": 8.393030166625977, "learning_rate": 9.87263157894737e-06, "loss": 0.172, "step": 625 }, { "epoch": 1.473922902494331, "grad_norm": 5.842218399047852, "learning_rate": 9.846315789473684e-06, "loss": 0.1656, "step": 650 }, { "epoch": 1.5306122448979593, "grad_norm": 9.525617599487305, "learning_rate": 9.820000000000001e-06, "loss": 0.186, "step": 675 }, { "epoch": 1.5873015873015874, "grad_norm": 19.124101638793945, "learning_rate": 9.793684210526316e-06, "loss": 0.1943, "step": 700 }, { "epoch": 1.6439909297052155, "grad_norm": 9.63739013671875, "learning_rate": 9.767368421052632e-06, "loss": 0.1514, "step": 725 }, { "epoch": 1.7006802721088436, "grad_norm": 14.47154712677002, "learning_rate": 9.741052631578947e-06, "loss": 0.142, "step": 750 }, { "epoch": 1.7573696145124718, "grad_norm": 6.553714275360107, "learning_rate": 9.714736842105264e-06, "loss": 0.1563, "step": 775 }, { "epoch": 1.8140589569160999, "grad_norm": 6.272864818572998, "learning_rate": 9.68842105263158e-06, "loss": 0.1454, "step": 800 }, { "epoch": 1.870748299319728, "grad_norm": 6.724349021911621, "learning_rate": 9.662105263157896e-06, "loss": 0.3289, "step": 825 }, { "epoch": 1.927437641723356, "grad_norm": 6.75054931640625, "learning_rate": 9.635789473684212e-06, "loss": 0.1455, "step": 850 }, { "epoch": 1.9841269841269842, "grad_norm": 7.212646961212158, "learning_rate": 9.609473684210527e-06, "loss": 0.2098, "step": 875 }, { "epoch": 2.0408163265306123, "grad_norm": 4.189349174499512, "learning_rate": 9.583157894736842e-06, "loss": 0.1016, "step": 900 }, { "epoch": 2.0975056689342404, "grad_norm": 9.265031814575195, "learning_rate": 9.556842105263159e-06, "loss": 0.1116, "step": 925 }, { "epoch": 2.1541950113378685, "grad_norm": 4.884438991546631, "learning_rate": 9.530526315789474e-06, "loss": 0.1209, "step": 950 }, { "epoch": 2.2108843537414966, "grad_norm": 4.258934497833252, "learning_rate": 9.50421052631579e-06, "loss": 0.1331, "step": 975 }, { "epoch": 2.2675736961451247, "grad_norm": 5.421435832977295, "learning_rate": 9.477894736842106e-06, "loss": 0.083, "step": 1000 }, { "epoch": 2.2675736961451247, "eval_loss": 0.1953069269657135, "eval_runtime": 158.2725, "eval_samples_per_second": 2.957, "eval_steps_per_second": 0.493, "eval_wer": 8.173076923076923, "step": 1000 }, { "epoch": 2.324263038548753, "grad_norm": 15.091805458068848, "learning_rate": 9.451578947368422e-06, "loss": 0.0908, "step": 1025 }, { "epoch": 2.380952380952381, "grad_norm": 9.11117172241211, "learning_rate": 9.425263157894737e-06, "loss": 0.1528, "step": 1050 }, { "epoch": 2.437641723356009, "grad_norm": 8.174238204956055, "learning_rate": 9.398947368421052e-06, "loss": 0.059, "step": 1075 }, { "epoch": 2.494331065759637, "grad_norm": 12.375757217407227, "learning_rate": 9.372631578947369e-06, "loss": 0.0773, "step": 1100 }, { "epoch": 2.5510204081632653, "grad_norm": 8.518278121948242, "learning_rate": 9.346315789473684e-06, "loss": 0.0714, "step": 1125 }, { "epoch": 2.6077097505668934, "grad_norm": 7.095608234405518, "learning_rate": 9.32e-06, "loss": 0.0778, "step": 1150 }, { "epoch": 2.6643990929705215, "grad_norm": 5.508459568023682, "learning_rate": 9.293684210526317e-06, "loss": 0.0696, "step": 1175 }, { "epoch": 2.7210884353741496, "grad_norm": 1.7915226221084595, "learning_rate": 9.267368421052632e-06, "loss": 0.0504, "step": 1200 }, { "epoch": 2.7777777777777777, "grad_norm": 4.419779300689697, "learning_rate": 9.241052631578949e-06, "loss": 0.0667, "step": 1225 }, { "epoch": 2.834467120181406, "grad_norm": 4.396119594573975, "learning_rate": 9.214736842105264e-06, "loss": 0.1037, "step": 1250 }, { "epoch": 2.891156462585034, "grad_norm": 4.604401111602783, "learning_rate": 9.18842105263158e-06, "loss": 0.1276, "step": 1275 }, { "epoch": 2.947845804988662, "grad_norm": 6.504410743713379, "learning_rate": 9.162105263157895e-06, "loss": 0.0683, "step": 1300 }, { "epoch": 3.00453514739229, "grad_norm": 3.4603614807128906, "learning_rate": 9.13578947368421e-06, "loss": 0.0548, "step": 1325 }, { "epoch": 3.061224489795918, "grad_norm": 8.05783748626709, "learning_rate": 9.109473684210527e-06, "loss": 0.0423, "step": 1350 }, { "epoch": 3.1179138321995463, "grad_norm": 1.4570063352584839, "learning_rate": 9.083157894736842e-06, "loss": 0.0388, "step": 1375 }, { "epoch": 3.1746031746031744, "grad_norm": 2.493945360183716, "learning_rate": 9.056842105263159e-06, "loss": 0.0704, "step": 1400 }, { "epoch": 3.2312925170068025, "grad_norm": 3.1329710483551025, "learning_rate": 9.030526315789474e-06, "loss": 0.0418, "step": 1425 }, { "epoch": 3.287981859410431, "grad_norm": 0.9819092154502869, "learning_rate": 9.00421052631579e-06, "loss": 0.0572, "step": 1450 }, { "epoch": 3.3446712018140587, "grad_norm": 3.2691588401794434, "learning_rate": 8.977894736842107e-06, "loss": 0.0391, "step": 1475 }, { "epoch": 3.4013605442176873, "grad_norm": 0.9585368037223816, "learning_rate": 8.951578947368422e-06, "loss": 0.0468, "step": 1500 }, { "epoch": 3.458049886621315, "grad_norm": 5.486790657043457, "learning_rate": 8.925263157894739e-06, "loss": 0.0423, "step": 1525 }, { "epoch": 3.5147392290249435, "grad_norm": 7.715363502502441, "learning_rate": 8.898947368421054e-06, "loss": 0.119, "step": 1550 }, { "epoch": 3.571428571428571, "grad_norm": 0.7970800995826721, "learning_rate": 8.872631578947369e-06, "loss": 0.0264, "step": 1575 }, { "epoch": 3.6281179138321997, "grad_norm": 9.640973091125488, "learning_rate": 8.846315789473685e-06, "loss": 0.0305, "step": 1600 }, { "epoch": 3.6848072562358274, "grad_norm": 5.621988296508789, "learning_rate": 8.82e-06, "loss": 0.0329, "step": 1625 }, { "epoch": 3.741496598639456, "grad_norm": 2.357621908187866, "learning_rate": 8.793684210526317e-06, "loss": 0.0657, "step": 1650 }, { "epoch": 3.798185941043084, "grad_norm": 3.6409835815429688, "learning_rate": 8.767368421052632e-06, "loss": 0.0311, "step": 1675 }, { "epoch": 3.854875283446712, "grad_norm": 2.0802805423736572, "learning_rate": 8.741052631578949e-06, "loss": 0.0763, "step": 1700 }, { "epoch": 3.9115646258503403, "grad_norm": 14.157549858093262, "learning_rate": 8.714736842105264e-06, "loss": 0.029, "step": 1725 }, { "epoch": 3.9682539682539684, "grad_norm": 3.303739309310913, "learning_rate": 8.688421052631579e-06, "loss": 0.0429, "step": 1750 }, { "epoch": 4.024943310657596, "grad_norm": 3.3203935623168945, "learning_rate": 8.662105263157895e-06, "loss": 0.0212, "step": 1775 }, { "epoch": 4.081632653061225, "grad_norm": 0.7093961834907532, "learning_rate": 8.63578947368421e-06, "loss": 0.012, "step": 1800 }, { "epoch": 4.138321995464852, "grad_norm": 2.5663654804229736, "learning_rate": 8.609473684210527e-06, "loss": 0.0232, "step": 1825 }, { "epoch": 4.195011337868481, "grad_norm": 27.419864654541016, "learning_rate": 8.583157894736843e-06, "loss": 0.0192, "step": 1850 }, { "epoch": 4.2517006802721085, "grad_norm": 0.45838263630867004, "learning_rate": 8.556842105263158e-06, "loss": 0.0577, "step": 1875 }, { "epoch": 4.308390022675737, "grad_norm": 1.192967176437378, "learning_rate": 8.530526315789475e-06, "loss": 0.0622, "step": 1900 }, { "epoch": 4.365079365079365, "grad_norm": 5.143068790435791, "learning_rate": 8.50421052631579e-06, "loss": 0.0217, "step": 1925 }, { "epoch": 4.421768707482993, "grad_norm": 3.8326940536499023, "learning_rate": 8.477894736842107e-06, "loss": 0.0212, "step": 1950 }, { "epoch": 4.478458049886621, "grad_norm": 1.7538135051727295, "learning_rate": 8.451578947368422e-06, "loss": 0.0499, "step": 1975 }, { "epoch": 4.535147392290249, "grad_norm": 0.5768720507621765, "learning_rate": 8.425263157894737e-06, "loss": 0.0444, "step": 2000 }, { "epoch": 4.535147392290249, "eval_loss": 0.19730134308338165, "eval_runtime": 158.9638, "eval_samples_per_second": 2.944, "eval_steps_per_second": 0.491, "eval_wer": 6.944444444444445, "step": 2000 }, { "epoch": 4.591836734693878, "grad_norm": 4.980081558227539, "learning_rate": 8.398947368421053e-06, "loss": 0.0205, "step": 2025 }, { "epoch": 4.648526077097506, "grad_norm": 2.384981155395508, "learning_rate": 8.372631578947368e-06, "loss": 0.027, "step": 2050 }, { "epoch": 4.705215419501133, "grad_norm": 1.0443973541259766, "learning_rate": 8.346315789473685e-06, "loss": 0.0537, "step": 2075 }, { "epoch": 4.761904761904762, "grad_norm": 1.0461288690567017, "learning_rate": 8.32e-06, "loss": 0.0146, "step": 2100 }, { "epoch": 4.81859410430839, "grad_norm": 6.465195655822754, "learning_rate": 8.293684210526317e-06, "loss": 0.0349, "step": 2125 }, { "epoch": 4.875283446712018, "grad_norm": 4.054196834564209, "learning_rate": 8.267368421052632e-06, "loss": 0.0186, "step": 2150 }, { "epoch": 4.931972789115647, "grad_norm": 1.7794383764266968, "learning_rate": 8.241052631578948e-06, "loss": 0.0224, "step": 2175 }, { "epoch": 4.988662131519274, "grad_norm": 5.188144683837891, "learning_rate": 8.214736842105265e-06, "loss": 0.047, "step": 2200 }, { "epoch": 5.045351473922903, "grad_norm": 0.08940722048282623, "learning_rate": 8.18842105263158e-06, "loss": 0.0186, "step": 2225 }, { "epoch": 5.1020408163265305, "grad_norm": 3.670670747756958, "learning_rate": 8.162105263157895e-06, "loss": 0.0183, "step": 2250 }, { "epoch": 5.158730158730159, "grad_norm": 2.3486833572387695, "learning_rate": 8.135789473684212e-06, "loss": 0.015, "step": 2275 }, { "epoch": 5.215419501133787, "grad_norm": 4.572961807250977, "learning_rate": 8.109473684210527e-06, "loss": 0.0239, "step": 2300 }, { "epoch": 5.272108843537415, "grad_norm": 1.114425539970398, "learning_rate": 8.083157894736843e-06, "loss": 0.0396, "step": 2325 }, { "epoch": 5.328798185941043, "grad_norm": 0.35853487253189087, "learning_rate": 8.056842105263158e-06, "loss": 0.0185, "step": 2350 }, { "epoch": 5.3854875283446715, "grad_norm": 5.747114181518555, "learning_rate": 8.030526315789475e-06, "loss": 0.0136, "step": 2375 }, { "epoch": 5.442176870748299, "grad_norm": 0.33728256821632385, "learning_rate": 8.00421052631579e-06, "loss": 0.0366, "step": 2400 }, { "epoch": 5.498866213151928, "grad_norm": 13.636420249938965, "learning_rate": 7.977894736842105e-06, "loss": 0.0441, "step": 2425 }, { "epoch": 5.555555555555555, "grad_norm": 2.6928389072418213, "learning_rate": 7.951578947368421e-06, "loss": 0.014, "step": 2450 }, { "epoch": 5.612244897959184, "grad_norm": 0.18423891067504883, "learning_rate": 7.925263157894736e-06, "loss": 0.007, "step": 2475 }, { "epoch": 5.668934240362812, "grad_norm": 9.745643615722656, "learning_rate": 7.898947368421053e-06, "loss": 0.0065, "step": 2500 }, { "epoch": 5.72562358276644, "grad_norm": 1.0876612663269043, "learning_rate": 7.87263157894737e-06, "loss": 0.0464, "step": 2525 }, { "epoch": 5.782312925170068, "grad_norm": 3.2040324211120605, "learning_rate": 7.846315789473685e-06, "loss": 0.0085, "step": 2550 }, { "epoch": 5.839002267573696, "grad_norm": 0.07621826976537704, "learning_rate": 7.820000000000001e-06, "loss": 0.0417, "step": 2575 }, { "epoch": 5.895691609977324, "grad_norm": 0.11689532548189163, "learning_rate": 7.793684210526316e-06, "loss": 0.0113, "step": 2600 }, { "epoch": 5.9523809523809526, "grad_norm": 1.2615737915039062, "learning_rate": 7.767368421052633e-06, "loss": 0.017, "step": 2625 }, { "epoch": 6.00907029478458, "grad_norm": 6.465045928955078, "learning_rate": 7.741052631578948e-06, "loss": 0.0134, "step": 2650 }, { "epoch": 6.065759637188209, "grad_norm": 0.6422730088233948, "learning_rate": 7.714736842105263e-06, "loss": 0.0407, "step": 2675 }, { "epoch": 6.122448979591836, "grad_norm": 0.07282353192567825, "learning_rate": 7.68842105263158e-06, "loss": 0.0066, "step": 2700 }, { "epoch": 6.179138321995465, "grad_norm": 0.13422216475009918, "learning_rate": 7.662105263157895e-06, "loss": 0.0345, "step": 2725 }, { "epoch": 6.235827664399093, "grad_norm": 4.29299783706665, "learning_rate": 7.635789473684211e-06, "loss": 0.0047, "step": 2750 }, { "epoch": 6.292517006802721, "grad_norm": 0.05262218415737152, "learning_rate": 7.609473684210526e-06, "loss": 0.0023, "step": 2775 }, { "epoch": 6.349206349206349, "grad_norm": 6.577340602874756, "learning_rate": 7.583157894736842e-06, "loss": 0.005, "step": 2800 }, { "epoch": 6.405895691609977, "grad_norm": 0.2815414071083069, "learning_rate": 7.556842105263158e-06, "loss": 0.0155, "step": 2825 }, { "epoch": 6.462585034013605, "grad_norm": 0.05367182940244675, "learning_rate": 7.5305263157894745e-06, "loss": 0.04, "step": 2850 }, { "epoch": 6.519274376417234, "grad_norm": 0.14756247401237488, "learning_rate": 7.50421052631579e-06, "loss": 0.0049, "step": 2875 }, { "epoch": 6.575963718820862, "grad_norm": 2.2667605876922607, "learning_rate": 7.477894736842106e-06, "loss": 0.0028, "step": 2900 }, { "epoch": 6.63265306122449, "grad_norm": 0.13220186531543732, "learning_rate": 7.451578947368422e-06, "loss": 0.0114, "step": 2925 }, { "epoch": 6.6893424036281175, "grad_norm": 2.413485288619995, "learning_rate": 7.425263157894738e-06, "loss": 0.0028, "step": 2950 }, { "epoch": 6.746031746031746, "grad_norm": 1.9141589403152466, "learning_rate": 7.398947368421054e-06, "loss": 0.0079, "step": 2975 }, { "epoch": 6.802721088435375, "grad_norm": 5.837501525878906, "learning_rate": 7.3726315789473694e-06, "loss": 0.0294, "step": 3000 }, { "epoch": 6.802721088435375, "eval_loss": 0.19840003550052643, "eval_runtime": 156.6444, "eval_samples_per_second": 2.988, "eval_steps_per_second": 0.498, "eval_wer": 6.517094017094018, "step": 3000 }, { "epoch": 6.859410430839002, "grad_norm": 0.0996256172657013, "learning_rate": 7.346315789473684e-06, "loss": 0.0027, "step": 3025 }, { "epoch": 6.91609977324263, "grad_norm": 0.028051115572452545, "learning_rate": 7.32e-06, "loss": 0.008, "step": 3050 }, { "epoch": 6.9727891156462585, "grad_norm": 0.14238622784614563, "learning_rate": 7.293684210526316e-06, "loss": 0.0263, "step": 3075 }, { "epoch": 7.029478458049887, "grad_norm": 0.112076535820961, "learning_rate": 7.267368421052632e-06, "loss": 0.0192, "step": 3100 }, { "epoch": 7.086167800453515, "grad_norm": 0.14218856394290924, "learning_rate": 7.241052631578948e-06, "loss": 0.0099, "step": 3125 }, { "epoch": 7.142857142857143, "grad_norm": 0.08735861629247665, "learning_rate": 7.2147368421052635e-06, "loss": 0.0013, "step": 3150 }, { "epoch": 7.199546485260771, "grad_norm": 0.07858515530824661, "learning_rate": 7.18842105263158e-06, "loss": 0.043, "step": 3175 }, { "epoch": 7.2562358276643995, "grad_norm": 0.04395943507552147, "learning_rate": 7.162105263157896e-06, "loss": 0.0196, "step": 3200 }, { "epoch": 7.312925170068027, "grad_norm": 0.13118867576122284, "learning_rate": 7.135789473684212e-06, "loss": 0.0032, "step": 3225 }, { "epoch": 7.369614512471656, "grad_norm": 0.5723868608474731, "learning_rate": 7.109473684210528e-06, "loss": 0.0193, "step": 3250 }, { "epoch": 7.426303854875283, "grad_norm": 0.17946386337280273, "learning_rate": 7.08421052631579e-06, "loss": 0.0066, "step": 3275 }, { "epoch": 7.482993197278912, "grad_norm": 0.31329983472824097, "learning_rate": 7.057894736842106e-06, "loss": 0.004, "step": 3300 }, { "epoch": 7.5396825396825395, "grad_norm": 0.08119330555200577, "learning_rate": 7.031578947368422e-06, "loss": 0.0079, "step": 3325 }, { "epoch": 7.596371882086168, "grad_norm": 0.2911977767944336, "learning_rate": 7.005263157894738e-06, "loss": 0.0053, "step": 3350 }, { "epoch": 7.653061224489796, "grad_norm": 0.024690864607691765, "learning_rate": 6.9789473684210525e-06, "loss": 0.0054, "step": 3375 }, { "epoch": 7.709750566893424, "grad_norm": 0.09139782190322876, "learning_rate": 6.953684210526316e-06, "loss": 0.0043, "step": 3400 }, { "epoch": 7.766439909297052, "grad_norm": 0.3346998393535614, "learning_rate": 6.9273684210526326e-06, "loss": 0.0063, "step": 3425 }, { "epoch": 7.8231292517006805, "grad_norm": 2.5169262886047363, "learning_rate": 6.901052631578948e-06, "loss": 0.0036, "step": 3450 }, { "epoch": 7.879818594104308, "grad_norm": 0.05118921771645546, "learning_rate": 6.874736842105264e-06, "loss": 0.0152, "step": 3475 }, { "epoch": 7.936507936507937, "grad_norm": 0.141609787940979, "learning_rate": 6.84842105263158e-06, "loss": 0.0029, "step": 3500 }, { "epoch": 7.993197278911564, "grad_norm": 0.059452034533023834, "learning_rate": 6.822105263157896e-06, "loss": 0.0084, "step": 3525 }, { "epoch": 8.049886621315192, "grad_norm": 9.327384948730469, "learning_rate": 6.795789473684211e-06, "loss": 0.0056, "step": 3550 }, { "epoch": 8.106575963718821, "grad_norm": 0.031346723437309265, "learning_rate": 6.769473684210527e-06, "loss": 0.0095, "step": 3575 }, { "epoch": 8.16326530612245, "grad_norm": 0.03407048434019089, "learning_rate": 6.7431578947368425e-06, "loss": 0.031, "step": 3600 }, { "epoch": 8.219954648526077, "grad_norm": 0.023022985085844994, "learning_rate": 6.716842105263158e-06, "loss": 0.0061, "step": 3625 }, { "epoch": 8.276643990929704, "grad_norm": 0.1935151070356369, "learning_rate": 6.690526315789474e-06, "loss": 0.0049, "step": 3650 }, { "epoch": 8.333333333333334, "grad_norm": 1.2079312801361084, "learning_rate": 6.66421052631579e-06, "loss": 0.0107, "step": 3675 }, { "epoch": 8.390022675736962, "grad_norm": 0.20077985525131226, "learning_rate": 6.637894736842106e-06, "loss": 0.0021, "step": 3700 }, { "epoch": 8.44671201814059, "grad_norm": 18.450538635253906, "learning_rate": 6.611578947368421e-06, "loss": 0.0159, "step": 3725 }, { "epoch": 8.503401360544217, "grad_norm": 0.03869379311800003, "learning_rate": 6.585263157894738e-06, "loss": 0.0052, "step": 3750 }, { "epoch": 8.560090702947846, "grad_norm": 0.026166923344135284, "learning_rate": 6.558947368421054e-06, "loss": 0.0007, "step": 3775 }, { "epoch": 8.616780045351474, "grad_norm": 0.02891625091433525, "learning_rate": 6.532631578947369e-06, "loss": 0.0041, "step": 3800 }, { "epoch": 8.673469387755102, "grad_norm": 0.08599188923835754, "learning_rate": 6.506315789473685e-06, "loss": 0.0009, "step": 3825 }, { "epoch": 8.73015873015873, "grad_norm": 0.017651915550231934, "learning_rate": 6.480000000000001e-06, "loss": 0.0211, "step": 3850 }, { "epoch": 8.786848072562359, "grad_norm": 14.024144172668457, "learning_rate": 6.4536842105263165e-06, "loss": 0.021, "step": 3875 }, { "epoch": 8.843537414965986, "grad_norm": 0.057578567415475845, "learning_rate": 6.427368421052632e-06, "loss": 0.0021, "step": 3900 }, { "epoch": 8.900226757369614, "grad_norm": 0.04060237482190132, "learning_rate": 6.401052631578948e-06, "loss": 0.0025, "step": 3925 }, { "epoch": 8.956916099773242, "grad_norm": 0.04346761852502823, "learning_rate": 6.374736842105264e-06, "loss": 0.0055, "step": 3950 }, { "epoch": 9.013605442176871, "grad_norm": 0.014886971563100815, "learning_rate": 6.348421052631579e-06, "loss": 0.0067, "step": 3975 }, { "epoch": 9.070294784580499, "grad_norm": 2.309483289718628, "learning_rate": 6.322105263157895e-06, "loss": 0.0334, "step": 4000 }, { "epoch": 9.070294784580499, "eval_loss": 0.20987384021282196, "eval_runtime": 157.5775, "eval_samples_per_second": 2.97, "eval_steps_per_second": 0.495, "eval_wer": 6.303418803418803, "step": 4000 }, { "epoch": 9.126984126984127, "grad_norm": 0.2992580235004425, "learning_rate": 6.2957894736842105e-06, "loss": 0.0048, "step": 4025 }, { "epoch": 9.183673469387756, "grad_norm": 0.1327856183052063, "learning_rate": 6.269473684210526e-06, "loss": 0.0003, "step": 4050 }, { "epoch": 9.240362811791384, "grad_norm": 0.034492090344429016, "learning_rate": 6.243157894736842e-06, "loss": 0.0007, "step": 4075 }, { "epoch": 9.297052154195011, "grad_norm": 0.021785929799079895, "learning_rate": 6.216842105263159e-06, "loss": 0.0013, "step": 4100 }, { "epoch": 9.353741496598639, "grad_norm": 0.17917415499687195, "learning_rate": 6.190526315789475e-06, "loss": 0.0186, "step": 4125 }, { "epoch": 9.410430839002268, "grad_norm": 0.04263261333107948, "learning_rate": 6.1642105263157905e-06, "loss": 0.0011, "step": 4150 }, { "epoch": 9.467120181405896, "grad_norm": 0.06537426263093948, "learning_rate": 6.137894736842106e-06, "loss": 0.0007, "step": 4175 }, { "epoch": 9.523809523809524, "grad_norm": 0.05984114482998848, "learning_rate": 6.111578947368422e-06, "loss": 0.0031, "step": 4200 }, { "epoch": 9.580498866213151, "grad_norm": 0.0186479389667511, "learning_rate": 6.085263157894737e-06, "loss": 0.0067, "step": 4225 }, { "epoch": 9.63718820861678, "grad_norm": 0.01856757327914238, "learning_rate": 6.058947368421053e-06, "loss": 0.0012, "step": 4250 }, { "epoch": 9.693877551020408, "grad_norm": 0.01589180715382099, "learning_rate": 6.032631578947369e-06, "loss": 0.0033, "step": 4275 }, { "epoch": 9.750566893424036, "grad_norm": 0.013659857213497162, "learning_rate": 6.0063157894736845e-06, "loss": 0.0136, "step": 4300 }, { "epoch": 9.807256235827664, "grad_norm": 0.07311205565929413, "learning_rate": 5.98e-06, "loss": 0.0114, "step": 4325 }, { "epoch": 9.863945578231293, "grad_norm": 0.00827726535499096, "learning_rate": 5.953684210526316e-06, "loss": 0.0006, "step": 4350 }, { "epoch": 9.920634920634921, "grad_norm": 0.02168506383895874, "learning_rate": 5.927368421052632e-06, "loss": 0.0141, "step": 4375 }, { "epoch": 9.977324263038549, "grad_norm": 0.09996296465396881, "learning_rate": 5.901052631578947e-06, "loss": 0.0022, "step": 4400 }, { "epoch": 10.034013605442176, "grad_norm": 0.016953065991401672, "learning_rate": 5.8747368421052645e-06, "loss": 0.0024, "step": 4425 }, { "epoch": 10.090702947845806, "grad_norm": 0.0506548210978508, "learning_rate": 5.84842105263158e-06, "loss": 0.0049, "step": 4450 }, { "epoch": 10.147392290249433, "grad_norm": 0.017473401501774788, "learning_rate": 5.822105263157895e-06, "loss": 0.0009, "step": 4475 }, { "epoch": 10.204081632653061, "grad_norm": 0.016834545880556107, "learning_rate": 5.795789473684211e-06, "loss": 0.0016, "step": 4500 }, { "epoch": 10.260770975056689, "grad_norm": 0.07962112873792648, "learning_rate": 5.769473684210527e-06, "loss": 0.0005, "step": 4525 }, { "epoch": 10.317460317460318, "grad_norm": 0.020868808031082153, "learning_rate": 5.743157894736843e-06, "loss": 0.0162, "step": 4550 }, { "epoch": 10.374149659863946, "grad_norm": 0.0351821593940258, "learning_rate": 5.7168421052631585e-06, "loss": 0.0217, "step": 4575 }, { "epoch": 10.430839002267573, "grad_norm": 0.036179013550281525, "learning_rate": 5.690526315789474e-06, "loss": 0.0022, "step": 4600 }, { "epoch": 10.487528344671201, "grad_norm": 0.04735976830124855, "learning_rate": 5.66421052631579e-06, "loss": 0.0061, "step": 4625 }, { "epoch": 10.54421768707483, "grad_norm": 0.015306883491575718, "learning_rate": 5.637894736842105e-06, "loss": 0.0009, "step": 4650 }, { "epoch": 10.600907029478458, "grad_norm": 0.015260276384651661, "learning_rate": 5.611578947368421e-06, "loss": 0.0067, "step": 4675 }, { "epoch": 10.657596371882086, "grad_norm": 0.029503723606467247, "learning_rate": 5.585263157894737e-06, "loss": 0.0006, "step": 4700 }, { "epoch": 10.714285714285714, "grad_norm": 0.017955463379621506, "learning_rate": 5.558947368421053e-06, "loss": 0.0005, "step": 4725 }, { "epoch": 10.770975056689343, "grad_norm": 0.11942701041698456, "learning_rate": 5.532631578947368e-06, "loss": 0.003, "step": 4750 }, { "epoch": 10.82766439909297, "grad_norm": 0.02495400980114937, "learning_rate": 5.506315789473685e-06, "loss": 0.0017, "step": 4775 }, { "epoch": 10.884353741496598, "grad_norm": 0.1065245270729065, "learning_rate": 5.480000000000001e-06, "loss": 0.0047, "step": 4800 }, { "epoch": 10.941043083900226, "grad_norm": 0.10682205855846405, "learning_rate": 5.453684210526317e-06, "loss": 0.0045, "step": 4825 }, { "epoch": 10.997732426303855, "grad_norm": 0.016077643260359764, "learning_rate": 5.4273684210526325e-06, "loss": 0.0007, "step": 4850 }, { "epoch": 11.054421768707483, "grad_norm": 0.020081788301467896, "learning_rate": 5.401052631578948e-06, "loss": 0.0004, "step": 4875 }, { "epoch": 11.11111111111111, "grad_norm": 0.017382116988301277, "learning_rate": 5.374736842105263e-06, "loss": 0.0012, "step": 4900 }, { "epoch": 11.167800453514738, "grad_norm": 0.5074162483215332, "learning_rate": 5.348421052631579e-06, "loss": 0.0038, "step": 4925 }, { "epoch": 11.224489795918368, "grad_norm": 0.011525845155119896, "learning_rate": 5.322105263157895e-06, "loss": 0.0011, "step": 4950 }, { "epoch": 11.281179138321995, "grad_norm": 0.016790462657809258, "learning_rate": 5.295789473684211e-06, "loss": 0.0013, "step": 4975 }, { "epoch": 11.337868480725623, "grad_norm": 2.887037754058838, "learning_rate": 5.269473684210527e-06, "loss": 0.0011, "step": 5000 }, { "epoch": 11.337868480725623, "eval_loss": 0.2228717803955078, "eval_runtime": 177.4582, "eval_samples_per_second": 2.637, "eval_steps_per_second": 0.44, "eval_wer": 6.3835470085470085, "step": 5000 }, { "epoch": 11.39455782312925, "grad_norm": 0.02366207167506218, "learning_rate": 5.243157894736842e-06, "loss": 0.0002, "step": 5025 }, { "epoch": 11.45124716553288, "grad_norm": 1.1653227806091309, "learning_rate": 5.216842105263158e-06, "loss": 0.0052, "step": 5050 }, { "epoch": 11.507936507936508, "grad_norm": 0.2194329798221588, "learning_rate": 5.190526315789474e-06, "loss": 0.0014, "step": 5075 }, { "epoch": 11.564625850340136, "grad_norm": 0.03989162668585777, "learning_rate": 5.164210526315791e-06, "loss": 0.0006, "step": 5100 }, { "epoch": 11.621315192743765, "grad_norm": 0.02930096909403801, "learning_rate": 5.1378947368421065e-06, "loss": 0.003, "step": 5125 }, { "epoch": 11.678004535147393, "grad_norm": 0.011350632645189762, "learning_rate": 5.1115789473684215e-06, "loss": 0.0039, "step": 5150 }, { "epoch": 11.73469387755102, "grad_norm": 0.010673941113054752, "learning_rate": 5.085263157894737e-06, "loss": 0.0004, "step": 5175 }, { "epoch": 11.791383219954648, "grad_norm": 0.05096409469842911, "learning_rate": 5.058947368421053e-06, "loss": 0.0048, "step": 5200 }, { "epoch": 11.848072562358277, "grad_norm": 0.011835623532533646, "learning_rate": 5.032631578947369e-06, "loss": 0.0093, "step": 5225 }, { "epoch": 11.904761904761905, "grad_norm": 0.013359226286411285, "learning_rate": 5.006315789473685e-06, "loss": 0.0062, "step": 5250 }, { "epoch": 11.961451247165533, "grad_norm": 0.03088083118200302, "learning_rate": 4.980000000000001e-06, "loss": 0.0002, "step": 5275 }, { "epoch": 12.01814058956916, "grad_norm": 0.02855735644698143, "learning_rate": 4.953684210526316e-06, "loss": 0.0002, "step": 5300 }, { "epoch": 12.07482993197279, "grad_norm": 0.04174978658556938, "learning_rate": 4.927368421052631e-06, "loss": 0.0002, "step": 5325 }, { "epoch": 12.131519274376418, "grad_norm": 0.013824643567204475, "learning_rate": 4.901052631578947e-06, "loss": 0.0002, "step": 5350 }, { "epoch": 12.188208616780045, "grad_norm": 0.010323552414774895, "learning_rate": 4.874736842105264e-06, "loss": 0.0002, "step": 5375 }, { "epoch": 12.244897959183673, "grad_norm": 7.9211883544921875, "learning_rate": 4.84842105263158e-06, "loss": 0.0041, "step": 5400 }, { "epoch": 12.301587301587302, "grad_norm": 0.011090376414358616, "learning_rate": 4.8221052631578955e-06, "loss": 0.0046, "step": 5425 }, { "epoch": 12.35827664399093, "grad_norm": 0.993200957775116, "learning_rate": 4.7957894736842105e-06, "loss": 0.0077, "step": 5450 }, { "epoch": 12.414965986394558, "grad_norm": 0.5387348532676697, "learning_rate": 4.769473684210526e-06, "loss": 0.0014, "step": 5475 }, { "epoch": 12.471655328798185, "grad_norm": 0.030508503317832947, "learning_rate": 4.743157894736842e-06, "loss": 0.0029, "step": 5500 }, { "epoch": 12.528344671201815, "grad_norm": 0.015367632731795311, "learning_rate": 4.716842105263159e-06, "loss": 0.002, "step": 5525 }, { "epoch": 12.585034013605442, "grad_norm": 0.01937568373978138, "learning_rate": 4.690526315789475e-06, "loss": 0.0016, "step": 5550 }, { "epoch": 12.64172335600907, "grad_norm": 0.06024911627173424, "learning_rate": 4.6642105263157896e-06, "loss": 0.0016, "step": 5575 }, { "epoch": 12.698412698412698, "grad_norm": 0.005301471799612045, "learning_rate": 4.637894736842105e-06, "loss": 0.0014, "step": 5600 }, { "epoch": 12.755102040816327, "grad_norm": 0.03389279916882515, "learning_rate": 4.611578947368421e-06, "loss": 0.0017, "step": 5625 }, { "epoch": 12.811791383219955, "grad_norm": 0.13554659485816956, "learning_rate": 4.585263157894737e-06, "loss": 0.0105, "step": 5650 }, { "epoch": 12.868480725623582, "grad_norm": 0.018741684034466743, "learning_rate": 4.558947368421053e-06, "loss": 0.004, "step": 5675 }, { "epoch": 12.92517006802721, "grad_norm": 0.1560622751712799, "learning_rate": 4.532631578947369e-06, "loss": 0.0005, "step": 5700 }, { "epoch": 12.98185941043084, "grad_norm": 0.1737220734357834, "learning_rate": 4.5063157894736845e-06, "loss": 0.0019, "step": 5725 }, { "epoch": 13.038548752834467, "grad_norm": 0.02937311679124832, "learning_rate": 4.48e-06, "loss": 0.002, "step": 5750 }, { "epoch": 13.095238095238095, "grad_norm": 0.006793774198740721, "learning_rate": 4.453684210526316e-06, "loss": 0.0005, "step": 5775 }, { "epoch": 13.151927437641723, "grad_norm": 0.014649259857833385, "learning_rate": 4.427368421052632e-06, "loss": 0.0025, "step": 5800 }, { "epoch": 13.208616780045352, "grad_norm": 0.0212300606071949, "learning_rate": 4.401052631578948e-06, "loss": 0.0014, "step": 5825 }, { "epoch": 13.26530612244898, "grad_norm": 0.013082647696137428, "learning_rate": 4.374736842105264e-06, "loss": 0.0039, "step": 5850 }, { "epoch": 13.321995464852607, "grad_norm": 0.05612126737833023, "learning_rate": 4.348421052631579e-06, "loss": 0.0007, "step": 5875 }, { "epoch": 13.378684807256235, "grad_norm": 0.37419337034225464, "learning_rate": 4.322105263157895e-06, "loss": 0.0012, "step": 5900 }, { "epoch": 13.435374149659864, "grad_norm": 0.03296487405896187, "learning_rate": 4.295789473684211e-06, "loss": 0.0043, "step": 5925 }, { "epoch": 13.492063492063492, "grad_norm": 0.009840169921517372, "learning_rate": 4.269473684210527e-06, "loss": 0.0039, "step": 5950 }, { "epoch": 13.54875283446712, "grad_norm": 0.015135574154555798, "learning_rate": 4.243157894736843e-06, "loss": 0.0014, "step": 5975 }, { "epoch": 13.60544217687075, "grad_norm": 0.022306112572550774, "learning_rate": 4.2168421052631585e-06, "loss": 0.0001, "step": 6000 }, { "epoch": 13.60544217687075, "eval_loss": 0.22002862393856049, "eval_runtime": 147.8825, "eval_samples_per_second": 3.165, "eval_steps_per_second": 0.527, "eval_wer": 6.209935897435898, "step": 6000 }, { "epoch": 13.662131519274377, "grad_norm": 0.0066378237679600716, "learning_rate": 4.1905263157894735e-06, "loss": 0.0007, "step": 6025 }, { "epoch": 13.718820861678005, "grad_norm": 0.010166754946112633, "learning_rate": 4.16421052631579e-06, "loss": 0.0008, "step": 6050 }, { "epoch": 13.775510204081632, "grad_norm": 0.0136796273291111, "learning_rate": 4.137894736842106e-06, "loss": 0.0009, "step": 6075 }, { "epoch": 13.83219954648526, "grad_norm": 0.017127549275755882, "learning_rate": 4.111578947368422e-06, "loss": 0.0015, "step": 6100 }, { "epoch": 13.88888888888889, "grad_norm": 0.024442024528980255, "learning_rate": 4.085263157894737e-06, "loss": 0.0032, "step": 6125 }, { "epoch": 13.945578231292517, "grad_norm": 0.17017020285129547, "learning_rate": 4.0589473684210526e-06, "loss": 0.0015, "step": 6150 }, { "epoch": 14.002267573696145, "grad_norm": 0.00828185211867094, "learning_rate": 4.032631578947368e-06, "loss": 0.0026, "step": 6175 }, { "epoch": 14.058956916099774, "grad_norm": 0.007860764861106873, "learning_rate": 4.006315789473684e-06, "loss": 0.0034, "step": 6200 }, { "epoch": 14.115646258503402, "grad_norm": 0.011614521034061909, "learning_rate": 3.980000000000001e-06, "loss": 0.0044, "step": 6225 }, { "epoch": 14.17233560090703, "grad_norm": 0.018276942893862724, "learning_rate": 3.953684210526316e-06, "loss": 0.0004, "step": 6250 }, { "epoch": 14.229024943310657, "grad_norm": 0.012453128583729267, "learning_rate": 3.927368421052632e-06, "loss": 0.0002, "step": 6275 }, { "epoch": 14.285714285714286, "grad_norm": 0.015684612095355988, "learning_rate": 3.9010526315789475e-06, "loss": 0.0003, "step": 6300 }, { "epoch": 14.342403628117914, "grad_norm": 0.006171511020511389, "learning_rate": 3.874736842105263e-06, "loss": 0.0001, "step": 6325 }, { "epoch": 14.399092970521542, "grad_norm": 0.006511132698506117, "learning_rate": 3.848421052631579e-06, "loss": 0.0004, "step": 6350 }, { "epoch": 14.45578231292517, "grad_norm": 0.050404928624629974, "learning_rate": 3.822105263157895e-06, "loss": 0.0002, "step": 6375 }, { "epoch": 14.512471655328799, "grad_norm": 10.834954261779785, "learning_rate": 3.795789473684211e-06, "loss": 0.0012, "step": 6400 }, { "epoch": 14.569160997732427, "grad_norm": 0.13081848621368408, "learning_rate": 3.7694736842105266e-06, "loss": 0.0016, "step": 6425 }, { "epoch": 14.625850340136054, "grad_norm": 0.011676596477627754, "learning_rate": 3.7431578947368424e-06, "loss": 0.0044, "step": 6450 }, { "epoch": 14.682539682539682, "grad_norm": 0.014040385372936726, "learning_rate": 3.716842105263158e-06, "loss": 0.0001, "step": 6475 }, { "epoch": 14.739229024943311, "grad_norm": 0.011808693408966064, "learning_rate": 3.690526315789474e-06, "loss": 0.0001, "step": 6500 }, { "epoch": 14.795918367346939, "grad_norm": 0.012787124142050743, "learning_rate": 3.6642105263157894e-06, "loss": 0.0045, "step": 6525 }, { "epoch": 14.852607709750567, "grad_norm": 0.005284798797219992, "learning_rate": 3.6378947368421057e-06, "loss": 0.0001, "step": 6550 }, { "epoch": 14.909297052154194, "grad_norm": 0.036842990666627884, "learning_rate": 3.6115789473684215e-06, "loss": 0.0004, "step": 6575 }, { "epoch": 14.965986394557824, "grad_norm": 0.024015046656131744, "learning_rate": 3.5852631578947373e-06, "loss": 0.0003, "step": 6600 }, { "epoch": 15.022675736961451, "grad_norm": 0.012714927084743977, "learning_rate": 3.558947368421053e-06, "loss": 0.0025, "step": 6625 }, { "epoch": 15.079365079365079, "grad_norm": 0.017115842550992966, "learning_rate": 3.5326315789473685e-06, "loss": 0.0012, "step": 6650 }, { "epoch": 15.136054421768707, "grad_norm": 0.008900342509150505, "learning_rate": 3.5063157894736843e-06, "loss": 0.0001, "step": 6675 }, { "epoch": 15.192743764172336, "grad_norm": 0.007803457789123058, "learning_rate": 3.48e-06, "loss": 0.0004, "step": 6700 }, { "epoch": 15.249433106575964, "grad_norm": 0.013770255260169506, "learning_rate": 3.4536842105263164e-06, "loss": 0.0011, "step": 6725 }, { "epoch": 15.306122448979592, "grad_norm": 0.06877677142620087, "learning_rate": 3.427368421052632e-06, "loss": 0.0018, "step": 6750 }, { "epoch": 15.36281179138322, "grad_norm": 0.011991630308330059, "learning_rate": 3.4010526315789476e-06, "loss": 0.0001, "step": 6775 }, { "epoch": 15.419501133786849, "grad_norm": 0.013049086555838585, "learning_rate": 3.3747368421052634e-06, "loss": 0.0002, "step": 6800 }, { "epoch": 15.476190476190476, "grad_norm": 0.012979848310351372, "learning_rate": 3.3484210526315792e-06, "loss": 0.0002, "step": 6825 }, { "epoch": 15.532879818594104, "grad_norm": 0.006456771399825811, "learning_rate": 3.3221052631578946e-06, "loss": 0.0001, "step": 6850 }, { "epoch": 15.589569160997732, "grad_norm": 0.012274966575205326, "learning_rate": 3.2957894736842104e-06, "loss": 0.0001, "step": 6875 }, { "epoch": 15.646258503401361, "grad_norm": 0.010561280883848667, "learning_rate": 3.2694736842105267e-06, "loss": 0.0004, "step": 6900 }, { "epoch": 15.702947845804989, "grad_norm": 0.009232975542545319, "learning_rate": 3.2431578947368425e-06, "loss": 0.0002, "step": 6925 }, { "epoch": 15.759637188208616, "grad_norm": 0.015166404657065868, "learning_rate": 3.2168421052631583e-06, "loss": 0.0001, "step": 6950 }, { "epoch": 15.816326530612244, "grad_norm": 0.0094530014321208, "learning_rate": 3.1905263157894737e-06, "loss": 0.0001, "step": 6975 }, { "epoch": 15.873015873015873, "grad_norm": 0.017429711297154427, "learning_rate": 3.1642105263157895e-06, "loss": 0.0001, "step": 7000 }, { "epoch": 15.873015873015873, "eval_loss": 0.2297230064868927, "eval_runtime": 188.711, "eval_samples_per_second": 2.48, "eval_steps_per_second": 0.413, "eval_wer": 6.049679487179487, "step": 7000 }, { "epoch": 15.929705215419501, "grad_norm": 0.005318532232195139, "learning_rate": 3.1378947368421054e-06, "loss": 0.001, "step": 7025 }, { "epoch": 15.986394557823129, "grad_norm": 0.011030340567231178, "learning_rate": 3.111578947368421e-06, "loss": 0.0001, "step": 7050 }, { "epoch": 16.04308390022676, "grad_norm": 0.004018599167466164, "learning_rate": 3.0852631578947374e-06, "loss": 0.0001, "step": 7075 }, { "epoch": 16.099773242630384, "grad_norm": 0.006292372010648251, "learning_rate": 3.058947368421053e-06, "loss": 0.0063, "step": 7100 }, { "epoch": 16.156462585034014, "grad_norm": 0.008721155114471912, "learning_rate": 3.0326315789473686e-06, "loss": 0.0001, "step": 7125 }, { "epoch": 16.213151927437643, "grad_norm": 0.007462701760232449, "learning_rate": 3.0063157894736844e-06, "loss": 0.0013, "step": 7150 }, { "epoch": 16.26984126984127, "grad_norm": 0.0060442672111094, "learning_rate": 2.9800000000000003e-06, "loss": 0.0002, "step": 7175 }, { "epoch": 16.3265306122449, "grad_norm": 0.007179939653724432, "learning_rate": 2.9536842105263157e-06, "loss": 0.0001, "step": 7200 }, { "epoch": 16.383219954648528, "grad_norm": 0.006970668211579323, "learning_rate": 2.927368421052632e-06, "loss": 0.0025, "step": 7225 }, { "epoch": 16.439909297052154, "grad_norm": 0.0061109112575650215, "learning_rate": 2.9010526315789477e-06, "loss": 0.0003, "step": 7250 }, { "epoch": 16.496598639455783, "grad_norm": 0.02214565873146057, "learning_rate": 2.8747368421052635e-06, "loss": 0.0001, "step": 7275 }, { "epoch": 16.55328798185941, "grad_norm": 0.009677527472376823, "learning_rate": 2.8484210526315794e-06, "loss": 0.0001, "step": 7300 }, { "epoch": 16.60997732426304, "grad_norm": 3.2391059398651123, "learning_rate": 2.8221052631578948e-06, "loss": 0.0013, "step": 7325 }, { "epoch": 16.666666666666668, "grad_norm": 0.005941161885857582, "learning_rate": 2.7957894736842106e-06, "loss": 0.0001, "step": 7350 }, { "epoch": 16.723356009070294, "grad_norm": 0.005041074473410845, "learning_rate": 2.7694736842105264e-06, "loss": 0.0001, "step": 7375 }, { "epoch": 16.780045351473923, "grad_norm": 0.010110282339155674, "learning_rate": 2.7431578947368426e-06, "loss": 0.0001, "step": 7400 }, { "epoch": 16.836734693877553, "grad_norm": 0.0052981507033109665, "learning_rate": 2.7168421052631585e-06, "loss": 0.0001, "step": 7425 }, { "epoch": 16.89342403628118, "grad_norm": 0.007062564603984356, "learning_rate": 2.690526315789474e-06, "loss": 0.0004, "step": 7450 }, { "epoch": 16.950113378684808, "grad_norm": 0.005766382906585932, "learning_rate": 2.6642105263157897e-06, "loss": 0.0001, "step": 7475 }, { "epoch": 17.006802721088434, "grad_norm": 8.800177574157715, "learning_rate": 2.6378947368421055e-06, "loss": 0.0022, "step": 7500 }, { "epoch": 17.063492063492063, "grad_norm": 0.003864010563120246, "learning_rate": 2.6115789473684213e-06, "loss": 0.0001, "step": 7525 }, { "epoch": 17.120181405895693, "grad_norm": 0.003043045522645116, "learning_rate": 2.5852631578947367e-06, "loss": 0.0001, "step": 7550 }, { "epoch": 17.17687074829932, "grad_norm": 0.0026129058096557856, "learning_rate": 2.558947368421053e-06, "loss": 0.0001, "step": 7575 }, { "epoch": 17.233560090702948, "grad_norm": 0.003994261380285025, "learning_rate": 2.5326315789473688e-06, "loss": 0.0001, "step": 7600 }, { "epoch": 17.290249433106577, "grad_norm": 0.006509356200695038, "learning_rate": 2.5063157894736846e-06, "loss": 0.0001, "step": 7625 }, { "epoch": 17.346938775510203, "grad_norm": 0.006231856532394886, "learning_rate": 2.4800000000000004e-06, "loss": 0.0001, "step": 7650 }, { "epoch": 17.403628117913833, "grad_norm": 0.00826491229236126, "learning_rate": 2.453684210526316e-06, "loss": 0.0027, "step": 7675 }, { "epoch": 17.46031746031746, "grad_norm": 0.007504597306251526, "learning_rate": 2.427368421052632e-06, "loss": 0.0001, "step": 7700 }, { "epoch": 17.517006802721088, "grad_norm": 0.0033706706017255783, "learning_rate": 2.4010526315789474e-06, "loss": 0.0001, "step": 7725 }, { "epoch": 17.573696145124718, "grad_norm": 0.004383792169392109, "learning_rate": 2.3747368421052632e-06, "loss": 0.0018, "step": 7750 }, { "epoch": 17.630385487528343, "grad_norm": 0.004007370211184025, "learning_rate": 2.348421052631579e-06, "loss": 0.0001, "step": 7775 }, { "epoch": 17.687074829931973, "grad_norm": 0.006295809056609869, "learning_rate": 2.322105263157895e-06, "loss": 0.0001, "step": 7800 }, { "epoch": 17.743764172335602, "grad_norm": 0.006831544451415539, "learning_rate": 2.2957894736842107e-06, "loss": 0.0003, "step": 7825 }, { "epoch": 17.800453514739228, "grad_norm": 0.0033715348690748215, "learning_rate": 2.2694736842105265e-06, "loss": 0.0001, "step": 7850 }, { "epoch": 17.857142857142858, "grad_norm": 0.006168752908706665, "learning_rate": 2.2431578947368423e-06, "loss": 0.0012, "step": 7875 }, { "epoch": 17.913832199546484, "grad_norm": 0.006377949379384518, "learning_rate": 2.216842105263158e-06, "loss": 0.0052, "step": 7900 }, { "epoch": 17.970521541950113, "grad_norm": 1.9607151746749878, "learning_rate": 2.190526315789474e-06, "loss": 0.0004, "step": 7925 }, { "epoch": 18.027210884353742, "grad_norm": 0.0046304683201014996, "learning_rate": 2.16421052631579e-06, "loss": 0.001, "step": 7950 }, { "epoch": 18.08390022675737, "grad_norm": 0.008269163779914379, "learning_rate": 2.1378947368421056e-06, "loss": 0.0001, "step": 7975 }, { "epoch": 18.140589569160998, "grad_norm": 0.0044001140631735325, "learning_rate": 2.111578947368421e-06, "loss": 0.0001, "step": 8000 }, { "epoch": 18.140589569160998, "eval_loss": 0.2317376732826233, "eval_runtime": 158.0944, "eval_samples_per_second": 2.96, "eval_steps_per_second": 0.493, "eval_wer": 6.076388888888888, "step": 8000 }, { "epoch": 18.197278911564627, "grad_norm": 0.006642814259976149, "learning_rate": 2.085263157894737e-06, "loss": 0.0001, "step": 8025 }, { "epoch": 18.253968253968253, "grad_norm": 0.00599477905780077, "learning_rate": 2.058947368421053e-06, "loss": 0.0001, "step": 8050 }, { "epoch": 18.310657596371883, "grad_norm": 0.0045234388671815395, "learning_rate": 2.0326315789473685e-06, "loss": 0.0001, "step": 8075 }, { "epoch": 18.367346938775512, "grad_norm": 0.002616587560623884, "learning_rate": 2.0063157894736843e-06, "loss": 0.0001, "step": 8100 }, { "epoch": 18.424036281179138, "grad_norm": 0.0050145648419857025, "learning_rate": 1.98e-06, "loss": 0.0001, "step": 8125 }, { "epoch": 18.480725623582767, "grad_norm": 0.0029045080300420523, "learning_rate": 1.953684210526316e-06, "loss": 0.0002, "step": 8150 }, { "epoch": 18.537414965986393, "grad_norm": 0.0041219014674425125, "learning_rate": 1.9273684210526317e-06, "loss": 0.0009, "step": 8175 }, { "epoch": 18.594104308390023, "grad_norm": 0.006918082479387522, "learning_rate": 1.9010526315789476e-06, "loss": 0.0001, "step": 8200 }, { "epoch": 18.650793650793652, "grad_norm": 0.0045529440976679325, "learning_rate": 1.8747368421052634e-06, "loss": 0.0005, "step": 8225 }, { "epoch": 18.707482993197278, "grad_norm": 0.004559030756354332, "learning_rate": 1.848421052631579e-06, "loss": 0.0001, "step": 8250 }, { "epoch": 18.764172335600907, "grad_norm": 0.003188680624589324, "learning_rate": 1.8221052631578948e-06, "loss": 0.0001, "step": 8275 }, { "epoch": 18.820861678004537, "grad_norm": 0.0051582190208137035, "learning_rate": 1.7957894736842108e-06, "loss": 0.0001, "step": 8300 }, { "epoch": 18.877551020408163, "grad_norm": 0.003925441298633814, "learning_rate": 1.7694736842105264e-06, "loss": 0.0001, "step": 8325 }, { "epoch": 18.934240362811792, "grad_norm": 0.005711190402507782, "learning_rate": 1.7431578947368423e-06, "loss": 0.0, "step": 8350 }, { "epoch": 18.990929705215418, "grad_norm": 0.006396492477506399, "learning_rate": 1.716842105263158e-06, "loss": 0.0002, "step": 8375 }, { "epoch": 19.047619047619047, "grad_norm": 0.0038605357985943556, "learning_rate": 1.6905263157894739e-06, "loss": 0.0001, "step": 8400 }, { "epoch": 19.104308390022677, "grad_norm": 0.003054672619327903, "learning_rate": 1.6642105263157895e-06, "loss": 0.0, "step": 8425 }, { "epoch": 19.160997732426303, "grad_norm": 0.0045293658040463924, "learning_rate": 1.6378947368421053e-06, "loss": 0.0, "step": 8450 }, { "epoch": 19.217687074829932, "grad_norm": 0.005090142600238323, "learning_rate": 1.6115789473684211e-06, "loss": 0.0001, "step": 8475 }, { "epoch": 19.27437641723356, "grad_norm": 0.003649334190413356, "learning_rate": 1.585263157894737e-06, "loss": 0.0, "step": 8500 }, { "epoch": 19.331065759637188, "grad_norm": 0.014431001618504524, "learning_rate": 1.5589473684210526e-06, "loss": 0.0001, "step": 8525 }, { "epoch": 19.387755102040817, "grad_norm": 0.005074130836874247, "learning_rate": 1.5326315789473686e-06, "loss": 0.0, "step": 8550 }, { "epoch": 19.444444444444443, "grad_norm": 0.0036257512401789427, "learning_rate": 1.5063157894736844e-06, "loss": 0.0001, "step": 8575 }, { "epoch": 19.501133786848072, "grad_norm": 0.0060266111977398396, "learning_rate": 1.48e-06, "loss": 0.0001, "step": 8600 }, { "epoch": 19.5578231292517, "grad_norm": 0.004775646608322859, "learning_rate": 1.453684210526316e-06, "loss": 0.0, "step": 8625 }, { "epoch": 19.614512471655328, "grad_norm": 0.006195446942001581, "learning_rate": 1.4273684210526317e-06, "loss": 0.0003, "step": 8650 }, { "epoch": 19.671201814058957, "grad_norm": 0.0044461763463914394, "learning_rate": 1.4010526315789475e-06, "loss": 0.0001, "step": 8675 }, { "epoch": 19.727891156462587, "grad_norm": 0.0022899750620126724, "learning_rate": 1.374736842105263e-06, "loss": 0.0, "step": 8700 }, { "epoch": 19.784580498866212, "grad_norm": 0.005077675450593233, "learning_rate": 1.3484210526315791e-06, "loss": 0.0001, "step": 8725 }, { "epoch": 19.841269841269842, "grad_norm": 0.005482817534357309, "learning_rate": 1.322105263157895e-06, "loss": 0.0, "step": 8750 }, { "epoch": 19.897959183673468, "grad_norm": 0.00202305824495852, "learning_rate": 1.2957894736842105e-06, "loss": 0.0001, "step": 8775 }, { "epoch": 19.954648526077097, "grad_norm": 0.005788388196378946, "learning_rate": 1.2694736842105266e-06, "loss": 0.0001, "step": 8800 }, { "epoch": 20.011337868480727, "grad_norm": 0.004344166722148657, "learning_rate": 1.2431578947368422e-06, "loss": 0.0014, "step": 8825 }, { "epoch": 20.068027210884352, "grad_norm": 0.004923074971884489, "learning_rate": 1.216842105263158e-06, "loss": 0.0, "step": 8850 }, { "epoch": 20.124716553287982, "grad_norm": 0.0024910017382353544, "learning_rate": 1.1905263157894738e-06, "loss": 0.0, "step": 8875 }, { "epoch": 20.18140589569161, "grad_norm": 0.0026297103613615036, "learning_rate": 1.1642105263157896e-06, "loss": 0.0, "step": 8900 }, { "epoch": 20.238095238095237, "grad_norm": 0.005076109431684017, "learning_rate": 1.1378947368421052e-06, "loss": 0.0001, "step": 8925 }, { "epoch": 20.294784580498867, "grad_norm": 0.003189537674188614, "learning_rate": 1.1115789473684213e-06, "loss": 0.0, "step": 8950 }, { "epoch": 20.351473922902493, "grad_norm": 0.0030386645812541246, "learning_rate": 1.0852631578947369e-06, "loss": 0.0, "step": 8975 }, { "epoch": 20.408163265306122, "grad_norm": 0.001944896299391985, "learning_rate": 1.0589473684210527e-06, "loss": 0.0001, "step": 9000 }, { "epoch": 20.408163265306122, "eval_loss": 0.23750941455364227, "eval_runtime": 160.3655, "eval_samples_per_second": 2.918, "eval_steps_per_second": 0.486, "eval_wer": 6.396901709401709, "step": 9000 }, { "epoch": 20.46485260770975, "grad_norm": 0.0040268674492836, "learning_rate": 1.0326315789473685e-06, "loss": 0.0, "step": 9025 }, { "epoch": 20.521541950113377, "grad_norm": 2.8923232555389404, "learning_rate": 1.0063157894736843e-06, "loss": 0.0002, "step": 9050 }, { "epoch": 20.578231292517007, "grad_norm": 0.002543982584029436, "learning_rate": 9.800000000000001e-07, "loss": 0.0, "step": 9075 }, { "epoch": 20.634920634920636, "grad_norm": 0.002120724180713296, "learning_rate": 9.536842105263158e-07, "loss": 0.0002, "step": 9100 }, { "epoch": 20.691609977324262, "grad_norm": 0.0036805281415581703, "learning_rate": 9.273684210526317e-07, "loss": 0.0, "step": 9125 }, { "epoch": 20.74829931972789, "grad_norm": 0.002348339883610606, "learning_rate": 9.010526315789474e-07, "loss": 0.0001, "step": 9150 }, { "epoch": 20.80498866213152, "grad_norm": 0.0036750957369804382, "learning_rate": 8.747368421052632e-07, "loss": 0.0, "step": 9175 }, { "epoch": 20.861678004535147, "grad_norm": 0.004974485840648413, "learning_rate": 8.48421052631579e-07, "loss": 0.0, "step": 9200 }, { "epoch": 20.918367346938776, "grad_norm": 0.004362870939075947, "learning_rate": 8.221052631578947e-07, "loss": 0.0001, "step": 9225 }, { "epoch": 20.975056689342402, "grad_norm": 0.002504055853933096, "learning_rate": 7.957894736842107e-07, "loss": 0.0001, "step": 9250 }, { "epoch": 21.03174603174603, "grad_norm": 0.004000427667051554, "learning_rate": 7.694736842105263e-07, "loss": 0.0001, "step": 9275 }, { "epoch": 21.08843537414966, "grad_norm": 0.003886349266394973, "learning_rate": 7.431578947368422e-07, "loss": 0.0, "step": 9300 }, { "epoch": 21.145124716553287, "grad_norm": 0.003481630701571703, "learning_rate": 7.16842105263158e-07, "loss": 0.0, "step": 9325 }, { "epoch": 21.201814058956916, "grad_norm": 0.01767110824584961, "learning_rate": 6.905263157894737e-07, "loss": 0.0, "step": 9350 }, { "epoch": 21.258503401360546, "grad_norm": 0.005323050078004599, "learning_rate": 6.642105263157895e-07, "loss": 0.0, "step": 9375 }, { "epoch": 21.31519274376417, "grad_norm": 0.05196991562843323, "learning_rate": 6.378947368421053e-07, "loss": 0.0001, "step": 9400 }, { "epoch": 21.3718820861678, "grad_norm": 0.003023393452167511, "learning_rate": 6.115789473684211e-07, "loss": 0.0, "step": 9425 }, { "epoch": 21.428571428571427, "grad_norm": 0.0037847934290766716, "learning_rate": 5.852631578947369e-07, "loss": 0.0, "step": 9450 }, { "epoch": 21.485260770975056, "grad_norm": 0.0039050974883139133, "learning_rate": 5.589473684210526e-07, "loss": 0.0001, "step": 9475 }, { "epoch": 21.541950113378686, "grad_norm": 0.0036291517317295074, "learning_rate": 5.326315789473684e-07, "loss": 0.0001, "step": 9500 }, { "epoch": 21.598639455782312, "grad_norm": 0.003725625341758132, "learning_rate": 5.063157894736842e-07, "loss": 0.0001, "step": 9525 }, { "epoch": 21.65532879818594, "grad_norm": 0.0034233913756906986, "learning_rate": 4.800000000000001e-07, "loss": 0.0, "step": 9550 }, { "epoch": 21.71201814058957, "grad_norm": 0.005571336485445499, "learning_rate": 4.5368421052631583e-07, "loss": 0.0, "step": 9575 }, { "epoch": 21.768707482993197, "grad_norm": 0.0034070161636918783, "learning_rate": 4.273684210526316e-07, "loss": 0.0001, "step": 9600 }, { "epoch": 21.825396825396826, "grad_norm": 0.0027184481732547283, "learning_rate": 4.0105263157894736e-07, "loss": 0.0, "step": 9625 }, { "epoch": 21.882086167800452, "grad_norm": 0.001929171965457499, "learning_rate": 3.7473684210526323e-07, "loss": 0.0, "step": 9650 }, { "epoch": 21.93877551020408, "grad_norm": 0.003997990861535072, "learning_rate": 3.48421052631579e-07, "loss": 0.0, "step": 9675 }, { "epoch": 21.99546485260771, "grad_norm": 0.0038647083565592766, "learning_rate": 3.2210526315789476e-07, "loss": 0.0, "step": 9700 }, { "epoch": 22.052154195011337, "grad_norm": 0.00401474442332983, "learning_rate": 2.9578947368421053e-07, "loss": 0.0, "step": 9725 }, { "epoch": 22.108843537414966, "grad_norm": 0.0030088857747614384, "learning_rate": 2.6947368421052635e-07, "loss": 0.0001, "step": 9750 }, { "epoch": 22.165532879818596, "grad_norm": 0.003003130666911602, "learning_rate": 2.431578947368421e-07, "loss": 0.0, "step": 9775 }, { "epoch": 22.22222222222222, "grad_norm": 0.004516700282692909, "learning_rate": 2.168421052631579e-07, "loss": 0.0, "step": 9800 }, { "epoch": 22.27891156462585, "grad_norm": 0.002650737063959241, "learning_rate": 1.9052631578947372e-07, "loss": 0.0, "step": 9825 }, { "epoch": 22.335600907029477, "grad_norm": 0.00888384971767664, "learning_rate": 1.642105263157895e-07, "loss": 0.0, "step": 9850 }, { "epoch": 22.392290249433106, "grad_norm": 0.0020597188267856836, "learning_rate": 1.3789473684210528e-07, "loss": 0.0, "step": 9875 }, { "epoch": 22.448979591836736, "grad_norm": 0.004189135041087866, "learning_rate": 1.1157894736842106e-07, "loss": 0.0, "step": 9900 }, { "epoch": 22.50566893424036, "grad_norm": 0.003128908108919859, "learning_rate": 8.526315789473685e-08, "loss": 0.0, "step": 9925 }, { "epoch": 22.56235827664399, "grad_norm": 0.0031592377927154303, "learning_rate": 5.8947368421052637e-08, "loss": 0.0, "step": 9950 }, { "epoch": 22.61904761904762, "grad_norm": 0.004137367941439152, "learning_rate": 3.263157894736842e-08, "loss": 0.0, "step": 9975 }, { "epoch": 22.675736961451246, "grad_norm": 0.0022842560429126024, "learning_rate": 6.315789473684211e-09, "loss": 0.0, "step": 10000 }, { "epoch": 22.675736961451246, "eval_loss": 0.23789168894290924, "eval_runtime": 182.0655, "eval_samples_per_second": 2.571, "eval_steps_per_second": 0.428, "eval_wer": 6.490384615384616, "step": 10000 }, { "epoch": 22.675736961451246, "step": 10000, "total_flos": 1.73151240192e+19, "train_loss": 0.06552563527043676, "train_runtime": 7527.9049, "train_samples_per_second": 7.97, "train_steps_per_second": 1.328 } ], "logging_steps": 25, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 23, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.73151240192e+19, "train_batch_size": 6, "trial_name": null, "trial_params": null }