{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.456659619450317, "eval_steps": 1000, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021141649048625793, "grad_norm": 13.565643310546875, "learning_rate": 4.800000000000001e-07, "loss": 1.0091, "step": 25 }, { "epoch": 0.042283298097251586, "grad_norm": 9.143893241882324, "learning_rate": 9.800000000000001e-07, "loss": 0.7687, "step": 50 }, { "epoch": 0.06342494714587738, "grad_norm": 6.373236179351807, "learning_rate": 1.48e-06, "loss": 0.3227, "step": 75 }, { "epoch": 0.08456659619450317, "grad_norm": 5.452936172485352, "learning_rate": 1.98e-06, "loss": 0.2331, "step": 100 }, { "epoch": 0.10570824524312897, "grad_norm": 5.6975626945495605, "learning_rate": 2.4800000000000004e-06, "loss": 0.2023, "step": 125 }, { "epoch": 0.12684989429175475, "grad_norm": 4.201228141784668, "learning_rate": 2.9800000000000003e-06, "loss": 0.2185, "step": 150 }, { "epoch": 0.14799154334038056, "grad_norm": 5.918435573577881, "learning_rate": 3.48e-06, "loss": 0.1956, "step": 175 }, { "epoch": 0.16913319238900634, "grad_norm": 4.987053871154785, "learning_rate": 3.980000000000001e-06, "loss": 0.1898, "step": 200 }, { "epoch": 0.19027484143763213, "grad_norm": 4.0125203132629395, "learning_rate": 4.48e-06, "loss": 0.195, "step": 225 }, { "epoch": 0.21141649048625794, "grad_norm": 4.057735919952393, "learning_rate": 4.980000000000001e-06, "loss": 0.1765, "step": 250 }, { "epoch": 0.23255813953488372, "grad_norm": 5.463469982147217, "learning_rate": 5.480000000000001e-06, "loss": 0.1882, "step": 275 }, { "epoch": 0.2536997885835095, "grad_norm": 4.29346227645874, "learning_rate": 5.98e-06, "loss": 0.1591, "step": 300 }, { "epoch": 0.2748414376321353, "grad_norm": 5.261208534240723, "learning_rate": 6.480000000000001e-06, "loss": 0.1784, "step": 325 }, { "epoch": 0.2959830866807611, "grad_norm": 5.871801376342773, "learning_rate": 6.98e-06, "loss": 0.1618, "step": 350 }, { "epoch": 0.3171247357293869, "grad_norm": 4.745639324188232, "learning_rate": 7.48e-06, "loss": 0.1639, "step": 375 }, { "epoch": 0.3382663847780127, "grad_norm": 5.110447406768799, "learning_rate": 7.980000000000002e-06, "loss": 0.1769, "step": 400 }, { "epoch": 0.3594080338266385, "grad_norm": 6.022309303283691, "learning_rate": 8.48e-06, "loss": 0.1736, "step": 425 }, { "epoch": 0.38054968287526425, "grad_norm": 3.8885881900787354, "learning_rate": 8.98e-06, "loss": 0.1723, "step": 450 }, { "epoch": 0.40169133192389006, "grad_norm": 5.72865104675293, "learning_rate": 9.48e-06, "loss": 0.1688, "step": 475 }, { "epoch": 0.42283298097251587, "grad_norm": 4.54589319229126, "learning_rate": 9.980000000000001e-06, "loss": 0.1789, "step": 500 }, { "epoch": 0.4439746300211416, "grad_norm": 4.388618469238281, "learning_rate": 9.974736842105263e-06, "loss": 0.1712, "step": 525 }, { "epoch": 0.46511627906976744, "grad_norm": 3.574472427368164, "learning_rate": 9.94842105263158e-06, "loss": 0.1549, "step": 550 }, { "epoch": 0.48625792811839325, "grad_norm": 4.669585227966309, "learning_rate": 9.922105263157895e-06, "loss": 0.1652, "step": 575 }, { "epoch": 0.507399577167019, "grad_norm": 4.39477014541626, "learning_rate": 9.895789473684212e-06, "loss": 0.1504, "step": 600 }, { "epoch": 0.5285412262156448, "grad_norm": 2.9595720767974854, "learning_rate": 9.869473684210528e-06, "loss": 0.1596, "step": 625 }, { "epoch": 0.5496828752642706, "grad_norm": 3.7537808418273926, "learning_rate": 9.843157894736843e-06, "loss": 0.1459, "step": 650 }, { "epoch": 0.5708245243128964, "grad_norm": 5.807609558105469, "learning_rate": 9.816842105263158e-06, "loss": 0.1486, "step": 675 }, { "epoch": 0.5919661733615222, "grad_norm": 3.948618173599243, "learning_rate": 9.790526315789475e-06, "loss": 0.1673, "step": 700 }, { "epoch": 0.6131078224101479, "grad_norm": 5.866642475128174, "learning_rate": 9.76421052631579e-06, "loss": 0.1667, "step": 725 }, { "epoch": 0.6342494714587738, "grad_norm": 3.490234375, "learning_rate": 9.737894736842107e-06, "loss": 0.1321, "step": 750 }, { "epoch": 0.6553911205073996, "grad_norm": 4.5152740478515625, "learning_rate": 9.711578947368422e-06, "loss": 0.1446, "step": 775 }, { "epoch": 0.6765327695560254, "grad_norm": 4.738922595977783, "learning_rate": 9.685263157894738e-06, "loss": 0.1567, "step": 800 }, { "epoch": 0.6976744186046512, "grad_norm": 5.560998439788818, "learning_rate": 9.658947368421053e-06, "loss": 0.1503, "step": 825 }, { "epoch": 0.718816067653277, "grad_norm": 4.573572158813477, "learning_rate": 9.63263157894737e-06, "loss": 0.1613, "step": 850 }, { "epoch": 0.7399577167019028, "grad_norm": 4.3108134269714355, "learning_rate": 9.606315789473685e-06, "loss": 0.148, "step": 875 }, { "epoch": 0.7610993657505285, "grad_norm": 4.116722583770752, "learning_rate": 9.58e-06, "loss": 0.1437, "step": 900 }, { "epoch": 0.7822410147991543, "grad_norm": 4.437409400939941, "learning_rate": 9.553684210526316e-06, "loss": 0.1572, "step": 925 }, { "epoch": 0.8033826638477801, "grad_norm": 3.4169201850891113, "learning_rate": 9.527368421052631e-06, "loss": 0.1631, "step": 950 }, { "epoch": 0.8245243128964059, "grad_norm": 5.115376949310303, "learning_rate": 9.501052631578948e-06, "loss": 0.1533, "step": 975 }, { "epoch": 0.8456659619450317, "grad_norm": 3.852684259414673, "learning_rate": 9.474736842105265e-06, "loss": 0.132, "step": 1000 }, { "epoch": 0.8456659619450317, "eval_loss": 0.09629034250974655, "eval_runtime": 803.1828, "eval_samples_per_second": 4.533, "eval_steps_per_second": 0.284, "eval_wer": 0.0747134437792937, "step": 1000 }, { "epoch": 0.8668076109936576, "grad_norm": 3.996983766555786, "learning_rate": 9.44842105263158e-06, "loss": 0.149, "step": 1025 }, { "epoch": 0.8879492600422833, "grad_norm": 5.054391860961914, "learning_rate": 9.422105263157896e-06, "loss": 0.122, "step": 1050 }, { "epoch": 0.9090909090909091, "grad_norm": 3.469428777694702, "learning_rate": 9.395789473684211e-06, "loss": 0.1259, "step": 1075 }, { "epoch": 0.9302325581395349, "grad_norm": 4.308193206787109, "learning_rate": 9.369473684210528e-06, "loss": 0.1187, "step": 1100 }, { "epoch": 0.9513742071881607, "grad_norm": 3.8684639930725098, "learning_rate": 9.343157894736843e-06, "loss": 0.1332, "step": 1125 }, { "epoch": 0.9725158562367865, "grad_norm": 3.6519436836242676, "learning_rate": 9.316842105263158e-06, "loss": 0.1314, "step": 1150 }, { "epoch": 0.9936575052854123, "grad_norm": 4.258838653564453, "learning_rate": 9.290526315789475e-06, "loss": 0.1244, "step": 1175 }, { "epoch": 1.014799154334038, "grad_norm": 2.686086654663086, "learning_rate": 9.265263157894737e-06, "loss": 0.0855, "step": 1200 }, { "epoch": 1.0359408033826638, "grad_norm": 3.826284646987915, "learning_rate": 9.238947368421052e-06, "loss": 0.0683, "step": 1225 }, { "epoch": 1.0570824524312896, "grad_norm": 2.2538001537323, "learning_rate": 9.21263157894737e-06, "loss": 0.0571, "step": 1250 }, { "epoch": 1.0782241014799154, "grad_norm": 3.7690203189849854, "learning_rate": 9.186315789473685e-06, "loss": 0.0692, "step": 1275 }, { "epoch": 1.0993657505285412, "grad_norm": 3.0382161140441895, "learning_rate": 9.16e-06, "loss": 0.0604, "step": 1300 }, { "epoch": 1.120507399577167, "grad_norm": 2.6046435832977295, "learning_rate": 9.133684210526317e-06, "loss": 0.055, "step": 1325 }, { "epoch": 1.1416490486257929, "grad_norm": 2.4167184829711914, "learning_rate": 9.107368421052632e-06, "loss": 0.0679, "step": 1350 }, { "epoch": 1.1627906976744187, "grad_norm": 3.173344850540161, "learning_rate": 9.081052631578949e-06, "loss": 0.0556, "step": 1375 }, { "epoch": 1.1839323467230445, "grad_norm": 3.099440336227417, "learning_rate": 9.054736842105264e-06, "loss": 0.0707, "step": 1400 }, { "epoch": 1.20507399577167, "grad_norm": 2.8110969066619873, "learning_rate": 9.02842105263158e-06, "loss": 0.0564, "step": 1425 }, { "epoch": 1.226215644820296, "grad_norm": 2.5299370288848877, "learning_rate": 9.002105263157895e-06, "loss": 0.0621, "step": 1450 }, { "epoch": 1.2473572938689217, "grad_norm": 1.7993814945220947, "learning_rate": 8.97578947368421e-06, "loss": 0.063, "step": 1475 }, { "epoch": 1.2684989429175475, "grad_norm": 2.9655444622039795, "learning_rate": 8.949473684210527e-06, "loss": 0.0578, "step": 1500 }, { "epoch": 1.2896405919661733, "grad_norm": 3.150512456893921, "learning_rate": 8.923157894736842e-06, "loss": 0.0676, "step": 1525 }, { "epoch": 1.3107822410147991, "grad_norm": 2.2454750537872314, "learning_rate": 8.896842105263159e-06, "loss": 0.057, "step": 1550 }, { "epoch": 1.331923890063425, "grad_norm": 2.760533332824707, "learning_rate": 8.870526315789474e-06, "loss": 0.0738, "step": 1575 }, { "epoch": 1.3530655391120507, "grad_norm": 3.960843086242676, "learning_rate": 8.84421052631579e-06, "loss": 0.0641, "step": 1600 }, { "epoch": 1.3742071881606766, "grad_norm": 2.070232391357422, "learning_rate": 8.817894736842107e-06, "loss": 0.055, "step": 1625 }, { "epoch": 1.3953488372093024, "grad_norm": 2.532212734222412, "learning_rate": 8.791578947368422e-06, "loss": 0.0523, "step": 1650 }, { "epoch": 1.4164904862579282, "grad_norm": 3.0113463401794434, "learning_rate": 8.765263157894739e-06, "loss": 0.0613, "step": 1675 }, { "epoch": 1.437632135306554, "grad_norm": 2.228800058364868, "learning_rate": 8.738947368421053e-06, "loss": 0.0506, "step": 1700 }, { "epoch": 1.4587737843551798, "grad_norm": 4.342855453491211, "learning_rate": 8.712631578947368e-06, "loss": 0.078, "step": 1725 }, { "epoch": 1.4799154334038054, "grad_norm": 2.6026878356933594, "learning_rate": 8.686315789473685e-06, "loss": 0.0566, "step": 1750 }, { "epoch": 1.5010570824524314, "grad_norm": 1.9011883735656738, "learning_rate": 8.66e-06, "loss": 0.0524, "step": 1775 }, { "epoch": 1.522198731501057, "grad_norm": 5.088387489318848, "learning_rate": 8.633684210526317e-06, "loss": 0.0708, "step": 1800 }, { "epoch": 1.543340380549683, "grad_norm": 2.1307568550109863, "learning_rate": 8.607368421052632e-06, "loss": 0.0586, "step": 1825 }, { "epoch": 1.5644820295983086, "grad_norm": 2.45223069190979, "learning_rate": 8.581052631578948e-06, "loss": 0.0566, "step": 1850 }, { "epoch": 1.5856236786469344, "grad_norm": 2.4557642936706543, "learning_rate": 8.554736842105263e-06, "loss": 0.0506, "step": 1875 }, { "epoch": 1.6067653276955602, "grad_norm": 3.5096094608306885, "learning_rate": 8.528421052631578e-06, "loss": 0.0569, "step": 1900 }, { "epoch": 1.627906976744186, "grad_norm": 1.774232268333435, "learning_rate": 8.502105263157897e-06, "loss": 0.0545, "step": 1925 }, { "epoch": 1.6490486257928119, "grad_norm": 2.894585609436035, "learning_rate": 8.475789473684212e-06, "loss": 0.0557, "step": 1950 }, { "epoch": 1.6701902748414377, "grad_norm": 2.1152944564819336, "learning_rate": 8.449473684210527e-06, "loss": 0.0533, "step": 1975 }, { "epoch": 1.6913319238900635, "grad_norm": 2.191254138946533, "learning_rate": 8.423157894736843e-06, "loss": 0.0503, "step": 2000 }, { "epoch": 1.6913319238900635, "eval_loss": 0.0664231926202774, "eval_runtime": 764.9205, "eval_samples_per_second": 4.76, "eval_steps_per_second": 0.298, "eval_wer": 0.05257784583971414, "step": 2000 }, { "epoch": 1.712473572938689, "grad_norm": 2.3676247596740723, "learning_rate": 8.396842105263158e-06, "loss": 0.0624, "step": 2025 }, { "epoch": 1.733615221987315, "grad_norm": 3.942397356033325, "learning_rate": 8.370526315789475e-06, "loss": 0.0592, "step": 2050 }, { "epoch": 1.7547568710359407, "grad_norm": 3.637500047683716, "learning_rate": 8.34421052631579e-06, "loss": 0.0437, "step": 2075 }, { "epoch": 1.7758985200845667, "grad_norm": 1.2622654438018799, "learning_rate": 8.317894736842107e-06, "loss": 0.0543, "step": 2100 }, { "epoch": 1.7970401691331923, "grad_norm": 2.234199047088623, "learning_rate": 8.291578947368422e-06, "loss": 0.0571, "step": 2125 }, { "epoch": 1.8181818181818183, "grad_norm": 2.8800275325775146, "learning_rate": 8.265263157894737e-06, "loss": 0.0475, "step": 2150 }, { "epoch": 1.839323467230444, "grad_norm": 3.2477574348449707, "learning_rate": 8.238947368421053e-06, "loss": 0.0529, "step": 2175 }, { "epoch": 1.8604651162790697, "grad_norm": 3.2765121459960938, "learning_rate": 8.212631578947368e-06, "loss": 0.0476, "step": 2200 }, { "epoch": 1.8816067653276956, "grad_norm": 2.850074529647827, "learning_rate": 8.186315789473685e-06, "loss": 0.0464, "step": 2225 }, { "epoch": 1.9027484143763214, "grad_norm": 2.3164877891540527, "learning_rate": 8.16e-06, "loss": 0.048, "step": 2250 }, { "epoch": 1.9238900634249472, "grad_norm": 4.057616710662842, "learning_rate": 8.133684210526316e-06, "loss": 0.044, "step": 2275 }, { "epoch": 1.945031712473573, "grad_norm": 2.687540292739868, "learning_rate": 8.107368421052633e-06, "loss": 0.0439, "step": 2300 }, { "epoch": 1.9661733615221988, "grad_norm": 2.555338144302368, "learning_rate": 8.081052631578948e-06, "loss": 0.0563, "step": 2325 }, { "epoch": 1.9873150105708244, "grad_norm": 2.214106798171997, "learning_rate": 8.054736842105265e-06, "loss": 0.0512, "step": 2350 }, { "epoch": 2.0084566596194504, "grad_norm": 1.0390377044677734, "learning_rate": 8.02842105263158e-06, "loss": 0.035, "step": 2375 }, { "epoch": 2.029598308668076, "grad_norm": 1.4279701709747314, "learning_rate": 8.002105263157895e-06, "loss": 0.0214, "step": 2400 }, { "epoch": 2.050739957716702, "grad_norm": 1.2955684661865234, "learning_rate": 7.975789473684211e-06, "loss": 0.0221, "step": 2425 }, { "epoch": 2.0718816067653276, "grad_norm": 1.0678218603134155, "learning_rate": 7.949473684210526e-06, "loss": 0.0185, "step": 2450 }, { "epoch": 2.0930232558139537, "grad_norm": 2.0018396377563477, "learning_rate": 7.923157894736843e-06, "loss": 0.0201, "step": 2475 }, { "epoch": 2.1141649048625792, "grad_norm": 1.0335798263549805, "learning_rate": 7.896842105263158e-06, "loss": 0.0156, "step": 2500 }, { "epoch": 2.1353065539112053, "grad_norm": 3.049020528793335, "learning_rate": 7.870526315789475e-06, "loss": 0.0218, "step": 2525 }, { "epoch": 2.156448202959831, "grad_norm": 0.5251482725143433, "learning_rate": 7.84421052631579e-06, "loss": 0.0192, "step": 2550 }, { "epoch": 2.177589852008457, "grad_norm": 2.4763119220733643, "learning_rate": 7.817894736842105e-06, "loss": 0.0199, "step": 2575 }, { "epoch": 2.1987315010570825, "grad_norm": 0.3287382423877716, "learning_rate": 7.791578947368423e-06, "loss": 0.0202, "step": 2600 }, { "epoch": 2.219873150105708, "grad_norm": 2.3478755950927734, "learning_rate": 7.765263157894738e-06, "loss": 0.0172, "step": 2625 }, { "epoch": 2.241014799154334, "grad_norm": 1.5515483617782593, "learning_rate": 7.738947368421053e-06, "loss": 0.0157, "step": 2650 }, { "epoch": 2.2621564482029597, "grad_norm": 3.578237295150757, "learning_rate": 7.71263157894737e-06, "loss": 0.024, "step": 2675 }, { "epoch": 2.2832980972515857, "grad_norm": 1.1182405948638916, "learning_rate": 7.686315789473685e-06, "loss": 0.0165, "step": 2700 }, { "epoch": 2.3044397463002113, "grad_norm": 1.8532599210739136, "learning_rate": 7.660000000000001e-06, "loss": 0.0241, "step": 2725 }, { "epoch": 2.3255813953488373, "grad_norm": 0.5601603984832764, "learning_rate": 7.633684210526316e-06, "loss": 0.0178, "step": 2750 }, { "epoch": 2.346723044397463, "grad_norm": 3.074402332305908, "learning_rate": 7.607368421052632e-06, "loss": 0.0276, "step": 2775 }, { "epoch": 2.367864693446089, "grad_norm": 1.695233702659607, "learning_rate": 7.581052631578948e-06, "loss": 0.0227, "step": 2800 }, { "epoch": 2.3890063424947146, "grad_norm": 2.522712469100952, "learning_rate": 7.554736842105264e-06, "loss": 0.029, "step": 2825 }, { "epoch": 2.41014799154334, "grad_norm": 0.7854623794555664, "learning_rate": 7.5284210526315794e-06, "loss": 0.0197, "step": 2850 }, { "epoch": 2.431289640591966, "grad_norm": 0.5606433749198914, "learning_rate": 7.502105263157895e-06, "loss": 0.02, "step": 2875 }, { "epoch": 2.452431289640592, "grad_norm": 2.8828301429748535, "learning_rate": 7.475789473684211e-06, "loss": 0.0198, "step": 2900 }, { "epoch": 2.473572938689218, "grad_norm": 1.5144062042236328, "learning_rate": 7.449473684210526e-06, "loss": 0.0201, "step": 2925 }, { "epoch": 2.4947145877378434, "grad_norm": 1.25803542137146, "learning_rate": 7.4231578947368436e-06, "loss": 0.0216, "step": 2950 }, { "epoch": 2.5158562367864694, "grad_norm": 1.2275983095169067, "learning_rate": 7.3968421052631585e-06, "loss": 0.032, "step": 2975 }, { "epoch": 2.536997885835095, "grad_norm": 1.5076512098312378, "learning_rate": 7.370526315789474e-06, "loss": 0.023, "step": 3000 }, { "epoch": 2.536997885835095, "eval_loss": 0.06277064979076385, "eval_runtime": 778.7671, "eval_samples_per_second": 4.675, "eval_steps_per_second": 0.293, "eval_wer": 0.07271799155413244, "step": 3000 }, { "epoch": 2.558139534883721, "grad_norm": 2.08162784576416, "learning_rate": 7.34421052631579e-06, "loss": 0.0211, "step": 3025 }, { "epoch": 2.5792811839323466, "grad_norm": 1.1717668771743774, "learning_rate": 7.317894736842106e-06, "loss": 0.019, "step": 3050 }, { "epoch": 2.6004228329809727, "grad_norm": 1.7563270330429077, "learning_rate": 7.291578947368422e-06, "loss": 0.0235, "step": 3075 }, { "epoch": 2.6215644820295982, "grad_norm": 2.0151233673095703, "learning_rate": 7.265263157894738e-06, "loss": 0.0201, "step": 3100 }, { "epoch": 2.6427061310782243, "grad_norm": 1.1807574033737183, "learning_rate": 7.2389473684210534e-06, "loss": 0.0232, "step": 3125 }, { "epoch": 2.66384778012685, "grad_norm": 2.389129877090454, "learning_rate": 7.212631578947369e-06, "loss": 0.0189, "step": 3150 }, { "epoch": 2.6849894291754755, "grad_norm": 1.0791960954666138, "learning_rate": 7.186315789473684e-06, "loss": 0.0138, "step": 3175 }, { "epoch": 2.7061310782241015, "grad_norm": 1.2188448905944824, "learning_rate": 7.16e-06, "loss": 0.0177, "step": 3200 }, { "epoch": 2.7272727272727275, "grad_norm": 2.194073438644409, "learning_rate": 7.133684210526316e-06, "loss": 0.0236, "step": 3225 }, { "epoch": 2.748414376321353, "grad_norm": 3.0970866680145264, "learning_rate": 7.107368421052632e-06, "loss": 0.0246, "step": 3250 }, { "epoch": 2.7695560253699787, "grad_norm": 1.133362054824829, "learning_rate": 7.0810526315789475e-06, "loss": 0.0177, "step": 3275 }, { "epoch": 2.7906976744186047, "grad_norm": 2.0186614990234375, "learning_rate": 7.054736842105264e-06, "loss": 0.0206, "step": 3300 }, { "epoch": 2.8118393234672303, "grad_norm": 1.397058367729187, "learning_rate": 7.02842105263158e-06, "loss": 0.0182, "step": 3325 }, { "epoch": 2.8329809725158563, "grad_norm": 0.7610916495323181, "learning_rate": 7.002105263157896e-06, "loss": 0.0186, "step": 3350 }, { "epoch": 2.854122621564482, "grad_norm": 1.2149909734725952, "learning_rate": 6.975789473684212e-06, "loss": 0.0244, "step": 3375 }, { "epoch": 2.875264270613108, "grad_norm": 0.7443040609359741, "learning_rate": 6.9494736842105275e-06, "loss": 0.0204, "step": 3400 }, { "epoch": 2.8964059196617336, "grad_norm": 1.4333350658416748, "learning_rate": 6.9231578947368424e-06, "loss": 0.0202, "step": 3425 }, { "epoch": 2.9175475687103596, "grad_norm": 0.8187249898910522, "learning_rate": 6.896842105263158e-06, "loss": 0.0236, "step": 3450 }, { "epoch": 2.938689217758985, "grad_norm": 0.5101013779640198, "learning_rate": 6.870526315789474e-06, "loss": 0.0265, "step": 3475 }, { "epoch": 2.9598308668076108, "grad_norm": 0.9081612229347229, "learning_rate": 6.84421052631579e-06, "loss": 0.0213, "step": 3500 }, { "epoch": 2.980972515856237, "grad_norm": 3.310152769088745, "learning_rate": 6.817894736842106e-06, "loss": 0.0158, "step": 3525 }, { "epoch": 3.0021141649048624, "grad_norm": 0.17649300396442413, "learning_rate": 6.7915789473684215e-06, "loss": 0.013, "step": 3550 }, { "epoch": 3.0232558139534884, "grad_norm": 1.0084452629089355, "learning_rate": 6.765263157894737e-06, "loss": 0.0098, "step": 3575 }, { "epoch": 3.044397463002114, "grad_norm": 0.7109575271606445, "learning_rate": 6.738947368421052e-06, "loss": 0.0063, "step": 3600 }, { "epoch": 3.06553911205074, "grad_norm": 1.4222323894500732, "learning_rate": 6.71263157894737e-06, "loss": 0.0104, "step": 3625 }, { "epoch": 3.0866807610993656, "grad_norm": 1.3189831972122192, "learning_rate": 6.686315789473685e-06, "loss": 0.0071, "step": 3650 }, { "epoch": 3.1078224101479917, "grad_norm": 0.26805758476257324, "learning_rate": 6.660000000000001e-06, "loss": 0.0069, "step": 3675 }, { "epoch": 3.1289640591966172, "grad_norm": 1.9511579275131226, "learning_rate": 6.6336842105263164e-06, "loss": 0.0107, "step": 3700 }, { "epoch": 3.1501057082452433, "grad_norm": 0.3703208863735199, "learning_rate": 6.607368421052632e-06, "loss": 0.0068, "step": 3725 }, { "epoch": 3.171247357293869, "grad_norm": 0.5500373840332031, "learning_rate": 6.581052631578948e-06, "loss": 0.0055, "step": 3750 }, { "epoch": 3.192389006342495, "grad_norm": 1.709043025970459, "learning_rate": 6.554736842105264e-06, "loss": 0.0087, "step": 3775 }, { "epoch": 3.2135306553911205, "grad_norm": 1.1865346431732178, "learning_rate": 6.52842105263158e-06, "loss": 0.0095, "step": 3800 }, { "epoch": 3.234672304439746, "grad_norm": 0.505714476108551, "learning_rate": 6.5021052631578955e-06, "loss": 0.0051, "step": 3825 }, { "epoch": 3.255813953488372, "grad_norm": 0.646237850189209, "learning_rate": 6.4757894736842105e-06, "loss": 0.0109, "step": 3850 }, { "epoch": 3.276955602536998, "grad_norm": 4.061963081359863, "learning_rate": 6.449473684210526e-06, "loss": 0.0091, "step": 3875 }, { "epoch": 3.2980972515856237, "grad_norm": 0.9798858761787415, "learning_rate": 6.423157894736842e-06, "loss": 0.0071, "step": 3900 }, { "epoch": 3.3192389006342493, "grad_norm": 0.07478220015764236, "learning_rate": 6.396842105263158e-06, "loss": 0.0087, "step": 3925 }, { "epoch": 3.3403805496828753, "grad_norm": 0.36445191502571106, "learning_rate": 6.370526315789474e-06, "loss": 0.0112, "step": 3950 }, { "epoch": 3.361522198731501, "grad_norm": 0.7756998538970947, "learning_rate": 6.3442105263157904e-06, "loss": 0.0092, "step": 3975 }, { "epoch": 3.382663847780127, "grad_norm": 0.12039519846439362, "learning_rate": 6.317894736842106e-06, "loss": 0.011, "step": 4000 }, { "epoch": 3.382663847780127, "eval_loss": 0.059302762150764465, "eval_runtime": 760.4332, "eval_samples_per_second": 4.788, "eval_steps_per_second": 0.3, "eval_wer": 0.04371432549074203, "step": 4000 }, { "epoch": 3.4038054968287526, "grad_norm": 0.5047985315322876, "learning_rate": 6.291578947368422e-06, "loss": 0.0082, "step": 4025 }, { "epoch": 3.4249471458773786, "grad_norm": 1.0148366689682007, "learning_rate": 6.265263157894738e-06, "loss": 0.0059, "step": 4050 }, { "epoch": 3.446088794926004, "grad_norm": 2.402777910232544, "learning_rate": 6.238947368421054e-06, "loss": 0.0075, "step": 4075 }, { "epoch": 3.46723044397463, "grad_norm": 1.122701644897461, "learning_rate": 6.212631578947369e-06, "loss": 0.0091, "step": 4100 }, { "epoch": 3.488372093023256, "grad_norm": 1.688050627708435, "learning_rate": 6.1863157894736845e-06, "loss": 0.0066, "step": 4125 }, { "epoch": 3.5095137420718814, "grad_norm": 0.5308606624603271, "learning_rate": 6.16e-06, "loss": 0.0084, "step": 4150 }, { "epoch": 3.5306553911205074, "grad_norm": 0.182705819606781, "learning_rate": 6.133684210526316e-06, "loss": 0.01, "step": 4175 }, { "epoch": 3.5517970401691334, "grad_norm": 1.427296757698059, "learning_rate": 6.107368421052632e-06, "loss": 0.0093, "step": 4200 }, { "epoch": 3.572938689217759, "grad_norm": 0.2774752974510193, "learning_rate": 6.081052631578948e-06, "loss": 0.0088, "step": 4225 }, { "epoch": 3.5940803382663846, "grad_norm": 1.2511072158813477, "learning_rate": 6.054736842105264e-06, "loss": 0.0105, "step": 4250 }, { "epoch": 3.6152219873150107, "grad_norm": 0.6384909152984619, "learning_rate": 6.0284210526315786e-06, "loss": 0.0112, "step": 4275 }, { "epoch": 3.6363636363636362, "grad_norm": 3.168208599090576, "learning_rate": 6.002105263157896e-06, "loss": 0.0108, "step": 4300 }, { "epoch": 3.6575052854122623, "grad_norm": 1.4083985090255737, "learning_rate": 5.975789473684212e-06, "loss": 0.0075, "step": 4325 }, { "epoch": 3.678646934460888, "grad_norm": 0.34528228640556335, "learning_rate": 5.949473684210527e-06, "loss": 0.0113, "step": 4350 }, { "epoch": 3.699788583509514, "grad_norm": 1.5832304954528809, "learning_rate": 5.923157894736843e-06, "loss": 0.0183, "step": 4375 }, { "epoch": 3.7209302325581395, "grad_norm": 0.5489822030067444, "learning_rate": 5.8968421052631585e-06, "loss": 0.0111, "step": 4400 }, { "epoch": 3.7420718816067655, "grad_norm": 2.3120322227478027, "learning_rate": 5.870526315789474e-06, "loss": 0.0081, "step": 4425 }, { "epoch": 3.763213530655391, "grad_norm": 0.9819779396057129, "learning_rate": 5.84421052631579e-06, "loss": 0.0111, "step": 4450 }, { "epoch": 3.7843551797040167, "grad_norm": 1.0312461853027344, "learning_rate": 5.817894736842106e-06, "loss": 0.0087, "step": 4475 }, { "epoch": 3.8054968287526427, "grad_norm": 3.042786121368408, "learning_rate": 5.791578947368422e-06, "loss": 0.0127, "step": 4500 }, { "epoch": 3.8266384778012688, "grad_norm": 0.740356981754303, "learning_rate": 5.765263157894737e-06, "loss": 0.0091, "step": 4525 }, { "epoch": 3.8477801268498943, "grad_norm": 0.25214338302612305, "learning_rate": 5.7389473684210526e-06, "loss": 0.0077, "step": 4550 }, { "epoch": 3.86892177589852, "grad_norm": 0.2431076020002365, "learning_rate": 5.712631578947368e-06, "loss": 0.0084, "step": 4575 }, { "epoch": 3.890063424947146, "grad_norm": 0.2818649113178253, "learning_rate": 5.686315789473684e-06, "loss": 0.0082, "step": 4600 }, { "epoch": 3.9112050739957716, "grad_norm": 0.6972672343254089, "learning_rate": 5.66e-06, "loss": 0.01, "step": 4625 }, { "epoch": 3.9323467230443976, "grad_norm": 1.6746132373809814, "learning_rate": 5.633684210526317e-06, "loss": 0.0097, "step": 4650 }, { "epoch": 3.953488372093023, "grad_norm": 1.0258132219314575, "learning_rate": 5.6073684210526325e-06, "loss": 0.0104, "step": 4675 }, { "epoch": 3.974630021141649, "grad_norm": 2.0761055946350098, "learning_rate": 5.581052631578948e-06, "loss": 0.008, "step": 4700 }, { "epoch": 3.995771670190275, "grad_norm": 0.18523390591144562, "learning_rate": 5.554736842105264e-06, "loss": 0.0078, "step": 4725 }, { "epoch": 4.016913319238901, "grad_norm": 0.3107724189758301, "learning_rate": 5.52842105263158e-06, "loss": 0.0042, "step": 4750 }, { "epoch": 4.038054968287526, "grad_norm": 1.8015187978744507, "learning_rate": 5.502105263157895e-06, "loss": 0.0059, "step": 4775 }, { "epoch": 4.059196617336152, "grad_norm": 0.2251424789428711, "learning_rate": 5.475789473684211e-06, "loss": 0.0047, "step": 4800 }, { "epoch": 4.080338266384778, "grad_norm": 1.2889034748077393, "learning_rate": 5.4494736842105266e-06, "loss": 0.0046, "step": 4825 }, { "epoch": 4.101479915433404, "grad_norm": 0.32070282101631165, "learning_rate": 5.423157894736842e-06, "loss": 0.0031, "step": 4850 }, { "epoch": 4.12262156448203, "grad_norm": 0.26217707991600037, "learning_rate": 5.396842105263158e-06, "loss": 0.0063, "step": 4875 }, { "epoch": 4.143763213530655, "grad_norm": 0.8193647861480713, "learning_rate": 5.370526315789474e-06, "loss": 0.0043, "step": 4900 }, { "epoch": 4.164904862579281, "grad_norm": 0.054519519209861755, "learning_rate": 5.34421052631579e-06, "loss": 0.0047, "step": 4925 }, { "epoch": 4.186046511627907, "grad_norm": 0.17601265013217926, "learning_rate": 5.317894736842105e-06, "loss": 0.0058, "step": 4950 }, { "epoch": 4.207188160676533, "grad_norm": 0.08115002512931824, "learning_rate": 5.291578947368422e-06, "loss": 0.0015, "step": 4975 }, { "epoch": 4.2283298097251585, "grad_norm": 0.0700853168964386, "learning_rate": 5.265263157894738e-06, "loss": 0.0033, "step": 5000 }, { "epoch": 4.2283298097251585, "eval_loss": 0.057503484189510345, "eval_runtime": 759.4854, "eval_samples_per_second": 4.794, "eval_steps_per_second": 0.3, "eval_wer": 0.040697944220149426, "step": 5000 }, { "epoch": 4.249471458773784, "grad_norm": 0.49534428119659424, "learning_rate": 5.238947368421053e-06, "loss": 0.0025, "step": 5025 }, { "epoch": 4.2706131078224105, "grad_norm": 0.10419642180204391, "learning_rate": 5.212631578947369e-06, "loss": 0.003, "step": 5050 }, { "epoch": 4.291754756871036, "grad_norm": 1.5288044214248657, "learning_rate": 5.186315789473685e-06, "loss": 0.0044, "step": 5075 }, { "epoch": 4.312896405919662, "grad_norm": 0.3455657660961151, "learning_rate": 5.1600000000000006e-06, "loss": 0.0053, "step": 5100 }, { "epoch": 4.334038054968287, "grad_norm": 0.32805994153022766, "learning_rate": 5.133684210526316e-06, "loss": 0.006, "step": 5125 }, { "epoch": 4.355179704016914, "grad_norm": 0.0669274553656578, "learning_rate": 5.107368421052632e-06, "loss": 0.0042, "step": 5150 }, { "epoch": 4.376321353065539, "grad_norm": 0.46637606620788574, "learning_rate": 5.081052631578948e-06, "loss": 0.0039, "step": 5175 }, { "epoch": 4.397463002114165, "grad_norm": 0.06938227266073227, "learning_rate": 5.054736842105263e-06, "loss": 0.0045, "step": 5200 }, { "epoch": 4.4186046511627906, "grad_norm": 0.25897443294525146, "learning_rate": 5.028421052631579e-06, "loss": 0.0019, "step": 5225 }, { "epoch": 4.439746300211416, "grad_norm": 0.0884798988699913, "learning_rate": 5.002105263157895e-06, "loss": 0.0051, "step": 5250 }, { "epoch": 4.460887949260043, "grad_norm": 2.0697453022003174, "learning_rate": 4.976842105263158e-06, "loss": 0.0036, "step": 5275 }, { "epoch": 4.482029598308668, "grad_norm": 0.494328111410141, "learning_rate": 4.950526315789474e-06, "loss": 0.0044, "step": 5300 }, { "epoch": 4.503171247357294, "grad_norm": 1.0127891302108765, "learning_rate": 4.92421052631579e-06, "loss": 0.0032, "step": 5325 }, { "epoch": 4.524312896405919, "grad_norm": Infinity, "learning_rate": 4.898947368421053e-06, "loss": 0.0058, "step": 5350 }, { "epoch": 4.545454545454545, "grad_norm": 0.9486972093582153, "learning_rate": 4.872631578947369e-06, "loss": 0.0072, "step": 5375 }, { "epoch": 4.5665961945031714, "grad_norm": 0.37844017148017883, "learning_rate": 4.846315789473685e-06, "loss": 0.0043, "step": 5400 }, { "epoch": 4.587737843551797, "grad_norm": 1.3698139190673828, "learning_rate": 4.8200000000000004e-06, "loss": 0.0046, "step": 5425 }, { "epoch": 4.608879492600423, "grad_norm": 0.14651530981063843, "learning_rate": 4.793684210526316e-06, "loss": 0.0041, "step": 5450 }, { "epoch": 4.630021141649049, "grad_norm": 1.030840277671814, "learning_rate": 4.767368421052632e-06, "loss": 0.0053, "step": 5475 }, { "epoch": 4.651162790697675, "grad_norm": 0.837679922580719, "learning_rate": 4.741052631578948e-06, "loss": 0.0034, "step": 5500 }, { "epoch": 4.6723044397463, "grad_norm": 0.12558767199516296, "learning_rate": 4.714736842105264e-06, "loss": 0.0024, "step": 5525 }, { "epoch": 4.693446088794926, "grad_norm": 2.1240885257720947, "learning_rate": 4.6884210526315795e-06, "loss": 0.0042, "step": 5550 }, { "epoch": 4.7145877378435515, "grad_norm": 0.08116896450519562, "learning_rate": 4.662105263157895e-06, "loss": 0.0029, "step": 5575 }, { "epoch": 4.735729386892178, "grad_norm": 1.2541615962982178, "learning_rate": 4.63578947368421e-06, "loss": 0.0085, "step": 5600 }, { "epoch": 4.7568710359408035, "grad_norm": 0.21351023018360138, "learning_rate": 4.609473684210526e-06, "loss": 0.0026, "step": 5625 }, { "epoch": 4.778012684989429, "grad_norm": 0.967060387134552, "learning_rate": 4.583157894736843e-06, "loss": 0.0058, "step": 5650 }, { "epoch": 4.799154334038055, "grad_norm": 0.8480948209762573, "learning_rate": 4.556842105263159e-06, "loss": 0.0038, "step": 5675 }, { "epoch": 4.82029598308668, "grad_norm": 0.0918637365102768, "learning_rate": 4.5305263157894744e-06, "loss": 0.0038, "step": 5700 }, { "epoch": 4.841437632135307, "grad_norm": 0.32755616307258606, "learning_rate": 4.504210526315789e-06, "loss": 0.0039, "step": 5725 }, { "epoch": 4.862579281183932, "grad_norm": 0.1790982037782669, "learning_rate": 4.477894736842105e-06, "loss": 0.0025, "step": 5750 }, { "epoch": 4.883720930232558, "grad_norm": 0.16250011324882507, "learning_rate": 4.451578947368421e-06, "loss": 0.0046, "step": 5775 }, { "epoch": 4.904862579281184, "grad_norm": 0.056490566581487656, "learning_rate": 4.425263157894737e-06, "loss": 0.0029, "step": 5800 }, { "epoch": 4.92600422832981, "grad_norm": 0.6812607645988464, "learning_rate": 4.3989473684210535e-06, "loss": 0.0034, "step": 5825 }, { "epoch": 4.947145877378436, "grad_norm": 0.10844116657972336, "learning_rate": 4.3726315789473685e-06, "loss": 0.0041, "step": 5850 }, { "epoch": 4.968287526427061, "grad_norm": 0.13944801688194275, "learning_rate": 4.346315789473684e-06, "loss": 0.0032, "step": 5875 }, { "epoch": 4.989429175475687, "grad_norm": 0.1417611837387085, "learning_rate": 4.32e-06, "loss": 0.0021, "step": 5900 }, { "epoch": 5.010570824524313, "grad_norm": 0.04675103724002838, "learning_rate": 4.293684210526316e-06, "loss": 0.0018, "step": 5925 }, { "epoch": 5.031712473572939, "grad_norm": 0.06213444098830223, "learning_rate": 4.267368421052632e-06, "loss": 0.0017, "step": 5950 }, { "epoch": 5.052854122621564, "grad_norm": 0.025097988545894623, "learning_rate": 4.241052631578948e-06, "loss": 0.0022, "step": 5975 }, { "epoch": 5.07399577167019, "grad_norm": 1.2869890928268433, "learning_rate": 4.214736842105263e-06, "loss": 0.0017, "step": 6000 }, { "epoch": 5.07399577167019, "eval_loss": 0.057429373264312744, "eval_runtime": 764.4931, "eval_samples_per_second": 4.763, "eval_steps_per_second": 0.298, "eval_wer": 0.044828066267576225, "step": 6000 }, { "epoch": 5.0951374207188165, "grad_norm": 0.08272965997457504, "learning_rate": 4.188421052631579e-06, "loss": 0.0038, "step": 6025 }, { "epoch": 5.116279069767442, "grad_norm": 0.03754328191280365, "learning_rate": 4.162105263157895e-06, "loss": 0.0019, "step": 6050 }, { "epoch": 5.137420718816068, "grad_norm": 0.021387765184044838, "learning_rate": 4.135789473684211e-06, "loss": 0.0009, "step": 6075 }, { "epoch": 5.158562367864693, "grad_norm": 0.8662779927253723, "learning_rate": 4.109473684210527e-06, "loss": 0.0013, "step": 6100 }, { "epoch": 5.179704016913319, "grad_norm": 0.04760267958045006, "learning_rate": 4.0831578947368425e-06, "loss": 0.0037, "step": 6125 }, { "epoch": 5.200845665961945, "grad_norm": 0.07986485213041306, "learning_rate": 4.056842105263158e-06, "loss": 0.0021, "step": 6150 }, { "epoch": 5.221987315010571, "grad_norm": 0.025027699768543243, "learning_rate": 4.030526315789474e-06, "loss": 0.0034, "step": 6175 }, { "epoch": 5.2431289640591965, "grad_norm": 0.04851312190294266, "learning_rate": 4.00421052631579e-06, "loss": 0.0025, "step": 6200 }, { "epoch": 5.264270613107822, "grad_norm": 0.7498076558113098, "learning_rate": 3.977894736842106e-06, "loss": 0.0028, "step": 6225 }, { "epoch": 5.2854122621564485, "grad_norm": 0.032242584973573685, "learning_rate": 3.951578947368422e-06, "loss": 0.0023, "step": 6250 }, { "epoch": 5.306553911205074, "grad_norm": 0.10821150243282318, "learning_rate": 3.9252631578947366e-06, "loss": 0.0065, "step": 6275 }, { "epoch": 5.3276955602537, "grad_norm": 0.05587538704276085, "learning_rate": 3.898947368421052e-06, "loss": 0.0011, "step": 6300 }, { "epoch": 5.348837209302325, "grad_norm": 0.046281397342681885, "learning_rate": 3.872631578947369e-06, "loss": 0.0027, "step": 6325 }, { "epoch": 5.369978858350952, "grad_norm": 0.6012808680534363, "learning_rate": 3.846315789473685e-06, "loss": 0.0019, "step": 6350 }, { "epoch": 5.391120507399577, "grad_norm": 0.08833307027816772, "learning_rate": 3.820000000000001e-06, "loss": 0.0013, "step": 6375 }, { "epoch": 5.412262156448203, "grad_norm": 0.02564876712858677, "learning_rate": 3.793684210526316e-06, "loss": 0.0021, "step": 6400 }, { "epoch": 5.4334038054968286, "grad_norm": 0.06358140707015991, "learning_rate": 3.767368421052632e-06, "loss": 0.003, "step": 6425 }, { "epoch": 5.454545454545454, "grad_norm": 0.048556044697761536, "learning_rate": 3.7410526315789473e-06, "loss": 0.0013, "step": 6450 }, { "epoch": 5.475687103594081, "grad_norm": 0.1476634293794632, "learning_rate": 3.714736842105263e-06, "loss": 0.0017, "step": 6475 }, { "epoch": 5.496828752642706, "grad_norm": 0.11191302537918091, "learning_rate": 3.6884210526315794e-06, "loss": 0.0022, "step": 6500 }, { "epoch": 5.517970401691332, "grad_norm": 0.08527684956789017, "learning_rate": 3.662105263157895e-06, "loss": 0.0034, "step": 6525 }, { "epoch": 5.539112050739957, "grad_norm": 0.04578416422009468, "learning_rate": 3.635789473684211e-06, "loss": 0.0028, "step": 6550 }, { "epoch": 5.560253699788584, "grad_norm": 1.1695352792739868, "learning_rate": 3.6094736842105264e-06, "loss": 0.0017, "step": 6575 }, { "epoch": 5.5813953488372094, "grad_norm": 0.02468816004693508, "learning_rate": 3.5831578947368422e-06, "loss": 0.0027, "step": 6600 }, { "epoch": 5.602536997885835, "grad_norm": 2.5693840980529785, "learning_rate": 3.556842105263158e-06, "loss": 0.0038, "step": 6625 }, { "epoch": 5.623678646934461, "grad_norm": 0.12754817306995392, "learning_rate": 3.5305263157894743e-06, "loss": 0.0047, "step": 6650 }, { "epoch": 5.644820295983086, "grad_norm": 0.22026591002941132, "learning_rate": 3.50421052631579e-06, "loss": 0.0013, "step": 6675 }, { "epoch": 5.665961945031713, "grad_norm": 0.052474796772003174, "learning_rate": 3.4778947368421055e-06, "loss": 0.004, "step": 6700 }, { "epoch": 5.687103594080338, "grad_norm": 0.05022185668349266, "learning_rate": 3.4515789473684213e-06, "loss": 0.0015, "step": 6725 }, { "epoch": 5.708245243128964, "grad_norm": 0.10212918370962143, "learning_rate": 3.425263157894737e-06, "loss": 0.0024, "step": 6750 }, { "epoch": 5.72938689217759, "grad_norm": 0.09313949197530746, "learning_rate": 3.398947368421053e-06, "loss": 0.0048, "step": 6775 }, { "epoch": 5.750528541226216, "grad_norm": 0.07640087604522705, "learning_rate": 3.3726315789473683e-06, "loss": 0.0026, "step": 6800 }, { "epoch": 5.7716701902748415, "grad_norm": 0.05444110184907913, "learning_rate": 3.3463157894736846e-06, "loss": 0.0017, "step": 6825 }, { "epoch": 5.792811839323467, "grad_norm": 0.03372509405016899, "learning_rate": 3.3200000000000004e-06, "loss": 0.0028, "step": 6850 }, { "epoch": 5.813953488372093, "grad_norm": 0.04323631897568703, "learning_rate": 3.2936842105263162e-06, "loss": 0.0015, "step": 6875 }, { "epoch": 5.835095137420719, "grad_norm": 0.6991068720817566, "learning_rate": 3.267368421052632e-06, "loss": 0.0027, "step": 6900 }, { "epoch": 5.856236786469345, "grad_norm": 0.12359145283699036, "learning_rate": 3.2410526315789474e-06, "loss": 0.0039, "step": 6925 }, { "epoch": 5.87737843551797, "grad_norm": 1.7748690843582153, "learning_rate": 3.2147368421052633e-06, "loss": 0.0021, "step": 6950 }, { "epoch": 5.898520084566596, "grad_norm": 0.050521768629550934, "learning_rate": 3.188421052631579e-06, "loss": 0.0012, "step": 6975 }, { "epoch": 5.9196617336152215, "grad_norm": 0.33487266302108765, "learning_rate": 3.1621052631578953e-06, "loss": 0.0013, "step": 7000 }, { "epoch": 5.9196617336152215, "eval_loss": 0.05544720217585564, "eval_runtime": 757.6669, "eval_samples_per_second": 4.806, "eval_steps_per_second": 0.301, "eval_wer": 0.038563274397883894, "step": 7000 }, { "epoch": 5.940803382663848, "grad_norm": 1.779146671295166, "learning_rate": 3.135789473684211e-06, "loss": 0.0046, "step": 7025 }, { "epoch": 5.961945031712474, "grad_norm": 0.150315061211586, "learning_rate": 3.1094736842105265e-06, "loss": 0.0016, "step": 7050 }, { "epoch": 5.983086680761099, "grad_norm": 1.2545819282531738, "learning_rate": 3.0831578947368423e-06, "loss": 0.0023, "step": 7075 }, { "epoch": 6.004228329809725, "grad_norm": 0.5065405368804932, "learning_rate": 3.056842105263158e-06, "loss": 0.0017, "step": 7100 }, { "epoch": 6.025369978858351, "grad_norm": 0.10508285462856293, "learning_rate": 3.0305263157894736e-06, "loss": 0.0015, "step": 7125 }, { "epoch": 6.046511627906977, "grad_norm": 0.020149648189544678, "learning_rate": 3.0042105263157894e-06, "loss": 0.001, "step": 7150 }, { "epoch": 6.067653276955602, "grad_norm": 0.021365733817219734, "learning_rate": 2.9778947368421056e-06, "loss": 0.0017, "step": 7175 }, { "epoch": 6.088794926004228, "grad_norm": 0.026428379118442535, "learning_rate": 2.9515789473684214e-06, "loss": 0.0014, "step": 7200 }, { "epoch": 6.1099365750528545, "grad_norm": 0.03086891956627369, "learning_rate": 2.9252631578947373e-06, "loss": 0.0011, "step": 7225 }, { "epoch": 6.13107822410148, "grad_norm": 1.588483452796936, "learning_rate": 2.8989473684210526e-06, "loss": 0.0019, "step": 7250 }, { "epoch": 6.152219873150106, "grad_norm": 1.1269233226776123, "learning_rate": 2.8726315789473685e-06, "loss": 0.0006, "step": 7275 }, { "epoch": 6.173361522198731, "grad_norm": 0.06000743806362152, "learning_rate": 2.8463157894736843e-06, "loss": 0.0008, "step": 7300 }, { "epoch": 6.194503171247358, "grad_norm": 0.022325266152620316, "learning_rate": 2.82e-06, "loss": 0.0008, "step": 7325 }, { "epoch": 6.215644820295983, "grad_norm": 0.023885082453489304, "learning_rate": 2.7936842105263163e-06, "loss": 0.0012, "step": 7350 }, { "epoch": 6.236786469344609, "grad_norm": 0.01704108528792858, "learning_rate": 2.7673684210526317e-06, "loss": 0.0009, "step": 7375 }, { "epoch": 6.2579281183932345, "grad_norm": 0.23580221831798553, "learning_rate": 2.7410526315789476e-06, "loss": 0.0012, "step": 7400 }, { "epoch": 6.27906976744186, "grad_norm": 0.018755685538053513, "learning_rate": 2.7147368421052634e-06, "loss": 0.0015, "step": 7425 }, { "epoch": 6.3002114164904865, "grad_norm": 0.039177898317575455, "learning_rate": 2.688421052631579e-06, "loss": 0.0008, "step": 7450 }, { "epoch": 6.321353065539112, "grad_norm": 1.1837704181671143, "learning_rate": 2.6621052631578946e-06, "loss": 0.0012, "step": 7475 }, { "epoch": 6.342494714587738, "grad_norm": 0.07270597666501999, "learning_rate": 2.635789473684211e-06, "loss": 0.0006, "step": 7500 }, { "epoch": 6.363636363636363, "grad_norm": 0.014915907755494118, "learning_rate": 2.6094736842105267e-06, "loss": 0.0011, "step": 7525 }, { "epoch": 6.38477801268499, "grad_norm": 0.020498577505350113, "learning_rate": 2.5831578947368425e-06, "loss": 0.0013, "step": 7550 }, { "epoch": 6.405919661733615, "grad_norm": 0.11151342839002609, "learning_rate": 2.5568421052631583e-06, "loss": 0.0014, "step": 7575 }, { "epoch": 6.427061310782241, "grad_norm": 0.07982934266328812, "learning_rate": 2.5305263157894737e-06, "loss": 0.001, "step": 7600 }, { "epoch": 6.4482029598308666, "grad_norm": 0.035042643547058105, "learning_rate": 2.5042105263157895e-06, "loss": 0.0009, "step": 7625 }, { "epoch": 6.469344608879492, "grad_norm": 0.016940327361226082, "learning_rate": 2.4778947368421053e-06, "loss": 0.002, "step": 7650 }, { "epoch": 6.490486257928119, "grad_norm": 0.017380723729729652, "learning_rate": 2.451578947368421e-06, "loss": 0.0006, "step": 7675 }, { "epoch": 6.511627906976744, "grad_norm": 0.028508609160780907, "learning_rate": 2.425263157894737e-06, "loss": 0.0018, "step": 7700 }, { "epoch": 6.53276955602537, "grad_norm": 0.020123794674873352, "learning_rate": 2.3989473684210528e-06, "loss": 0.0017, "step": 7725 }, { "epoch": 6.553911205073996, "grad_norm": 0.06142396479845047, "learning_rate": 2.3726315789473686e-06, "loss": 0.0016, "step": 7750 }, { "epoch": 6.575052854122622, "grad_norm": 0.02878367342054844, "learning_rate": 2.3463157894736844e-06, "loss": 0.0012, "step": 7775 }, { "epoch": 6.5961945031712474, "grad_norm": 0.020016765221953392, "learning_rate": 2.3200000000000002e-06, "loss": 0.0023, "step": 7800 }, { "epoch": 6.617336152219873, "grad_norm": 0.02607109770178795, "learning_rate": 2.293684210526316e-06, "loss": 0.0012, "step": 7825 }, { "epoch": 6.638477801268499, "grad_norm": 0.030171332880854607, "learning_rate": 2.267368421052632e-06, "loss": 0.0016, "step": 7850 }, { "epoch": 6.659619450317125, "grad_norm": 0.02076024003326893, "learning_rate": 2.2410526315789473e-06, "loss": 0.0006, "step": 7875 }, { "epoch": 6.680761099365751, "grad_norm": 0.014649393036961555, "learning_rate": 2.2147368421052635e-06, "loss": 0.0017, "step": 7900 }, { "epoch": 6.701902748414376, "grad_norm": 0.034021761268377304, "learning_rate": 2.188421052631579e-06, "loss": 0.0014, "step": 7925 }, { "epoch": 6.723044397463002, "grad_norm": 0.01666625402867794, "learning_rate": 2.1621052631578947e-06, "loss": 0.0017, "step": 7950 }, { "epoch": 6.7441860465116275, "grad_norm": 0.02546251006424427, "learning_rate": 2.135789473684211e-06, "loss": 0.0005, "step": 7975 }, { "epoch": 6.765327695560254, "grad_norm": 0.39473649859428406, "learning_rate": 2.1094736842105264e-06, "loss": 0.002, "step": 8000 }, { "epoch": 6.765327695560254, "eval_loss": 0.055500857532024384, "eval_runtime": 764.4429, "eval_samples_per_second": 4.763, "eval_steps_per_second": 0.298, "eval_wer": 0.04255417884820641, "step": 8000 }, { "epoch": 6.7864693446088795, "grad_norm": 0.03211165964603424, "learning_rate": 2.083157894736842e-06, "loss": 0.0018, "step": 8025 }, { "epoch": 6.807610993657505, "grad_norm": 0.01745191030204296, "learning_rate": 2.056842105263158e-06, "loss": 0.0016, "step": 8050 }, { "epoch": 6.828752642706131, "grad_norm": 0.020556088536977768, "learning_rate": 2.030526315789474e-06, "loss": 0.0003, "step": 8075 }, { "epoch": 6.849894291754757, "grad_norm": 0.01723705790936947, "learning_rate": 2.0042105263157896e-06, "loss": 0.0008, "step": 8100 }, { "epoch": 6.871035940803383, "grad_norm": 0.02272706665098667, "learning_rate": 1.9778947368421055e-06, "loss": 0.0018, "step": 8125 }, { "epoch": 6.892177589852008, "grad_norm": 0.015349403955042362, "learning_rate": 1.9515789473684213e-06, "loss": 0.0005, "step": 8150 }, { "epoch": 6.913319238900634, "grad_norm": 0.9683336019515991, "learning_rate": 1.925263157894737e-06, "loss": 0.0008, "step": 8175 }, { "epoch": 6.93446088794926, "grad_norm": 0.02504642680287361, "learning_rate": 1.8989473684210527e-06, "loss": 0.0022, "step": 8200 }, { "epoch": 6.955602536997886, "grad_norm": 0.013240883126854897, "learning_rate": 1.8726315789473687e-06, "loss": 0.0004, "step": 8225 }, { "epoch": 6.976744186046512, "grad_norm": 0.10656526684761047, "learning_rate": 1.8463157894736843e-06, "loss": 0.0008, "step": 8250 }, { "epoch": 6.997885835095137, "grad_norm": 0.02900248020887375, "learning_rate": 1.8200000000000002e-06, "loss": 0.0015, "step": 8275 }, { "epoch": 7.019027484143764, "grad_norm": 0.01494303997606039, "learning_rate": 1.7936842105263158e-06, "loss": 0.0007, "step": 8300 }, { "epoch": 7.040169133192389, "grad_norm": 0.011092742905020714, "learning_rate": 1.7673684210526318e-06, "loss": 0.0006, "step": 8325 }, { "epoch": 7.061310782241015, "grad_norm": 0.00914891716092825, "learning_rate": 1.7410526315789474e-06, "loss": 0.0005, "step": 8350 }, { "epoch": 7.08245243128964, "grad_norm": 0.01832897588610649, "learning_rate": 1.7147368421052632e-06, "loss": 0.0002, "step": 8375 }, { "epoch": 7.103594080338266, "grad_norm": 0.016141528263688087, "learning_rate": 1.6884210526315792e-06, "loss": 0.0004, "step": 8400 }, { "epoch": 7.1247357293868925, "grad_norm": 0.01363268494606018, "learning_rate": 1.6621052631578948e-06, "loss": 0.0006, "step": 8425 }, { "epoch": 7.145877378435518, "grad_norm": 0.7340702414512634, "learning_rate": 1.6357894736842107e-06, "loss": 0.0008, "step": 8450 }, { "epoch": 7.167019027484144, "grad_norm": 0.01294713094830513, "learning_rate": 1.6094736842105265e-06, "loss": 0.0008, "step": 8475 }, { "epoch": 7.188160676532769, "grad_norm": 0.009142986498773098, "learning_rate": 1.5831578947368423e-06, "loss": 0.0003, "step": 8500 }, { "epoch": 7.209302325581396, "grad_norm": 0.017382999882102013, "learning_rate": 1.556842105263158e-06, "loss": 0.0009, "step": 8525 }, { "epoch": 7.230443974630021, "grad_norm": 0.014677566476166248, "learning_rate": 1.5305263157894737e-06, "loss": 0.0002, "step": 8550 }, { "epoch": 7.251585623678647, "grad_norm": 0.9460182785987854, "learning_rate": 1.5042105263157898e-06, "loss": 0.0014, "step": 8575 }, { "epoch": 7.2727272727272725, "grad_norm": 0.9913358688354492, "learning_rate": 1.4778947368421054e-06, "loss": 0.0009, "step": 8600 }, { "epoch": 7.293868921775898, "grad_norm": 0.017043571919202805, "learning_rate": 1.4515789473684212e-06, "loss": 0.0007, "step": 8625 }, { "epoch": 7.3150105708245245, "grad_norm": 0.010510086081922054, "learning_rate": 1.425263157894737e-06, "loss": 0.0004, "step": 8650 }, { "epoch": 7.33615221987315, "grad_norm": 0.015979068353772163, "learning_rate": 1.3989473684210528e-06, "loss": 0.0004, "step": 8675 }, { "epoch": 7.357293868921776, "grad_norm": 0.017196275293827057, "learning_rate": 1.3726315789473684e-06, "loss": 0.0005, "step": 8700 }, { "epoch": 7.378435517970401, "grad_norm": 0.009500819258391857, "learning_rate": 1.3463157894736842e-06, "loss": 0.0002, "step": 8725 }, { "epoch": 7.399577167019028, "grad_norm": 0.019156746566295624, "learning_rate": 1.32e-06, "loss": 0.0016, "step": 8750 }, { "epoch": 7.420718816067653, "grad_norm": 0.010634716600179672, "learning_rate": 1.2936842105263159e-06, "loss": 0.0012, "step": 8775 }, { "epoch": 7.441860465116279, "grad_norm": 0.02140488103032112, "learning_rate": 1.2673684210526315e-06, "loss": 0.0006, "step": 8800 }, { "epoch": 7.4630021141649046, "grad_norm": 0.01485748216509819, "learning_rate": 1.2410526315789475e-06, "loss": 0.0006, "step": 8825 }, { "epoch": 7.484143763213531, "grad_norm": 0.015150833874940872, "learning_rate": 1.2147368421052633e-06, "loss": 0.0009, "step": 8850 }, { "epoch": 7.505285412262157, "grad_norm": 0.018471376970410347, "learning_rate": 1.188421052631579e-06, "loss": 0.0006, "step": 8875 }, { "epoch": 7.526427061310782, "grad_norm": 0.011308133602142334, "learning_rate": 1.1621052631578948e-06, "loss": 0.0005, "step": 8900 }, { "epoch": 7.547568710359408, "grad_norm": 0.01056403573602438, "learning_rate": 1.1357894736842106e-06, "loss": 0.0014, "step": 8925 }, { "epoch": 7.568710359408033, "grad_norm": 0.013321136124432087, "learning_rate": 1.1094736842105264e-06, "loss": 0.0009, "step": 8950 }, { "epoch": 7.58985200845666, "grad_norm": 0.01472330279648304, "learning_rate": 1.0831578947368422e-06, "loss": 0.0005, "step": 8975 }, { "epoch": 7.6109936575052854, "grad_norm": 0.01450197771191597, "learning_rate": 1.0568421052631578e-06, "loss": 0.0002, "step": 9000 }, { "epoch": 7.6109936575052854, "eval_loss": 0.05712108314037323, "eval_runtime": 762.8356, "eval_samples_per_second": 4.773, "eval_steps_per_second": 0.299, "eval_wer": 0.042136526056893595, "step": 9000 }, { "epoch": 7.632135306553911, "grad_norm": 0.014398843050003052, "learning_rate": 1.0305263157894739e-06, "loss": 0.0006, "step": 9025 }, { "epoch": 7.653276955602537, "grad_norm": 0.035577815026044846, "learning_rate": 1.0042105263157897e-06, "loss": 0.0004, "step": 9050 }, { "epoch": 7.674418604651163, "grad_norm": 0.012484509497880936, "learning_rate": 9.778947368421053e-07, "loss": 0.0003, "step": 9075 }, { "epoch": 7.695560253699789, "grad_norm": 0.015736937522888184, "learning_rate": 9.515789473684212e-07, "loss": 0.0013, "step": 9100 }, { "epoch": 7.716701902748414, "grad_norm": 0.02214883267879486, "learning_rate": 9.252631578947368e-07, "loss": 0.0008, "step": 9125 }, { "epoch": 7.73784355179704, "grad_norm": 0.015202338807284832, "learning_rate": 8.989473684210527e-07, "loss": 0.0008, "step": 9150 }, { "epoch": 7.758985200845666, "grad_norm": 0.01783289574086666, "learning_rate": 8.726315789473686e-07, "loss": 0.0002, "step": 9175 }, { "epoch": 7.780126849894292, "grad_norm": 0.01469349954277277, "learning_rate": 8.463157894736843e-07, "loss": 0.0007, "step": 9200 }, { "epoch": 7.8012684989429175, "grad_norm": 0.014686803333461285, "learning_rate": 8.200000000000001e-07, "loss": 0.0005, "step": 9225 }, { "epoch": 7.822410147991543, "grad_norm": 0.012407947331666946, "learning_rate": 7.936842105263158e-07, "loss": 0.0004, "step": 9250 }, { "epoch": 7.843551797040169, "grad_norm": 0.12860046327114105, "learning_rate": 7.673684210526316e-07, "loss": 0.0002, "step": 9275 }, { "epoch": 7.864693446088795, "grad_norm": 0.007339488714933395, "learning_rate": 7.410526315789475e-07, "loss": 0.0007, "step": 9300 }, { "epoch": 7.885835095137421, "grad_norm": 0.10270128399133682, "learning_rate": 7.147368421052632e-07, "loss": 0.0007, "step": 9325 }, { "epoch": 7.906976744186046, "grad_norm": 0.010488270781934261, "learning_rate": 6.884210526315791e-07, "loss": 0.0013, "step": 9350 }, { "epoch": 7.928118393234672, "grad_norm": 0.015831220895051956, "learning_rate": 6.621052631578948e-07, "loss": 0.0002, "step": 9375 }, { "epoch": 7.949260042283298, "grad_norm": 0.0610765777528286, "learning_rate": 6.357894736842106e-07, "loss": 0.0005, "step": 9400 }, { "epoch": 7.970401691331924, "grad_norm": 0.01717539131641388, "learning_rate": 6.094736842105263e-07, "loss": 0.0002, "step": 9425 }, { "epoch": 7.99154334038055, "grad_norm": 0.016577402129769325, "learning_rate": 5.831578947368421e-07, "loss": 0.0009, "step": 9450 }, { "epoch": 8.012684989429175, "grad_norm": 0.007952416315674782, "learning_rate": 5.56842105263158e-07, "loss": 0.0005, "step": 9475 }, { "epoch": 8.033826638477802, "grad_norm": 0.0123605253174901, "learning_rate": 5.305263157894737e-07, "loss": 0.0002, "step": 9500 }, { "epoch": 8.054968287526426, "grad_norm": 0.009480569511651993, "learning_rate": 5.042105263157895e-07, "loss": 0.0002, "step": 9525 }, { "epoch": 8.076109936575053, "grad_norm": 0.7348806262016296, "learning_rate": 4.778947368421053e-07, "loss": 0.0003, "step": 9550 }, { "epoch": 8.09725158562368, "grad_norm": 0.011551867239177227, "learning_rate": 4.5157894736842107e-07, "loss": 0.0002, "step": 9575 }, { "epoch": 8.118393234672304, "grad_norm": 0.010176840238273144, "learning_rate": 4.2526315789473684e-07, "loss": 0.0002, "step": 9600 }, { "epoch": 8.13953488372093, "grad_norm": 0.021270159631967545, "learning_rate": 3.9894736842105266e-07, "loss": 0.0004, "step": 9625 }, { "epoch": 8.160676532769555, "grad_norm": 0.006264516618102789, "learning_rate": 3.726315789473685e-07, "loss": 0.0003, "step": 9650 }, { "epoch": 8.181818181818182, "grad_norm": 0.013031111098825932, "learning_rate": 3.4631578947368424e-07, "loss": 0.0005, "step": 9675 }, { "epoch": 8.202959830866808, "grad_norm": 0.011172004975378513, "learning_rate": 3.2e-07, "loss": 0.0002, "step": 9700 }, { "epoch": 8.224101479915433, "grad_norm": 0.01253934670239687, "learning_rate": 2.936842105263158e-07, "loss": 0.0006, "step": 9725 }, { "epoch": 8.24524312896406, "grad_norm": 0.012547343969345093, "learning_rate": 2.6736842105263164e-07, "loss": 0.0002, "step": 9750 }, { "epoch": 8.266384778012686, "grad_norm": 0.011723175644874573, "learning_rate": 2.410526315789474e-07, "loss": 0.0003, "step": 9775 }, { "epoch": 8.28752642706131, "grad_norm": 0.014090826734900475, "learning_rate": 2.1473684210526317e-07, "loss": 0.0002, "step": 9800 }, { "epoch": 8.308668076109937, "grad_norm": 0.011757822707295418, "learning_rate": 1.8842105263157897e-07, "loss": 0.0002, "step": 9825 }, { "epoch": 8.329809725158562, "grad_norm": 0.014979742467403412, "learning_rate": 1.6210526315789476e-07, "loss": 0.0008, "step": 9850 }, { "epoch": 8.350951374207188, "grad_norm": 0.01544391643255949, "learning_rate": 1.3578947368421055e-07, "loss": 0.0004, "step": 9875 }, { "epoch": 8.372093023255815, "grad_norm": 0.009578757919371128, "learning_rate": 1.0947368421052632e-07, "loss": 0.0002, "step": 9900 }, { "epoch": 8.39323467230444, "grad_norm": 0.007570895832031965, "learning_rate": 8.315789473684211e-08, "loss": 0.0003, "step": 9925 }, { "epoch": 8.414376321353066, "grad_norm": 0.014304804615676403, "learning_rate": 5.68421052631579e-08, "loss": 0.0008, "step": 9950 }, { "epoch": 8.43551797040169, "grad_norm": 0.9690393209457397, "learning_rate": 3.0526315789473686e-08, "loss": 0.0004, "step": 9975 }, { "epoch": 8.456659619450317, "grad_norm": 0.025486772879958153, "learning_rate": 4.210526315789474e-09, "loss": 0.0005, "step": 10000 }, { "epoch": 8.456659619450317, "eval_loss": 0.057420697063207626, "eval_runtime": 765.3675, "eval_samples_per_second": 4.757, "eval_steps_per_second": 0.298, "eval_wer": 0.04241496125110214, "step": 10000 }, { "epoch": 8.456659619450317, "step": 10000, "total_flos": 3.264874274960179e+20, "train_loss": 0.03444647903675213, "train_runtime": 65926.1076, "train_samples_per_second": 4.854, "train_steps_per_second": 0.152 } ], "logging_steps": 25, "max_steps": 10000, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.264874274960179e+20, "train_batch_size": 16, "trial_name": null, "trial_params": null }