|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 14800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.6375e-06, |
|
"loss": 19.2806, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 7.3875e-06, |
|
"loss": 10.959, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 1.1137499999999998e-05, |
|
"loss": 6.9106, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.48875e-05, |
|
"loss": 5.624, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.86375e-05, |
|
"loss": 4.6095, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_loss": 4.5881452560424805, |
|
"eval_runtime": 92.8763, |
|
"eval_samples_per_second": 22.557, |
|
"eval_steps_per_second": 0.711, |
|
"eval_wer": 0.9999453103636861, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 2.23875e-05, |
|
"loss": 3.9225, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.6137499999999995e-05, |
|
"loss": 3.5503, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 2.9887499999999998e-05, |
|
"loss": 3.4262, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 3.36375e-05, |
|
"loss": 3.391, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.7387499999999994e-05, |
|
"loss": 3.3396, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"eval_loss": 3.3301022052764893, |
|
"eval_runtime": 93.143, |
|
"eval_samples_per_second": 22.492, |
|
"eval_steps_per_second": 0.709, |
|
"eval_wer": 1.0000546896363138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 4.11375e-05, |
|
"loss": 3.2895, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 4.48875e-05, |
|
"loss": 3.228, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 4.8637499999999996e-05, |
|
"loss": 2.987, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 9.46, |
|
"learning_rate": 5.23875e-05, |
|
"loss": 2.4352, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 5.61375e-05, |
|
"loss": 2.0061, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"eval_loss": 1.20964777469635, |
|
"eval_runtime": 93.1759, |
|
"eval_samples_per_second": 22.484, |
|
"eval_steps_per_second": 0.708, |
|
"eval_wer": 1.0063439978124145, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 5.988749999999999e-05, |
|
"loss": 1.7983, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 11.49, |
|
"learning_rate": 6.36375e-05, |
|
"loss": 1.6684, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 12.16, |
|
"learning_rate": 6.738749999999999e-05, |
|
"loss": 1.6307, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 7.11375e-05, |
|
"loss": 1.5654, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 7.48875e-05, |
|
"loss": 1.523, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"eval_loss": 0.7836298942565918, |
|
"eval_runtime": 93.1376, |
|
"eval_samples_per_second": 22.494, |
|
"eval_steps_per_second": 0.709, |
|
"eval_wer": 1.0051408258135084, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 7.4431640625e-05, |
|
"loss": 1.4937, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 14.86, |
|
"learning_rate": 7.384570312499999e-05, |
|
"loss": 1.4538, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 15.54, |
|
"learning_rate": 7.325976562499999e-05, |
|
"loss": 1.4016, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 7.2673828125e-05, |
|
"loss": 1.403, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"learning_rate": 7.2087890625e-05, |
|
"loss": 1.3868, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 16.89, |
|
"eval_loss": 0.6836622953414917, |
|
"eval_runtime": 92.9915, |
|
"eval_samples_per_second": 22.529, |
|
"eval_steps_per_second": 0.71, |
|
"eval_wer": 1.007984686901832, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 17.57, |
|
"learning_rate": 7.150195312499999e-05, |
|
"loss": 1.3313, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 18.24, |
|
"learning_rate": 7.091601562499999e-05, |
|
"loss": 1.2931, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 7.0330078125e-05, |
|
"loss": 1.3319, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 19.59, |
|
"learning_rate": 6.9744140625e-05, |
|
"loss": 1.3133, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"learning_rate": 6.915820312499999e-05, |
|
"loss": 1.2807, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.27, |
|
"eval_loss": 0.656791090965271, |
|
"eval_runtime": 93.0318, |
|
"eval_samples_per_second": 22.519, |
|
"eval_steps_per_second": 0.709, |
|
"eval_wer": 1.0112113754443532, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 20.95, |
|
"learning_rate": 6.857226562499999e-05, |
|
"loss": 1.2626, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"learning_rate": 6.7986328125e-05, |
|
"loss": 1.2535, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 22.3, |
|
"learning_rate": 6.7400390625e-05, |
|
"loss": 1.2297, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"learning_rate": 6.681445312499999e-05, |
|
"loss": 1.2074, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 23.65, |
|
"learning_rate": 6.6234375e-05, |
|
"loss": 1.231, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 23.65, |
|
"eval_loss": 0.612037718296051, |
|
"eval_runtime": 93.2672, |
|
"eval_samples_per_second": 22.462, |
|
"eval_steps_per_second": 0.708, |
|
"eval_wer": 1.0105004101722723, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 6.564843749999999e-05, |
|
"loss": 1.2092, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 6.50625e-05, |
|
"loss": 1.1775, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 25.68, |
|
"learning_rate": 6.44765625e-05, |
|
"loss": 1.1916, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 26.35, |
|
"learning_rate": 6.3890625e-05, |
|
"loss": 1.1798, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 6.330468749999999e-05, |
|
"loss": 1.1673, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"eval_loss": 0.5971782207489014, |
|
"eval_runtime": 93.4057, |
|
"eval_samples_per_second": 22.429, |
|
"eval_steps_per_second": 0.707, |
|
"eval_wer": 1.0089144107191688, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 27.7, |
|
"learning_rate": 6.271875e-05, |
|
"loss": 1.1626, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 28.38, |
|
"learning_rate": 6.21328125e-05, |
|
"loss": 1.162, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 29.05, |
|
"learning_rate": 6.1546875e-05, |
|
"loss": 1.1471, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 29.73, |
|
"learning_rate": 6.09609375e-05, |
|
"loss": 1.1203, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"learning_rate": 6.0375e-05, |
|
"loss": 1.1416, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 30.41, |
|
"eval_loss": 0.5780259966850281, |
|
"eval_runtime": 92.6708, |
|
"eval_samples_per_second": 22.607, |
|
"eval_steps_per_second": 0.712, |
|
"eval_wer": 1.0131802023516543, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 31.08, |
|
"learning_rate": 5.97890625e-05, |
|
"loss": 1.1175, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 31.76, |
|
"learning_rate": 5.9203125e-05, |
|
"loss": 1.1053, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"learning_rate": 5.86171875e-05, |
|
"loss": 1.107, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 33.11, |
|
"learning_rate": 5.803125e-05, |
|
"loss": 1.115, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"learning_rate": 5.74453125e-05, |
|
"loss": 1.0738, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 33.78, |
|
"eval_loss": 0.580643892288208, |
|
"eval_runtime": 95.0733, |
|
"eval_samples_per_second": 22.036, |
|
"eval_steps_per_second": 0.694, |
|
"eval_wer": 1.0123051681706317, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 34.46, |
|
"learning_rate": 5.686523437499999e-05, |
|
"loss": 1.0778, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 35.14, |
|
"learning_rate": 5.6279296874999994e-05, |
|
"loss": 1.0948, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 35.81, |
|
"learning_rate": 5.569335937499999e-05, |
|
"loss": 1.062, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 36.49, |
|
"learning_rate": 5.5107421874999994e-05, |
|
"loss": 1.0503, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"learning_rate": 5.452148437499999e-05, |
|
"loss": 1.0771, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 37.16, |
|
"eval_loss": 0.5585715174674988, |
|
"eval_runtime": 92.7751, |
|
"eval_samples_per_second": 22.581, |
|
"eval_steps_per_second": 0.711, |
|
"eval_wer": 1.0066721356302981, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 37.84, |
|
"learning_rate": 5.3935546874999995e-05, |
|
"loss": 1.0368, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 38.51, |
|
"learning_rate": 5.334960937499999e-05, |
|
"loss": 1.0329, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 39.19, |
|
"learning_rate": 5.2763671874999995e-05, |
|
"loss": 1.0402, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 39.86, |
|
"learning_rate": 5.217773437499999e-05, |
|
"loss": 1.0346, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"learning_rate": 5.1591796874999995e-05, |
|
"loss": 1.0287, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"eval_loss": 0.5463963747024536, |
|
"eval_runtime": 92.708, |
|
"eval_samples_per_second": 22.598, |
|
"eval_steps_per_second": 0.712, |
|
"eval_wer": 1.0057971014492753, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 41.22, |
|
"learning_rate": 5.100585937499999e-05, |
|
"loss": 1.0196, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 41.89, |
|
"learning_rate": 5.0419921874999995e-05, |
|
"loss": 1.0198, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 42.57, |
|
"learning_rate": 4.983398437499999e-05, |
|
"loss": 0.9864, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"learning_rate": 4.9248046874999996e-05, |
|
"loss": 0.999, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"learning_rate": 4.866210937499999e-05, |
|
"loss": 1.0106, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 43.92, |
|
"eval_loss": 0.5407418608665466, |
|
"eval_runtime": 92.9267, |
|
"eval_samples_per_second": 22.545, |
|
"eval_steps_per_second": 0.71, |
|
"eval_wer": 1.0061799289034727, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 44.59, |
|
"learning_rate": 4.8076171874999996e-05, |
|
"loss": 0.9816, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 45.27, |
|
"learning_rate": 4.749023437499999e-05, |
|
"loss": 0.976, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"learning_rate": 4.6904296874999996e-05, |
|
"loss": 0.9833, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 46.62, |
|
"learning_rate": 4.631835937499999e-05, |
|
"loss": 0.9905, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 47.3, |
|
"learning_rate": 4.5732421875e-05, |
|
"loss": 0.9538, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 47.3, |
|
"eval_loss": 0.5333988070487976, |
|
"eval_runtime": 92.3771, |
|
"eval_samples_per_second": 22.679, |
|
"eval_steps_per_second": 0.714, |
|
"eval_wer": 1.0089144107191688, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 47.97, |
|
"learning_rate": 4.5146484374999993e-05, |
|
"loss": 0.9578, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 48.65, |
|
"learning_rate": 4.4560546875e-05, |
|
"loss": 0.9607, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 49.32, |
|
"learning_rate": 4.3974609374999994e-05, |
|
"loss": 0.9451, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 4.3388671875e-05, |
|
"loss": 0.9453, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 50.68, |
|
"learning_rate": 4.2802734374999994e-05, |
|
"loss": 0.9607, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 50.68, |
|
"eval_loss": 0.5395269989967346, |
|
"eval_runtime": 92.6715, |
|
"eval_samples_per_second": 22.607, |
|
"eval_steps_per_second": 0.712, |
|
"eval_wer": 1.0110473065354115, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 51.35, |
|
"learning_rate": 4.2216796875e-05, |
|
"loss": 0.9445, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 52.03, |
|
"learning_rate": 4.1630859374999994e-05, |
|
"loss": 0.9314, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 52.7, |
|
"learning_rate": 4.1044921875e-05, |
|
"loss": 0.9166, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 53.38, |
|
"learning_rate": 4.0458984374999995e-05, |
|
"loss": 0.9264, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"learning_rate": 3.987304687499999e-05, |
|
"loss": 0.9108, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"eval_loss": 0.5501919388771057, |
|
"eval_runtime": 92.8418, |
|
"eval_samples_per_second": 22.565, |
|
"eval_steps_per_second": 0.711, |
|
"eval_wer": 1.0137270987147935, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 54.73, |
|
"learning_rate": 3.9287109374999995e-05, |
|
"loss": 0.9215, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 55.41, |
|
"learning_rate": 3.870117187499999e-05, |
|
"loss": 0.9326, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 56.08, |
|
"learning_rate": 3.8115234374999995e-05, |
|
"loss": 0.9199, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 56.76, |
|
"learning_rate": 3.752929687499999e-05, |
|
"loss": 0.9043, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 57.43, |
|
"learning_rate": 3.6943359374999996e-05, |
|
"loss": 0.9252, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 57.43, |
|
"eval_loss": 0.5498473644256592, |
|
"eval_runtime": 92.009, |
|
"eval_samples_per_second": 22.77, |
|
"eval_steps_per_second": 0.717, |
|
"eval_wer": 1.0062346185397868, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 58.11, |
|
"learning_rate": 3.635742187499999e-05, |
|
"loss": 0.9041, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 58.78, |
|
"learning_rate": 3.5771484374999996e-05, |
|
"loss": 0.8963, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 59.46, |
|
"learning_rate": 3.518554687499999e-05, |
|
"loss": 0.901, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 60.14, |
|
"learning_rate": 3.4599609374999996e-05, |
|
"loss": 0.8947, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 60.81, |
|
"learning_rate": 3.401367187499999e-05, |
|
"loss": 0.8943, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 60.81, |
|
"eval_loss": 0.5447660684585571, |
|
"eval_runtime": 92.1905, |
|
"eval_samples_per_second": 22.725, |
|
"eval_steps_per_second": 0.716, |
|
"eval_wer": 1.0158053048947224, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 61.49, |
|
"learning_rate": 3.3427734375e-05, |
|
"loss": 0.875, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 62.16, |
|
"learning_rate": 3.284765625e-05, |
|
"loss": 0.8961, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 62.84, |
|
"learning_rate": 3.2261718749999996e-05, |
|
"loss": 0.8848, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 63.51, |
|
"learning_rate": 3.167578125e-05, |
|
"loss": 0.8573, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 64.19, |
|
"learning_rate": 3.1089843749999996e-05, |
|
"loss": 0.8728, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 64.19, |
|
"eval_loss": 0.5256930589675903, |
|
"eval_runtime": 92.3173, |
|
"eval_samples_per_second": 22.693, |
|
"eval_steps_per_second": 0.715, |
|
"eval_wer": 1.0113207547169811, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 64.86, |
|
"learning_rate": 3.050390625e-05, |
|
"loss": 0.8819, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 65.54, |
|
"learning_rate": 2.991796875e-05, |
|
"loss": 0.8324, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 66.22, |
|
"learning_rate": 2.933203125e-05, |
|
"loss": 0.8461, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 66.89, |
|
"learning_rate": 2.874609375e-05, |
|
"loss": 0.8512, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 67.57, |
|
"learning_rate": 2.816015625e-05, |
|
"loss": 0.8577, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 67.57, |
|
"eval_loss": 0.554978609085083, |
|
"eval_runtime": 92.3777, |
|
"eval_samples_per_second": 22.679, |
|
"eval_steps_per_second": 0.714, |
|
"eval_wer": 1.0177741318020235, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 68.24, |
|
"learning_rate": 2.7574218749999997e-05, |
|
"loss": 0.845, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 68.92, |
|
"learning_rate": 2.6988281249999997e-05, |
|
"loss": 0.8622, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 69.59, |
|
"learning_rate": 2.6402343749999998e-05, |
|
"loss": 0.8534, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 70.27, |
|
"learning_rate": 2.5816406249999998e-05, |
|
"loss": 0.8328, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 70.95, |
|
"learning_rate": 2.5230468749999998e-05, |
|
"loss": 0.8332, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 70.95, |
|
"eval_loss": 0.5607455968856812, |
|
"eval_runtime": 91.9364, |
|
"eval_samples_per_second": 22.787, |
|
"eval_steps_per_second": 0.718, |
|
"eval_wer": 1.0165709598031174, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 71.62, |
|
"learning_rate": 2.4644531249999998e-05, |
|
"loss": 0.8389, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 72.3, |
|
"learning_rate": 2.4058593749999998e-05, |
|
"loss": 0.8225, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 72.97, |
|
"learning_rate": 2.347265625e-05, |
|
"loss": 0.8141, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 73.65, |
|
"learning_rate": 2.288671875e-05, |
|
"loss": 0.8215, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 74.32, |
|
"learning_rate": 2.230078125e-05, |
|
"loss": 0.8174, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 74.32, |
|
"eval_loss": 0.5428524613380432, |
|
"eval_runtime": 94.2641, |
|
"eval_samples_per_second": 22.225, |
|
"eval_steps_per_second": 0.7, |
|
"eval_wer": 1.0145474432595023, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 2.171484375e-05, |
|
"loss": 0.7876, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 75.68, |
|
"learning_rate": 2.112890625e-05, |
|
"loss": 0.8226, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 76.35, |
|
"learning_rate": 2.054296875e-05, |
|
"loss": 0.8177, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 77.03, |
|
"learning_rate": 1.995703125e-05, |
|
"loss": 0.7941, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 77.7, |
|
"learning_rate": 1.937109375e-05, |
|
"loss": 0.8168, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 77.7, |
|
"eval_loss": 0.5561283230781555, |
|
"eval_runtime": 94.9459, |
|
"eval_samples_per_second": 22.065, |
|
"eval_steps_per_second": 0.695, |
|
"eval_wer": 1.0116488925348646, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 78.38, |
|
"learning_rate": 1.878515625e-05, |
|
"loss": 0.8095, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 79.05, |
|
"learning_rate": 1.8199218749999996e-05, |
|
"loss": 0.791, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 79.73, |
|
"learning_rate": 1.7613281249999997e-05, |
|
"loss": 0.7812, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 80.41, |
|
"learning_rate": 1.7033203125e-05, |
|
"loss": 0.8103, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 81.08, |
|
"learning_rate": 1.6447265625e-05, |
|
"loss": 0.7872, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 81.08, |
|
"eval_loss": 0.5477647185325623, |
|
"eval_runtime": 95.0024, |
|
"eval_samples_per_second": 22.052, |
|
"eval_steps_per_second": 0.695, |
|
"eval_wer": 1.0163522012578616, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 81.76, |
|
"learning_rate": 1.5861328125e-05, |
|
"loss": 0.7658, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 82.43, |
|
"learning_rate": 1.5275390625e-05, |
|
"loss": 0.7891, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 83.11, |
|
"learning_rate": 1.4689453124999998e-05, |
|
"loss": 0.7723, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 83.78, |
|
"learning_rate": 1.4103515624999998e-05, |
|
"loss": 0.7773, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 84.46, |
|
"learning_rate": 1.3517578124999998e-05, |
|
"loss": 0.7707, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 84.46, |
|
"eval_loss": 0.5412248373031616, |
|
"eval_runtime": 95.413, |
|
"eval_samples_per_second": 21.957, |
|
"eval_steps_per_second": 0.692, |
|
"eval_wer": 1.021602406343998, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 85.14, |
|
"learning_rate": 1.2931640624999999e-05, |
|
"loss": 0.7876, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 85.81, |
|
"learning_rate": 1.2345703124999999e-05, |
|
"loss": 0.7707, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 86.49, |
|
"learning_rate": 1.1759765624999999e-05, |
|
"loss": 0.7654, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 87.16, |
|
"learning_rate": 1.1173828124999999e-05, |
|
"loss": 0.77, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 87.84, |
|
"learning_rate": 1.0587890625e-05, |
|
"loss": 0.7742, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 87.84, |
|
"eval_loss": 0.5391495227813721, |
|
"eval_runtime": 95.0784, |
|
"eval_samples_per_second": 22.034, |
|
"eval_steps_per_second": 0.694, |
|
"eval_wer": 1.0206726825266612, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 88.51, |
|
"learning_rate": 1.0007812499999998e-05, |
|
"loss": 0.7534, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 89.19, |
|
"learning_rate": 9.421874999999999e-06, |
|
"loss": 0.7765, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 89.86, |
|
"learning_rate": 8.835937499999999e-06, |
|
"loss": 0.7598, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 90.54, |
|
"learning_rate": 8.249999999999999e-06, |
|
"loss": 0.7564, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 91.22, |
|
"learning_rate": 7.664062499999999e-06, |
|
"loss": 0.7594, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 91.22, |
|
"eval_loss": 0.5379434823989868, |
|
"eval_runtime": 95.2181, |
|
"eval_samples_per_second": 22.002, |
|
"eval_steps_per_second": 0.693, |
|
"eval_wer": 1.020836751435603, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 91.89, |
|
"learning_rate": 7.078124999999999e-06, |
|
"loss": 0.7739, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 92.57, |
|
"learning_rate": 6.492187499999999e-06, |
|
"loss": 0.7646, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 93.24, |
|
"learning_rate": 5.9062499999999996e-06, |
|
"loss": 0.7321, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 93.92, |
|
"learning_rate": 5.3203125e-06, |
|
"loss": 0.7754, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 94.59, |
|
"learning_rate": 4.734375e-06, |
|
"loss": 0.7678, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 94.59, |
|
"eval_loss": 0.541484534740448, |
|
"eval_runtime": 92.5358, |
|
"eval_samples_per_second": 22.64, |
|
"eval_steps_per_second": 0.713, |
|
"eval_wer": 1.0197976483456386, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 95.27, |
|
"learning_rate": 4.1484375e-06, |
|
"loss": 0.7499, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 95.95, |
|
"learning_rate": 3.5624999999999998e-06, |
|
"loss": 0.7511, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 96.62, |
|
"learning_rate": 2.9765625e-06, |
|
"loss": 0.7652, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 97.3, |
|
"learning_rate": 2.3906249999999997e-06, |
|
"loss": 0.7596, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"learning_rate": 1.8046874999999998e-06, |
|
"loss": 0.7502, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 97.97, |
|
"eval_loss": 0.5409459471702576, |
|
"eval_runtime": 94.0294, |
|
"eval_samples_per_second": 22.28, |
|
"eval_steps_per_second": 0.702, |
|
"eval_wer": 1.0191413727098715, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 98.65, |
|
"learning_rate": 1.21875e-06, |
|
"loss": 0.7587, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 99.32, |
|
"learning_rate": 6.328125e-07, |
|
"loss": 0.7614, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 4.6874999999999995e-08, |
|
"loss": 0.7354, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 14800, |
|
"total_flos": 6.011583745907916e+19, |
|
"train_loss": 1.4159005551724821, |
|
"train_runtime": 24218.0743, |
|
"train_samples_per_second": 19.452, |
|
"train_steps_per_second": 0.611 |
|
} |
|
], |
|
"max_steps": 14800, |
|
"num_train_epochs": 100, |
|
"total_flos": 6.011583745907916e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|