{
  "best_metric": 51.77491557370612,
  "best_model_checkpoint": "./whisper-small-taiwanese/checkpoint-2000",
  "epoch": 0.8,
  "eval_steps": 1000,
  "global_step": 2000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "grad_norm": 202.904541015625,
      "learning_rate": 5.000000000000001e-07,
      "loss": 7.1597,
      "step": 25
    },
    {
      "epoch": 0.02,
      "grad_norm": 52.653865814208984,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 5.416,
      "step": 50
    },
    {
      "epoch": 0.03,
      "grad_norm": 32.37702178955078,
      "learning_rate": 1.5e-06,
      "loss": 3.7601,
      "step": 75
    },
    {
      "epoch": 0.04,
      "grad_norm": 29.413818359375,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.7107,
      "step": 100
    },
    {
      "epoch": 0.05,
      "grad_norm": 24.6357364654541,
      "learning_rate": 2.5e-06,
      "loss": 2.3941,
      "step": 125
    },
    {
      "epoch": 0.06,
      "grad_norm": 27.803171157836914,
      "learning_rate": 3e-06,
      "loss": 2.21,
      "step": 150
    },
    {
      "epoch": 0.07,
      "grad_norm": 25.927200317382812,
      "learning_rate": 3.5e-06,
      "loss": 2.0774,
      "step": 175
    },
    {
      "epoch": 0.08,
      "grad_norm": 21.307125091552734,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.8006,
      "step": 200
    },
    {
      "epoch": 0.09,
      "grad_norm": 17.714780807495117,
      "learning_rate": 4.5e-06,
      "loss": 1.598,
      "step": 225
    },
    {
      "epoch": 0.1,
      "grad_norm": 17.01283836364746,
      "learning_rate": 5e-06,
      "loss": 1.6058,
      "step": 250
    },
    {
      "epoch": 0.11,
      "grad_norm": 15.189681053161621,
      "learning_rate": 5.500000000000001e-06,
      "loss": 1.6281,
      "step": 275
    },
    {
      "epoch": 0.12,
      "grad_norm": 17.34081268310547,
      "learning_rate": 6e-06,
      "loss": 1.4216,
      "step": 300
    },
    {
      "epoch": 0.13,
      "grad_norm": 14.518415451049805,
      "learning_rate": 6.5000000000000004e-06,
      "loss": 1.5345,
      "step": 325
    },
    {
      "epoch": 0.14,
      "grad_norm": 14.931073188781738,
      "learning_rate": 7e-06,
      "loss": 1.4661,
      "step": 350
    },
    {
      "epoch": 0.15,
      "grad_norm": 15.447633743286133,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.4971,
      "step": 375
    },
    {
      "epoch": 0.16,
      "grad_norm": 15.976614952087402,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.3329,
      "step": 400
    },
    {
      "epoch": 0.17,
      "grad_norm": 14.330412864685059,
      "learning_rate": 8.5e-06,
      "loss": 1.4341,
      "step": 425
    },
    {
      "epoch": 0.18,
      "grad_norm": 14.005208969116211,
      "learning_rate": 9e-06,
      "loss": 1.3309,
      "step": 450
    },
    {
      "epoch": 0.19,
      "grad_norm": 16.984867095947266,
      "learning_rate": 9.5e-06,
      "loss": 1.4328,
      "step": 475
    },
    {
      "epoch": 0.2,
      "grad_norm": 16.296607971191406,
      "learning_rate": 1e-05,
      "loss": 1.3831,
      "step": 500
    },
    {
      "epoch": 0.21,
      "grad_norm": 14.563655853271484,
      "learning_rate": 9.944444444444445e-06,
      "loss": 1.2605,
      "step": 525
    },
    {
      "epoch": 0.22,
      "grad_norm": 15.804620742797852,
      "learning_rate": 9.88888888888889e-06,
      "loss": 1.3983,
      "step": 550
    },
    {
      "epoch": 0.23,
      "grad_norm": 14.773957252502441,
      "learning_rate": 9.833333333333333e-06,
      "loss": 1.2467,
      "step": 575
    },
    {
      "epoch": 0.24,
      "grad_norm": 14.722207069396973,
      "learning_rate": 9.777777777777779e-06,
      "loss": 1.3279,
      "step": 600
    },
    {
      "epoch": 0.25,
      "grad_norm": 14.244134902954102,
      "learning_rate": 9.722222222222223e-06,
      "loss": 1.277,
      "step": 625
    },
    {
      "epoch": 0.26,
      "grad_norm": 12.954594612121582,
      "learning_rate": 9.666666666666667e-06,
      "loss": 1.3164,
      "step": 650
    },
    {
      "epoch": 0.27,
      "grad_norm": 15.33368968963623,
      "learning_rate": 9.611111111111112e-06,
      "loss": 1.3252,
      "step": 675
    },
    {
      "epoch": 0.28,
      "grad_norm": 13.695683479309082,
      "learning_rate": 9.555555555555556e-06,
      "loss": 1.3171,
      "step": 700
    },
    {
      "epoch": 0.29,
      "grad_norm": 13.339698791503906,
      "learning_rate": 9.5e-06,
      "loss": 1.2452,
      "step": 725
    },
    {
      "epoch": 0.3,
      "grad_norm": 15.309285163879395,
      "learning_rate": 9.444444444444445e-06,
      "loss": 1.2526,
      "step": 750
    },
    {
      "epoch": 0.31,
      "grad_norm": 14.412714958190918,
      "learning_rate": 9.38888888888889e-06,
      "loss": 1.2712,
      "step": 775
    },
    {
      "epoch": 0.32,
      "grad_norm": 15.290837287902832,
      "learning_rate": 9.333333333333334e-06,
      "loss": 1.2745,
      "step": 800
    },
    {
      "epoch": 0.33,
      "grad_norm": 15.659004211425781,
      "learning_rate": 9.277777777777778e-06,
      "loss": 1.2776,
      "step": 825
    },
    {
      "epoch": 0.34,
      "grad_norm": 15.217809677124023,
      "learning_rate": 9.222222222222224e-06,
      "loss": 1.1924,
      "step": 850
    },
    {
      "epoch": 0.35,
      "grad_norm": 14.651033401489258,
      "learning_rate": 9.166666666666666e-06,
      "loss": 1.0912,
      "step": 875
    },
    {
      "epoch": 0.36,
      "grad_norm": 14.324959754943848,
      "learning_rate": 9.111111111111112e-06,
      "loss": 1.2222,
      "step": 900
    },
    {
      "epoch": 0.37,
      "grad_norm": 12.53036117553711,
      "learning_rate": 9.055555555555556e-06,
      "loss": 1.2771,
      "step": 925
    },
    {
      "epoch": 0.38,
      "grad_norm": 14.033557891845703,
      "learning_rate": 9e-06,
      "loss": 1.1506,
      "step": 950
    },
    {
      "epoch": 0.39,
      "grad_norm": 11.66818618774414,
      "learning_rate": 8.944444444444446e-06,
      "loss": 1.2423,
      "step": 975
    },
    {
      "epoch": 0.4,
      "grad_norm": 13.90597915649414,
      "learning_rate": 8.888888888888888e-06,
      "loss": 1.1421,
      "step": 1000
    },
    {
      "epoch": 0.4,
      "eval_cer": 61.163904814262146,
      "eval_loss": 1.1691664457321167,
      "eval_runtime": 1744.2126,
      "eval_samples_per_second": 2.257,
      "eval_steps_per_second": 0.282,
      "step": 1000
    },
    {
      "epoch": 0.41,
      "grad_norm": 11.293231964111328,
      "learning_rate": 8.833333333333334e-06,
      "loss": 1.153,
      "step": 1025
    },
    {
      "epoch": 0.42,
      "grad_norm": 13.314165115356445,
      "learning_rate": 8.777777777777778e-06,
      "loss": 1.168,
      "step": 1050
    },
    {
      "epoch": 0.43,
      "grad_norm": 13.231385231018066,
      "learning_rate": 8.722222222222224e-06,
      "loss": 1.1613,
      "step": 1075
    },
    {
      "epoch": 0.44,
      "grad_norm": 13.21717643737793,
      "learning_rate": 8.666666666666668e-06,
      "loss": 1.1246,
      "step": 1100
    },
    {
      "epoch": 0.45,
      "grad_norm": 11.046935081481934,
      "learning_rate": 8.611111111111112e-06,
      "loss": 1.088,
      "step": 1125
    },
    {
      "epoch": 0.46,
      "grad_norm": 14.906622886657715,
      "learning_rate": 8.555555555555556e-06,
      "loss": 1.19,
      "step": 1150
    },
    {
      "epoch": 0.47,
      "grad_norm": 14.302517890930176,
      "learning_rate": 8.5e-06,
      "loss": 1.1351,
      "step": 1175
    },
    {
      "epoch": 0.48,
      "grad_norm": 13.947770118713379,
      "learning_rate": 8.444444444444446e-06,
      "loss": 1.057,
      "step": 1200
    },
    {
      "epoch": 0.49,
      "grad_norm": 14.45609188079834,
      "learning_rate": 8.38888888888889e-06,
      "loss": 1.0993,
      "step": 1225
    },
    {
      "epoch": 0.5,
      "grad_norm": 14.952827453613281,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.1626,
      "step": 1250
    },
    {
      "epoch": 0.51,
      "grad_norm": 16.128353118896484,
      "learning_rate": 8.277777777777778e-06,
      "loss": 1.1082,
      "step": 1275
    },
    {
      "epoch": 0.52,
      "grad_norm": 13.550396919250488,
      "learning_rate": 8.222222222222222e-06,
      "loss": 1.183,
      "step": 1300
    },
    {
      "epoch": 0.53,
      "grad_norm": 14.400228500366211,
      "learning_rate": 8.166666666666668e-06,
      "loss": 1.1988,
      "step": 1325
    },
    {
      "epoch": 0.54,
      "grad_norm": 13.9801607131958,
      "learning_rate": 8.111111111111112e-06,
      "loss": 1.1314,
      "step": 1350
    },
    {
      "epoch": 0.55,
      "grad_norm": 12.84874439239502,
      "learning_rate": 8.055555555555557e-06,
      "loss": 1.1411,
      "step": 1375
    },
    {
      "epoch": 0.56,
      "grad_norm": 14.126324653625488,
      "learning_rate": 8.000000000000001e-06,
      "loss": 1.1314,
      "step": 1400
    },
    {
      "epoch": 0.57,
      "grad_norm": 12.402750015258789,
      "learning_rate": 7.944444444444445e-06,
      "loss": 1.1071,
      "step": 1425
    },
    {
      "epoch": 0.58,
      "grad_norm": 13.835284233093262,
      "learning_rate": 7.88888888888889e-06,
      "loss": 1.1393,
      "step": 1450
    },
    {
      "epoch": 0.59,
      "grad_norm": 12.414569854736328,
      "learning_rate": 7.833333333333333e-06,
      "loss": 1.1026,
      "step": 1475
    },
    {
      "epoch": 0.6,
      "grad_norm": 15.43626880645752,
      "learning_rate": 7.77777777777778e-06,
      "loss": 1.164,
      "step": 1500
    },
    {
      "epoch": 0.61,
      "grad_norm": 13.067487716674805,
      "learning_rate": 7.722222222222223e-06,
      "loss": 1.0448,
      "step": 1525
    },
    {
      "epoch": 0.62,
      "grad_norm": 14.158551216125488,
      "learning_rate": 7.666666666666667e-06,
      "loss": 1.1674,
      "step": 1550
    },
    {
      "epoch": 0.63,
      "grad_norm": 13.062005996704102,
      "learning_rate": 7.611111111111111e-06,
      "loss": 1.0916,
      "step": 1575
    },
    {
      "epoch": 0.64,
      "grad_norm": 13.6104736328125,
      "learning_rate": 7.555555555555556e-06,
      "loss": 1.0424,
      "step": 1600
    },
    {
      "epoch": 0.65,
      "grad_norm": 11.52835750579834,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.0196,
      "step": 1625
    },
    {
      "epoch": 0.66,
      "grad_norm": 14.118935585021973,
      "learning_rate": 7.444444444444445e-06,
      "loss": 1.1502,
      "step": 1650
    },
    {
      "epoch": 0.67,
      "grad_norm": 13.2473726272583,
      "learning_rate": 7.38888888888889e-06,
      "loss": 1.0562,
      "step": 1675
    },
    {
      "epoch": 0.68,
      "grad_norm": 13.026944160461426,
      "learning_rate": 7.333333333333333e-06,
      "loss": 1.0391,
      "step": 1700
    },
    {
      "epoch": 0.69,
      "grad_norm": 11.923539161682129,
      "learning_rate": 7.277777777777778e-06,
      "loss": 1.063,
      "step": 1725
    },
    {
      "epoch": 0.7,
      "grad_norm": 10.581581115722656,
      "learning_rate": 7.222222222222223e-06,
      "loss": 1.0504,
      "step": 1750
    },
    {
      "epoch": 0.71,
      "grad_norm": 13.761798858642578,
      "learning_rate": 7.166666666666667e-06,
      "loss": 1.1781,
      "step": 1775
    },
    {
      "epoch": 0.72,
      "grad_norm": 13.440286636352539,
      "learning_rate": 7.111111111111112e-06,
      "loss": 1.088,
      "step": 1800
    },
    {
      "epoch": 0.73,
      "grad_norm": 11.378331184387207,
      "learning_rate": 7.055555555555557e-06,
      "loss": 1.017,
      "step": 1825
    },
    {
      "epoch": 0.74,
      "grad_norm": 16.24916648864746,
      "learning_rate": 7e-06,
      "loss": 1.0669,
      "step": 1850
    },
    {
      "epoch": 0.75,
      "grad_norm": 14.499041557312012,
      "learning_rate": 6.944444444444445e-06,
      "loss": 1.023,
      "step": 1875
    },
    {
      "epoch": 0.76,
      "grad_norm": 14.587787628173828,
      "learning_rate": 6.88888888888889e-06,
      "loss": 1.1128,
      "step": 1900
    },
    {
      "epoch": 0.77,
      "grad_norm": 14.249890327453613,
      "learning_rate": 6.833333333333334e-06,
      "loss": 1.0462,
      "step": 1925
    },
    {
      "epoch": 0.78,
      "grad_norm": 13.22544002532959,
      "learning_rate": 6.777777777777779e-06,
      "loss": 1.0564,
      "step": 1950
    },
    {
      "epoch": 0.79,
      "grad_norm": 13.404162406921387,
      "learning_rate": 6.7222222222222235e-06,
      "loss": 1.0517,
      "step": 1975
    },
    {
      "epoch": 0.8,
      "grad_norm": 13.87370491027832,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.0556,
      "step": 2000
    },
    {
      "epoch": 0.8,
      "eval_cer": 51.77491557370612,
      "eval_loss": 1.0214924812316895,
      "eval_runtime": 1738.5549,
      "eval_samples_per_second": 2.264,
      "eval_steps_per_second": 0.283,
      "step": 2000
    }
  ],
  "logging_steps": 25,
  "max_steps": 5000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1000,
  "total_flos": 9.23473281024e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}