|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 99.98804780876495, |
|
"global_step": 3100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.7125e-06, |
|
"loss": 17.9778, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 6.45, |
|
"learning_rate": 7.4625e-06, |
|
"loss": 10.1788, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"learning_rate": 1.1212499999999998e-05, |
|
"loss": 7.1097, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 12.89, |
|
"learning_rate": 1.49625e-05, |
|
"loss": 5.9162, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"learning_rate": 1.8712499999999997e-05, |
|
"loss": 4.917, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 16.13, |
|
"eval_loss": 4.89633846282959, |
|
"eval_runtime": 284.283, |
|
"eval_samples_per_second": 9.473, |
|
"eval_steps_per_second": 0.594, |
|
"eval_wer": 1.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 19.35, |
|
"learning_rate": 2.2462499999999997e-05, |
|
"loss": 4.1291, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 22.57, |
|
"learning_rate": 2.6212499999999997e-05, |
|
"loss": 3.6844, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 25.8, |
|
"learning_rate": 2.99625e-05, |
|
"loss": 3.5213, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 29.03, |
|
"learning_rate": 3.37125e-05, |
|
"loss": 3.4601, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"learning_rate": 3.7462499999999996e-05, |
|
"loss": 3.3585, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.25, |
|
"eval_loss": 3.3068907260894775, |
|
"eval_runtime": 283.3986, |
|
"eval_samples_per_second": 9.503, |
|
"eval_steps_per_second": 0.596, |
|
"eval_wer": 1.0000410576449335, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 35.48, |
|
"learning_rate": 4.12125e-05, |
|
"loss": 3.2956, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 38.7, |
|
"learning_rate": 4.4962499999999995e-05, |
|
"loss": 2.8135, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 41.92, |
|
"learning_rate": 4.871249999999999e-05, |
|
"loss": 2.0147, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 45.16, |
|
"learning_rate": 5.2424999999999994e-05, |
|
"loss": 1.7332, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 48.38, |
|
"learning_rate": 5.6175e-05, |
|
"loss": 1.5873, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 48.38, |
|
"eval_loss": 0.8273974061012268, |
|
"eval_runtime": 283.1526, |
|
"eval_samples_per_second": 9.511, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 1.006076531450156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 51.61, |
|
"learning_rate": 5.9925e-05, |
|
"loss": 1.476, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 54.83, |
|
"learning_rate": 6.367499999999999e-05, |
|
"loss": 1.4204, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 58.06, |
|
"learning_rate": 6.7425e-05, |
|
"loss": 1.357, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 61.29, |
|
"learning_rate": 7.11375e-05, |
|
"loss": 1.3037, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 64.51, |
|
"learning_rate": 7.48875e-05, |
|
"loss": 1.2654, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 64.51, |
|
"eval_loss": 0.6249768137931824, |
|
"eval_runtime": 282.7219, |
|
"eval_samples_per_second": 9.525, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 1.0075546066677616, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 67.73, |
|
"learning_rate": 6.838636363636363e-05, |
|
"loss": 1.2231, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 70.96, |
|
"learning_rate": 6.156818181818181e-05, |
|
"loss": 1.1802, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 74.19, |
|
"learning_rate": 5.481818181818182e-05, |
|
"loss": 1.1484, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 77.41, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 1.1185, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 80.64, |
|
"learning_rate": 4.118181818181818e-05, |
|
"loss": 1.0917, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 80.64, |
|
"eval_loss": 0.5459514856338501, |
|
"eval_runtime": 282.9764, |
|
"eval_samples_per_second": 9.517, |
|
"eval_steps_per_second": 0.597, |
|
"eval_wer": 1.0056248973558877, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 83.86, |
|
"learning_rate": 3.436363636363636e-05, |
|
"loss": 1.0663, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 87.1, |
|
"learning_rate": 2.754545454545454e-05, |
|
"loss": 1.0487, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 90.32, |
|
"learning_rate": 2.0727272727272725e-05, |
|
"loss": 1.0214, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 93.54, |
|
"learning_rate": 1.3909090909090907e-05, |
|
"loss": 1.016, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 96.76, |
|
"learning_rate": 7.09090909090909e-06, |
|
"loss": 1.0001, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 96.76, |
|
"eval_loss": 0.5304259061813354, |
|
"eval_runtime": 282.7392, |
|
"eval_samples_per_second": 9.525, |
|
"eval_steps_per_second": 0.598, |
|
"eval_wer": 1.0082525866316308, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"learning_rate": 2.727272727272727e-07, |
|
"loss": 0.9947, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 99.99, |
|
"step": 3100, |
|
"total_flos": 5.0567037565307716e+19, |
|
"train_loss": 3.0460862658100742, |
|
"train_runtime": 81000.8052, |
|
"train_samples_per_second": 4.939, |
|
"train_steps_per_second": 0.038 |
|
} |
|
], |
|
"max_steps": 3100, |
|
"num_train_epochs": 100, |
|
"total_flos": 5.0567037565307716e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|