|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"eval_steps": 500, |
|
"global_step": 7500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 1.0876199007034302, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1167, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_cer": 0.03410275077441366, |
|
"eval_loss": 0.07084234058856964, |
|
"eval_runtime": 30.3562, |
|
"eval_samples_per_second": 49.347, |
|
"eval_steps_per_second": 6.193, |
|
"eval_wer": 0.10315320595475325, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.202278137207031, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1195, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_cer": 0.036258261601929964, |
|
"eval_loss": 0.07872834801673889, |
|
"eval_runtime": 30.3905, |
|
"eval_samples_per_second": 49.292, |
|
"eval_steps_per_second": 6.186, |
|
"eval_wer": 0.10690423162583519, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.2584967613220215, |
|
"learning_rate": 9.230769230769232e-05, |
|
"loss": 0.1259, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_cer": 0.036558034630922304, |
|
"eval_loss": 0.08242663741111755, |
|
"eval_runtime": 30.1208, |
|
"eval_samples_per_second": 49.733, |
|
"eval_steps_per_second": 6.242, |
|
"eval_wer": 0.10708006095416715, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.9028881788253784, |
|
"learning_rate": 8.461538461538461e-05, |
|
"loss": 0.1151, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_cer": 0.03672933350463221, |
|
"eval_loss": 0.08765117079019547, |
|
"eval_runtime": 30.2298, |
|
"eval_samples_per_second": 49.554, |
|
"eval_steps_per_second": 6.219, |
|
"eval_wer": 0.10795920759582699, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.2381340265274048, |
|
"learning_rate": 7.693846153846154e-05, |
|
"loss": 0.1082, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_cer": 0.03688635747219962, |
|
"eval_loss": 0.08228232711553574, |
|
"eval_runtime": 30.1272, |
|
"eval_samples_per_second": 49.723, |
|
"eval_steps_per_second": 6.24, |
|
"eval_wer": 0.10819364670026961, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 1.054206371307373, |
|
"learning_rate": 6.924615384615386e-05, |
|
"loss": 0.0975, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_cer": 0.03688635747219962, |
|
"eval_loss": 0.09040121734142303, |
|
"eval_runtime": 30.3039, |
|
"eval_samples_per_second": 49.433, |
|
"eval_steps_per_second": 6.204, |
|
"eval_wer": 0.1080764271480483, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.7761673331260681, |
|
"learning_rate": 6.156923076923077e-05, |
|
"loss": 0.1123, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_cer": 0.036800708035344666, |
|
"eval_loss": 0.08693404495716095, |
|
"eval_runtime": 30.3844, |
|
"eval_samples_per_second": 49.302, |
|
"eval_steps_per_second": 6.187, |
|
"eval_wer": 0.10778337826749501, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 1.127631425857544, |
|
"learning_rate": 5.387692307692308e-05, |
|
"loss": 0.0912, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_cer": 0.03721468031347694, |
|
"eval_loss": 0.08634809404611588, |
|
"eval_runtime": 30.4442, |
|
"eval_samples_per_second": 49.205, |
|
"eval_steps_per_second": 6.175, |
|
"eval_wer": 0.10825225647638026, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"grad_norm": 0.5615821480751038, |
|
"learning_rate": 4.618461538461539e-05, |
|
"loss": 0.0829, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_cer": 0.03658658444320729, |
|
"eval_loss": 0.08522398769855499, |
|
"eval_runtime": 30.4454, |
|
"eval_samples_per_second": 49.203, |
|
"eval_steps_per_second": 6.175, |
|
"eval_wer": 0.1080764271480483, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 0.9052166938781738, |
|
"learning_rate": 3.849230769230769e-05, |
|
"loss": 0.0824, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_cer": 0.03664368406777725, |
|
"eval_loss": 0.08616316318511963, |
|
"eval_runtime": 30.3545, |
|
"eval_samples_per_second": 49.35, |
|
"eval_steps_per_second": 6.193, |
|
"eval_wer": 0.10760754893916305, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"grad_norm": 0.9279913902282715, |
|
"learning_rate": 3.08e-05, |
|
"loss": 0.0841, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_cer": 0.03681498294148716, |
|
"eval_loss": 0.08898201584815979, |
|
"eval_runtime": 30.4415, |
|
"eval_samples_per_second": 49.209, |
|
"eval_steps_per_second": 6.176, |
|
"eval_wer": 0.10778337826749501, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"grad_norm": 0.42622652649879456, |
|
"learning_rate": 2.3107692307692308e-05, |
|
"loss": 0.0768, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_cer": 0.036358185944927415, |
|
"eval_loss": 0.08734322339296341, |
|
"eval_runtime": 30.364, |
|
"eval_samples_per_second": 49.335, |
|
"eval_steps_per_second": 6.192, |
|
"eval_wer": 0.10719728050638846, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"grad_norm": 0.6296827793121338, |
|
"learning_rate": 1.5415384615384616e-05, |
|
"loss": 0.0692, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_cer": 0.036243986695787475, |
|
"eval_loss": 0.08950727432966232, |
|
"eval_runtime": 30.3163, |
|
"eval_samples_per_second": 49.412, |
|
"eval_steps_per_second": 6.201, |
|
"eval_wer": 0.10643535341694994, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"grad_norm": 0.5045118927955627, |
|
"learning_rate": 7.723076923076922e-06, |
|
"loss": 0.0714, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_cer": 0.036358185944927415, |
|
"eval_loss": 0.08889112621545792, |
|
"eval_runtime": 30.18, |
|
"eval_samples_per_second": 49.636, |
|
"eval_steps_per_second": 6.229, |
|
"eval_wer": 0.1064939631930606, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"grad_norm": 1.5385302305221558, |
|
"learning_rate": 3.076923076923077e-08, |
|
"loss": 0.0649, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_cer": 0.036301086320357445, |
|
"eval_loss": 0.08928388357162476, |
|
"eval_runtime": 30.1433, |
|
"eval_samples_per_second": 49.696, |
|
"eval_steps_per_second": 6.237, |
|
"eval_wer": 0.10655257296917126, |
|
"step": 7500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 7500, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.3987580856546666e+19, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|