{
  "best_metric": Infinity,
  "best_model_checkpoint": null,
  "epoch": 1.5271838729383018,
  "eval_steps": 100,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06108735491753207,
      "grad_norm": 5.72606897354126,
      "learning_rate": 0.000285,
      "loss": 14.5485,
      "step": 100
    },
    {
      "epoch": 0.06108735491753207,
      "eval_loss": Infinity,
      "eval_runtime": 63.2459,
      "eval_samples_per_second": 14.483,
      "eval_steps_per_second": 3.621,
      "eval_wer": 1.0038533395609528,
      "step": 100
    },
    {
      "epoch": 0.12217470983506414,
      "grad_norm": 4.610683441162109,
      "learning_rate": 0.00028812499999999997,
      "loss": 6.1502,
      "step": 200
    },
    {
      "epoch": 0.12217470983506414,
      "eval_loss": Infinity,
      "eval_runtime": 63.1126,
      "eval_samples_per_second": 14.514,
      "eval_steps_per_second": 3.628,
      "eval_wer": 1.0674918262494162,
      "step": 200
    },
    {
      "epoch": 0.1832620647525962,
      "grad_norm": 6.0751118659973145,
      "learning_rate": 0.00027562499999999994,
      "loss": 5.1685,
      "step": 300
    },
    {
      "epoch": 0.1832620647525962,
      "eval_loss": Infinity,
      "eval_runtime": 62.7627,
      "eval_samples_per_second": 14.595,
      "eval_steps_per_second": 3.649,
      "eval_wer": 1.005254553946754,
      "step": 300
    },
    {
      "epoch": 0.24434941967012827,
      "grad_norm": 6.21919059753418,
      "learning_rate": 0.00026312499999999996,
      "loss": 2.0876,
      "step": 400
    },
    {
      "epoch": 0.24434941967012827,
      "eval_loss": Infinity,
      "eval_runtime": 63.0153,
      "eval_samples_per_second": 14.536,
      "eval_steps_per_second": 3.634,
      "eval_wer": 0.5857076132648296,
      "step": 400
    },
    {
      "epoch": 0.30543677458766033,
      "grad_norm": 3.409900188446045,
      "learning_rate": 0.000250625,
      "loss": 1.7116,
      "step": 500
    },
    {
      "epoch": 0.30543677458766033,
      "eval_loss": Infinity,
      "eval_runtime": 63.3982,
      "eval_samples_per_second": 14.448,
      "eval_steps_per_second": 3.612,
      "eval_wer": 0.5758991125642223,
      "step": 500
    },
    {
      "epoch": 0.3665241295051924,
      "grad_norm": 5.974458694458008,
      "learning_rate": 0.00023812499999999997,
      "loss": 1.6505,
      "step": 600
    },
    {
      "epoch": 0.3665241295051924,
      "eval_loss": Infinity,
      "eval_runtime": 62.8299,
      "eval_samples_per_second": 14.579,
      "eval_steps_per_second": 3.645,
      "eval_wer": 0.5579168612797758,
      "step": 600
    },
    {
      "epoch": 0.4276114844227245,
      "grad_norm": 4.58453369140625,
      "learning_rate": 0.00022562499999999997,
      "loss": 1.6573,
      "step": 700
    },
    {
      "epoch": 0.4276114844227245,
      "eval_loss": Infinity,
      "eval_runtime": 62.8462,
      "eval_samples_per_second": 14.575,
      "eval_steps_per_second": 3.644,
      "eval_wer": 0.5470574497898179,
      "step": 700
    },
    {
      "epoch": 0.48869883934025654,
      "grad_norm": 3.033734083175659,
      "learning_rate": 0.000213125,
      "loss": 1.4679,
      "step": 800
    },
    {
      "epoch": 0.48869883934025654,
      "eval_loss": Infinity,
      "eval_runtime": 63.0713,
      "eval_samples_per_second": 14.523,
      "eval_steps_per_second": 3.631,
      "eval_wer": 0.5527790751985053,
      "step": 800
    },
    {
      "epoch": 0.5497861942577886,
      "grad_norm": 4.104335308074951,
      "learning_rate": 0.00020062499999999996,
      "loss": 1.4955,
      "step": 900
    },
    {
      "epoch": 0.5497861942577886,
      "eval_loss": Infinity,
      "eval_runtime": 63.3962,
      "eval_samples_per_second": 14.449,
      "eval_steps_per_second": 3.612,
      "eval_wer": 0.5368986454927603,
      "step": 900
    },
    {
      "epoch": 0.6108735491753207,
      "grad_norm": 22.828868865966797,
      "learning_rate": 0.00018812499999999998,
      "loss": 1.664,
      "step": 1000
    },
    {
      "epoch": 0.6108735491753207,
      "eval_loss": Infinity,
      "eval_runtime": 63.1315,
      "eval_samples_per_second": 14.509,
      "eval_steps_per_second": 3.627,
      "eval_wer": 0.5328117702008407,
      "step": 1000
    },
    {
      "epoch": 0.6719609040928528,
      "grad_norm": 3.1733903884887695,
      "learning_rate": 0.000175625,
      "loss": 1.61,
      "step": 1100
    },
    {
      "epoch": 0.6719609040928528,
      "eval_loss": Infinity,
      "eval_runtime": 63.2714,
      "eval_samples_per_second": 14.477,
      "eval_steps_per_second": 3.619,
      "eval_wer": 0.5335123773937412,
      "step": 1100
    },
    {
      "epoch": 0.7330482590103848,
      "grad_norm": 4.8088698387146,
      "learning_rate": 0.00016312499999999997,
      "loss": 1.6414,
      "step": 1200
    },
    {
      "epoch": 0.7330482590103848,
      "eval_loss": Infinity,
      "eval_runtime": 63.0546,
      "eval_samples_per_second": 14.527,
      "eval_steps_per_second": 3.632,
      "eval_wer": 0.5293087342363382,
      "step": 1200
    },
    {
      "epoch": 0.7941356139279169,
      "grad_norm": 3.214020252227783,
      "learning_rate": 0.000150625,
      "loss": 1.6321,
      "step": 1300
    },
    {
      "epoch": 0.7941356139279169,
      "eval_loss": Infinity,
      "eval_runtime": 63.7075,
      "eval_samples_per_second": 14.378,
      "eval_steps_per_second": 3.595,
      "eval_wer": 0.5270901447921532,
      "step": 1300
    },
    {
      "epoch": 0.855222968845449,
      "grad_norm": 4.21952486038208,
      "learning_rate": 0.00013812499999999998,
      "loss": 1.4686,
      "step": 1400
    },
    {
      "epoch": 0.855222968845449,
      "eval_loss": Infinity,
      "eval_runtime": 63.2487,
      "eval_samples_per_second": 14.483,
      "eval_steps_per_second": 3.621,
      "eval_wer": 0.5296590378327884,
      "step": 1400
    },
    {
      "epoch": 0.916310323762981,
      "grad_norm": 15.665727615356445,
      "learning_rate": 0.000125625,
      "loss": 1.5073,
      "step": 1500
    },
    {
      "epoch": 0.916310323762981,
      "eval_loss": Infinity,
      "eval_runtime": 63.2373,
      "eval_samples_per_second": 14.485,
      "eval_steps_per_second": 3.621,
      "eval_wer": 0.5325782344698738,
      "step": 1500
    },
    {
      "epoch": 0.9773976786805131,
      "grad_norm": 5.875314712524414,
      "learning_rate": 0.00011312499999999999,
      "loss": 1.6164,
      "step": 1600
    },
    {
      "epoch": 0.9773976786805131,
      "eval_loss": Infinity,
      "eval_runtime": 63.2705,
      "eval_samples_per_second": 14.478,
      "eval_steps_per_second": 3.619,
      "eval_wer": 0.5234703409621672,
      "step": 1600
    },
    {
      "epoch": 1.0384850335980451,
      "grad_norm": 3.3934853076934814,
      "learning_rate": 0.00010062499999999998,
      "loss": 1.577,
      "step": 1700
    },
    {
      "epoch": 1.0384850335980451,
      "eval_loss": Infinity,
      "eval_runtime": 63.8167,
      "eval_samples_per_second": 14.354,
      "eval_steps_per_second": 3.588,
      "eval_wer": 0.5238206445586174,
      "step": 1700
    },
    {
      "epoch": 1.0995723885155773,
      "grad_norm": 1.4915640354156494,
      "learning_rate": 8.8125e-05,
      "loss": 1.383,
      "step": 1800
    },
    {
      "epoch": 1.0995723885155773,
      "eval_loss": Infinity,
      "eval_runtime": 63.3959,
      "eval_samples_per_second": 14.449,
      "eval_steps_per_second": 3.612,
      "eval_wer": 0.5217188229799159,
      "step": 1800
    },
    {
      "epoch": 1.1606597434331094,
      "grad_norm": 2.607821464538574,
      "learning_rate": 7.5625e-05,
      "loss": 1.4391,
      "step": 1900
    },
    {
      "epoch": 1.1606597434331094,
      "eval_loss": Infinity,
      "eval_runtime": 63.2533,
      "eval_samples_per_second": 14.481,
      "eval_steps_per_second": 3.62,
      "eval_wer": 0.5291919663708547,
      "step": 1900
    },
    {
      "epoch": 1.2217470983506413,
      "grad_norm": 2.207900285720825,
      "learning_rate": 6.312499999999999e-05,
      "loss": 1.5327,
      "step": 2000
    },
    {
      "epoch": 1.2217470983506413,
      "eval_loss": Infinity,
      "eval_runtime": 63.6941,
      "eval_samples_per_second": 14.381,
      "eval_steps_per_second": 3.595,
      "eval_wer": 0.5254553946753854,
      "step": 2000
    },
    {
      "epoch": 1.2828344532681735,
      "grad_norm": 1.4653774499893188,
      "learning_rate": 5.0625e-05,
      "loss": 1.3653,
      "step": 2100
    },
    {
      "epoch": 1.2828344532681735,
      "eval_loss": Infinity,
      "eval_runtime": 63.8557,
      "eval_samples_per_second": 14.345,
      "eval_steps_per_second": 3.586,
      "eval_wer": 0.5195002335357309,
      "step": 2100
    },
    {
      "epoch": 1.3439218081857056,
      "grad_norm": 2.36387038230896,
      "learning_rate": 3.812499999999999e-05,
      "loss": 1.4901,
      "step": 2200
    },
    {
      "epoch": 1.3439218081857056,
      "eval_loss": Infinity,
      "eval_runtime": 63.4906,
      "eval_samples_per_second": 14.427,
      "eval_steps_per_second": 3.607,
      "eval_wer": 0.5186828584773471,
      "step": 2200
    },
    {
      "epoch": 1.4050091631032378,
      "grad_norm": 12.051000595092773,
      "learning_rate": 2.5625e-05,
      "loss": 1.4263,
      "step": 2300
    },
    {
      "epoch": 1.4050091631032378,
      "eval_loss": Infinity,
      "eval_runtime": 63.7851,
      "eval_samples_per_second": 14.361,
      "eval_steps_per_second": 3.59,
      "eval_wer": 0.5169313404950957,
      "step": 2300
    },
    {
      "epoch": 1.4660965180207697,
      "grad_norm": 7.876661777496338,
      "learning_rate": 1.3124999999999999e-05,
      "loss": 1.4603,
      "step": 2400
    },
    {
      "epoch": 1.4660965180207697,
      "eval_loss": Infinity,
      "eval_runtime": 63.6972,
      "eval_samples_per_second": 14.381,
      "eval_steps_per_second": 3.595,
      "eval_wer": 0.5178654834189631,
      "step": 2400
    },
    {
      "epoch": 1.5271838729383018,
      "grad_norm": 3.0910115242004395,
      "learning_rate": 6.249999999999999e-07,
      "loss": 1.4802,
      "step": 2500
    },
    {
      "epoch": 1.5271838729383018,
      "eval_loss": Infinity,
      "eval_runtime": 64.3534,
      "eval_samples_per_second": 14.234,
      "eval_steps_per_second": 3.558,
      "eval_wer": 0.5155301261092947,
      "step": 2500
    },
    {
      "epoch": 1.5271838729383018,
      "step": 2500,
      "total_flos": 1.2858032865257505e+19,
      "train_loss": 2.409264056396484,
      "train_runtime": 4973.4846,
      "train_samples_per_second": 4.021,
      "train_steps_per_second": 0.503
    }
  ],
  "logging_steps": 100,
  "max_steps": 2500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2858032865257505e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}