csikasote's picture
End of training
b5ab886 verified
{
"best_metric": Infinity,
"best_model_checkpoint": null,
"epoch": 1.5271838729383018,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06108735491753207,
"grad_norm": 5.72606897354126,
"learning_rate": 0.000285,
"loss": 14.5485,
"step": 100
},
{
"epoch": 0.06108735491753207,
"eval_loss": Infinity,
"eval_runtime": 63.2459,
"eval_samples_per_second": 14.483,
"eval_steps_per_second": 3.621,
"eval_wer": 1.0038533395609528,
"step": 100
},
{
"epoch": 0.12217470983506414,
"grad_norm": 4.610683441162109,
"learning_rate": 0.00028812499999999997,
"loss": 6.1502,
"step": 200
},
{
"epoch": 0.12217470983506414,
"eval_loss": Infinity,
"eval_runtime": 63.1126,
"eval_samples_per_second": 14.514,
"eval_steps_per_second": 3.628,
"eval_wer": 1.0674918262494162,
"step": 200
},
{
"epoch": 0.1832620647525962,
"grad_norm": 6.0751118659973145,
"learning_rate": 0.00027562499999999994,
"loss": 5.1685,
"step": 300
},
{
"epoch": 0.1832620647525962,
"eval_loss": Infinity,
"eval_runtime": 62.7627,
"eval_samples_per_second": 14.595,
"eval_steps_per_second": 3.649,
"eval_wer": 1.005254553946754,
"step": 300
},
{
"epoch": 0.24434941967012827,
"grad_norm": 6.21919059753418,
"learning_rate": 0.00026312499999999996,
"loss": 2.0876,
"step": 400
},
{
"epoch": 0.24434941967012827,
"eval_loss": Infinity,
"eval_runtime": 63.0153,
"eval_samples_per_second": 14.536,
"eval_steps_per_second": 3.634,
"eval_wer": 0.5857076132648296,
"step": 400
},
{
"epoch": 0.30543677458766033,
"grad_norm": 3.409900188446045,
"learning_rate": 0.000250625,
"loss": 1.7116,
"step": 500
},
{
"epoch": 0.30543677458766033,
"eval_loss": Infinity,
"eval_runtime": 63.3982,
"eval_samples_per_second": 14.448,
"eval_steps_per_second": 3.612,
"eval_wer": 0.5758991125642223,
"step": 500
},
{
"epoch": 0.3665241295051924,
"grad_norm": 5.974458694458008,
"learning_rate": 0.00023812499999999997,
"loss": 1.6505,
"step": 600
},
{
"epoch": 0.3665241295051924,
"eval_loss": Infinity,
"eval_runtime": 62.8299,
"eval_samples_per_second": 14.579,
"eval_steps_per_second": 3.645,
"eval_wer": 0.5579168612797758,
"step": 600
},
{
"epoch": 0.4276114844227245,
"grad_norm": 4.58453369140625,
"learning_rate": 0.00022562499999999997,
"loss": 1.6573,
"step": 700
},
{
"epoch": 0.4276114844227245,
"eval_loss": Infinity,
"eval_runtime": 62.8462,
"eval_samples_per_second": 14.575,
"eval_steps_per_second": 3.644,
"eval_wer": 0.5470574497898179,
"step": 700
},
{
"epoch": 0.48869883934025654,
"grad_norm": 3.033734083175659,
"learning_rate": 0.000213125,
"loss": 1.4679,
"step": 800
},
{
"epoch": 0.48869883934025654,
"eval_loss": Infinity,
"eval_runtime": 63.0713,
"eval_samples_per_second": 14.523,
"eval_steps_per_second": 3.631,
"eval_wer": 0.5527790751985053,
"step": 800
},
{
"epoch": 0.5497861942577886,
"grad_norm": 4.104335308074951,
"learning_rate": 0.00020062499999999996,
"loss": 1.4955,
"step": 900
},
{
"epoch": 0.5497861942577886,
"eval_loss": Infinity,
"eval_runtime": 63.3962,
"eval_samples_per_second": 14.449,
"eval_steps_per_second": 3.612,
"eval_wer": 0.5368986454927603,
"step": 900
},
{
"epoch": 0.6108735491753207,
"grad_norm": 22.828868865966797,
"learning_rate": 0.00018812499999999998,
"loss": 1.664,
"step": 1000
},
{
"epoch": 0.6108735491753207,
"eval_loss": Infinity,
"eval_runtime": 63.1315,
"eval_samples_per_second": 14.509,
"eval_steps_per_second": 3.627,
"eval_wer": 0.5328117702008407,
"step": 1000
},
{
"epoch": 0.6719609040928528,
"grad_norm": 3.1733903884887695,
"learning_rate": 0.000175625,
"loss": 1.61,
"step": 1100
},
{
"epoch": 0.6719609040928528,
"eval_loss": Infinity,
"eval_runtime": 63.2714,
"eval_samples_per_second": 14.477,
"eval_steps_per_second": 3.619,
"eval_wer": 0.5335123773937412,
"step": 1100
},
{
"epoch": 0.7330482590103848,
"grad_norm": 4.8088698387146,
"learning_rate": 0.00016312499999999997,
"loss": 1.6414,
"step": 1200
},
{
"epoch": 0.7330482590103848,
"eval_loss": Infinity,
"eval_runtime": 63.0546,
"eval_samples_per_second": 14.527,
"eval_steps_per_second": 3.632,
"eval_wer": 0.5293087342363382,
"step": 1200
},
{
"epoch": 0.7941356139279169,
"grad_norm": 3.214020252227783,
"learning_rate": 0.000150625,
"loss": 1.6321,
"step": 1300
},
{
"epoch": 0.7941356139279169,
"eval_loss": Infinity,
"eval_runtime": 63.7075,
"eval_samples_per_second": 14.378,
"eval_steps_per_second": 3.595,
"eval_wer": 0.5270901447921532,
"step": 1300
},
{
"epoch": 0.855222968845449,
"grad_norm": 4.21952486038208,
"learning_rate": 0.00013812499999999998,
"loss": 1.4686,
"step": 1400
},
{
"epoch": 0.855222968845449,
"eval_loss": Infinity,
"eval_runtime": 63.2487,
"eval_samples_per_second": 14.483,
"eval_steps_per_second": 3.621,
"eval_wer": 0.5296590378327884,
"step": 1400
},
{
"epoch": 0.916310323762981,
"grad_norm": 15.665727615356445,
"learning_rate": 0.000125625,
"loss": 1.5073,
"step": 1500
},
{
"epoch": 0.916310323762981,
"eval_loss": Infinity,
"eval_runtime": 63.2373,
"eval_samples_per_second": 14.485,
"eval_steps_per_second": 3.621,
"eval_wer": 0.5325782344698738,
"step": 1500
},
{
"epoch": 0.9773976786805131,
"grad_norm": 5.875314712524414,
"learning_rate": 0.00011312499999999999,
"loss": 1.6164,
"step": 1600
},
{
"epoch": 0.9773976786805131,
"eval_loss": Infinity,
"eval_runtime": 63.2705,
"eval_samples_per_second": 14.478,
"eval_steps_per_second": 3.619,
"eval_wer": 0.5234703409621672,
"step": 1600
},
{
"epoch": 1.0384850335980451,
"grad_norm": 3.3934853076934814,
"learning_rate": 0.00010062499999999998,
"loss": 1.577,
"step": 1700
},
{
"epoch": 1.0384850335980451,
"eval_loss": Infinity,
"eval_runtime": 63.8167,
"eval_samples_per_second": 14.354,
"eval_steps_per_second": 3.588,
"eval_wer": 0.5238206445586174,
"step": 1700
},
{
"epoch": 1.0995723885155773,
"grad_norm": 1.4915640354156494,
"learning_rate": 8.8125e-05,
"loss": 1.383,
"step": 1800
},
{
"epoch": 1.0995723885155773,
"eval_loss": Infinity,
"eval_runtime": 63.3959,
"eval_samples_per_second": 14.449,
"eval_steps_per_second": 3.612,
"eval_wer": 0.5217188229799159,
"step": 1800
},
{
"epoch": 1.1606597434331094,
"grad_norm": 2.607821464538574,
"learning_rate": 7.5625e-05,
"loss": 1.4391,
"step": 1900
},
{
"epoch": 1.1606597434331094,
"eval_loss": Infinity,
"eval_runtime": 63.2533,
"eval_samples_per_second": 14.481,
"eval_steps_per_second": 3.62,
"eval_wer": 0.5291919663708547,
"step": 1900
},
{
"epoch": 1.2217470983506413,
"grad_norm": 2.207900285720825,
"learning_rate": 6.312499999999999e-05,
"loss": 1.5327,
"step": 2000
},
{
"epoch": 1.2217470983506413,
"eval_loss": Infinity,
"eval_runtime": 63.6941,
"eval_samples_per_second": 14.381,
"eval_steps_per_second": 3.595,
"eval_wer": 0.5254553946753854,
"step": 2000
},
{
"epoch": 1.2828344532681735,
"grad_norm": 1.4653774499893188,
"learning_rate": 5.0625e-05,
"loss": 1.3653,
"step": 2100
},
{
"epoch": 1.2828344532681735,
"eval_loss": Infinity,
"eval_runtime": 63.8557,
"eval_samples_per_second": 14.345,
"eval_steps_per_second": 3.586,
"eval_wer": 0.5195002335357309,
"step": 2100
},
{
"epoch": 1.3439218081857056,
"grad_norm": 2.36387038230896,
"learning_rate": 3.812499999999999e-05,
"loss": 1.4901,
"step": 2200
},
{
"epoch": 1.3439218081857056,
"eval_loss": Infinity,
"eval_runtime": 63.4906,
"eval_samples_per_second": 14.427,
"eval_steps_per_second": 3.607,
"eval_wer": 0.5186828584773471,
"step": 2200
},
{
"epoch": 1.4050091631032378,
"grad_norm": 12.051000595092773,
"learning_rate": 2.5625e-05,
"loss": 1.4263,
"step": 2300
},
{
"epoch": 1.4050091631032378,
"eval_loss": Infinity,
"eval_runtime": 63.7851,
"eval_samples_per_second": 14.361,
"eval_steps_per_second": 3.59,
"eval_wer": 0.5169313404950957,
"step": 2300
},
{
"epoch": 1.4660965180207697,
"grad_norm": 7.876661777496338,
"learning_rate": 1.3124999999999999e-05,
"loss": 1.4603,
"step": 2400
},
{
"epoch": 1.4660965180207697,
"eval_loss": Infinity,
"eval_runtime": 63.6972,
"eval_samples_per_second": 14.381,
"eval_steps_per_second": 3.595,
"eval_wer": 0.5178654834189631,
"step": 2400
},
{
"epoch": 1.5271838729383018,
"grad_norm": 3.0910115242004395,
"learning_rate": 6.249999999999999e-07,
"loss": 1.4802,
"step": 2500
},
{
"epoch": 1.5271838729383018,
"eval_loss": Infinity,
"eval_runtime": 64.3534,
"eval_samples_per_second": 14.234,
"eval_steps_per_second": 3.558,
"eval_wer": 0.5155301261092947,
"step": 2500
},
{
"epoch": 1.5271838729383018,
"step": 2500,
"total_flos": 1.2858032865257505e+19,
"train_loss": 2.409264056396484,
"train_runtime": 4973.4846,
"train_samples_per_second": 4.021,
"train_steps_per_second": 0.503
}
],
"logging_steps": 100,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2858032865257505e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}