csikasote's picture
End of training
73c79c2 verified
{
"best_metric": Infinity,
"best_model_checkpoint": null,
"epoch": 1.9054878048780488,
"eval_steps": 100,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.07621951219512195,
"grad_norm": 8.693363189697266,
"learning_rate": 0.000285,
"loss": 14.8448,
"step": 100
},
{
"epoch": 0.07621951219512195,
"eval_loss": Infinity,
"eval_runtime": 64.4055,
"eval_samples_per_second": 14.222,
"eval_steps_per_second": 3.556,
"eval_wer": 1.0079402148528724,
"step": 100
},
{
"epoch": 0.1524390243902439,
"grad_norm": 5.000373363494873,
"learning_rate": 0.00028812499999999997,
"loss": 6.2506,
"step": 200
},
{
"epoch": 0.1524390243902439,
"eval_loss": Infinity,
"eval_runtime": 64.2383,
"eval_samples_per_second": 14.259,
"eval_steps_per_second": 3.565,
"eval_wer": 1.004203643157403,
"step": 200
},
{
"epoch": 0.22865853658536586,
"grad_norm": 8.148813247680664,
"learning_rate": 0.00027562499999999994,
"loss": 5.5314,
"step": 300
},
{
"epoch": 0.22865853658536586,
"eval_loss": Infinity,
"eval_runtime": 64.4036,
"eval_samples_per_second": 14.223,
"eval_steps_per_second": 3.556,
"eval_wer": 1.0269733769266698,
"step": 300
},
{
"epoch": 0.3048780487804878,
"grad_norm": 5.414369106292725,
"learning_rate": 0.00026312499999999996,
"loss": 3.4418,
"step": 400
},
{
"epoch": 0.3048780487804878,
"eval_loss": Infinity,
"eval_runtime": 64.0954,
"eval_samples_per_second": 14.291,
"eval_steps_per_second": 3.573,
"eval_wer": 0.5906118636151331,
"step": 400
},
{
"epoch": 0.38109756097560976,
"grad_norm": 3.8319242000579834,
"learning_rate": 0.000250625,
"loss": 1.9396,
"step": 500
},
{
"epoch": 0.38109756097560976,
"eval_loss": Infinity,
"eval_runtime": 64.3684,
"eval_samples_per_second": 14.231,
"eval_steps_per_second": 3.558,
"eval_wer": 0.5762494161606726,
"step": 500
},
{
"epoch": 0.4573170731707317,
"grad_norm": 3.6439785957336426,
"learning_rate": 0.00023812499999999997,
"loss": 1.698,
"step": 600
},
{
"epoch": 0.4573170731707317,
"eval_loss": Infinity,
"eval_runtime": 63.934,
"eval_samples_per_second": 14.327,
"eval_steps_per_second": 3.582,
"eval_wer": 0.5566324147594582,
"step": 600
},
{
"epoch": 0.5335365853658537,
"grad_norm": 3.3120391368865967,
"learning_rate": 0.00022562499999999997,
"loss": 1.5483,
"step": 700
},
{
"epoch": 0.5335365853658537,
"eval_loss": Infinity,
"eval_runtime": 63.9483,
"eval_samples_per_second": 14.324,
"eval_steps_per_second": 3.581,
"eval_wer": 0.5570994862213918,
"step": 700
},
{
"epoch": 0.6097560975609756,
"grad_norm": 3.9880552291870117,
"learning_rate": 0.000213125,
"loss": 1.6501,
"step": 800
},
{
"epoch": 0.6097560975609756,
"eval_loss": Infinity,
"eval_runtime": 63.9976,
"eval_samples_per_second": 14.313,
"eval_steps_per_second": 3.578,
"eval_wer": 0.5486921999065857,
"step": 800
},
{
"epoch": 0.6859756097560976,
"grad_norm": 10.49134349822998,
"learning_rate": 0.00020062499999999996,
"loss": 1.5528,
"step": 900
},
{
"epoch": 0.6859756097560976,
"eval_loss": Infinity,
"eval_runtime": 64.4873,
"eval_samples_per_second": 14.204,
"eval_steps_per_second": 3.551,
"eval_wer": 0.5470574497898179,
"step": 900
},
{
"epoch": 0.7621951219512195,
"grad_norm": 8.717538833618164,
"learning_rate": 0.00018812499999999998,
"loss": 1.5398,
"step": 1000
},
{
"epoch": 0.7621951219512195,
"eval_loss": Infinity,
"eval_runtime": 63.7937,
"eval_samples_per_second": 14.359,
"eval_steps_per_second": 3.59,
"eval_wer": 0.5478748248482018,
"step": 1000
},
{
"epoch": 0.8384146341463414,
"grad_norm": 3.1688060760498047,
"learning_rate": 0.000175625,
"loss": 1.6413,
"step": 1100
},
{
"epoch": 0.8384146341463414,
"eval_loss": Infinity,
"eval_runtime": 63.725,
"eval_samples_per_second": 14.374,
"eval_steps_per_second": 3.594,
"eval_wer": 0.5303596450256889,
"step": 1100
},
{
"epoch": 0.9146341463414634,
"grad_norm": 4.943587779998779,
"learning_rate": 0.00016312499999999997,
"loss": 1.418,
"step": 1200
},
{
"epoch": 0.9146341463414634,
"eval_loss": Infinity,
"eval_runtime": 64.0453,
"eval_samples_per_second": 14.302,
"eval_steps_per_second": 3.576,
"eval_wer": 0.5282578234469874,
"step": 1200
},
{
"epoch": 0.9908536585365854,
"grad_norm": 4.093054294586182,
"learning_rate": 0.000150625,
"loss": 1.5625,
"step": 1300
},
{
"epoch": 0.9908536585365854,
"eval_loss": Infinity,
"eval_runtime": 64.5424,
"eval_samples_per_second": 14.192,
"eval_steps_per_second": 3.548,
"eval_wer": 0.5265063054647361,
"step": 1300
},
{
"epoch": 1.0670731707317074,
"grad_norm": 2.884342670440674,
"learning_rate": 0.00013812499999999998,
"loss": 1.4753,
"step": 1400
},
{
"epoch": 1.0670731707317074,
"eval_loss": Infinity,
"eval_runtime": 63.9509,
"eval_samples_per_second": 14.323,
"eval_steps_per_second": 3.581,
"eval_wer": 0.5346800560485754,
"step": 1400
},
{
"epoch": 1.1432926829268293,
"grad_norm": 2.9519801139831543,
"learning_rate": 0.000125625,
"loss": 1.616,
"step": 1500
},
{
"epoch": 1.1432926829268293,
"eval_loss": Infinity,
"eval_runtime": 64.3023,
"eval_samples_per_second": 14.245,
"eval_steps_per_second": 3.561,
"eval_wer": 0.530943484353106,
"step": 1500
},
{
"epoch": 1.2195121951219512,
"grad_norm": 4.795141696929932,
"learning_rate": 0.00011312499999999999,
"loss": 1.3802,
"step": 1600
},
{
"epoch": 1.2195121951219512,
"eval_loss": Infinity,
"eval_runtime": 64.1513,
"eval_samples_per_second": 14.279,
"eval_steps_per_second": 3.57,
"eval_wer": 0.5246380196170014,
"step": 1600
},
{
"epoch": 1.295731707317073,
"grad_norm": 3.19049334526062,
"learning_rate": 0.00010074999999999998,
"loss": 1.4105,
"step": 1700
},
{
"epoch": 1.295731707317073,
"eval_loss": Infinity,
"eval_runtime": 64.7009,
"eval_samples_per_second": 14.157,
"eval_steps_per_second": 3.539,
"eval_wer": 0.5197337692666978,
"step": 1700
},
{
"epoch": 1.3719512195121952,
"grad_norm": 3.021605968475342,
"learning_rate": 8.825e-05,
"loss": 1.3793,
"step": 1800
},
{
"epoch": 1.3719512195121952,
"eval_loss": Infinity,
"eval_runtime": 64.1507,
"eval_samples_per_second": 14.279,
"eval_steps_per_second": 3.57,
"eval_wer": 0.5288416627744045,
"step": 1800
},
{
"epoch": 1.4481707317073171,
"grad_norm": 5.46076774597168,
"learning_rate": 7.575e-05,
"loss": 1.3991,
"step": 1900
},
{
"epoch": 1.4481707317073171,
"eval_loss": Infinity,
"eval_runtime": 64.1838,
"eval_samples_per_second": 14.272,
"eval_steps_per_second": 3.568,
"eval_wer": 0.5140121438580103,
"step": 1900
},
{
"epoch": 1.524390243902439,
"grad_norm": 2.8810770511627197,
"learning_rate": 6.324999999999999e-05,
"loss": 1.5838,
"step": 2000
},
{
"epoch": 1.524390243902439,
"eval_loss": Infinity,
"eval_runtime": 64.1271,
"eval_samples_per_second": 14.284,
"eval_steps_per_second": 3.571,
"eval_wer": 0.5239374124241009,
"step": 2000
},
{
"epoch": 1.600609756097561,
"grad_norm": 2.776454448699951,
"learning_rate": 5.0749999999999994e-05,
"loss": 1.6283,
"step": 2100
},
{
"epoch": 1.600609756097561,
"eval_loss": Infinity,
"eval_runtime": 64.764,
"eval_samples_per_second": 14.144,
"eval_steps_per_second": 3.536,
"eval_wer": 0.5143624474544606,
"step": 2100
},
{
"epoch": 1.6768292682926829,
"grad_norm": 2.028964042663574,
"learning_rate": 3.8249999999999995e-05,
"loss": 1.4131,
"step": 2200
},
{
"epoch": 1.6768292682926829,
"eval_loss": Infinity,
"eval_runtime": 64.5071,
"eval_samples_per_second": 14.2,
"eval_steps_per_second": 3.55,
"eval_wer": 0.5135450723960766,
"step": 2200
},
{
"epoch": 1.7530487804878048,
"grad_norm": 5.2079386711120605,
"learning_rate": 2.5749999999999996e-05,
"loss": 1.388,
"step": 2300
},
{
"epoch": 1.7530487804878048,
"eval_loss": Infinity,
"eval_runtime": 64.3264,
"eval_samples_per_second": 14.24,
"eval_steps_per_second": 3.56,
"eval_wer": 0.51366184026156,
"step": 2300
},
{
"epoch": 1.8292682926829267,
"grad_norm": 2.5626211166381836,
"learning_rate": 1.3249999999999999e-05,
"loss": 1.3846,
"step": 2400
},
{
"epoch": 1.8292682926829267,
"eval_loss": Infinity,
"eval_runtime": 64.4764,
"eval_samples_per_second": 14.207,
"eval_steps_per_second": 3.552,
"eval_wer": 0.514479215319944,
"step": 2400
},
{
"epoch": 1.9054878048780488,
"grad_norm": 2.6917226314544678,
"learning_rate": 7.499999999999999e-07,
"loss": 1.497,
"step": 2500
},
{
"epoch": 1.9054878048780488,
"eval_loss": Infinity,
"eval_runtime": 65.0831,
"eval_samples_per_second": 14.074,
"eval_steps_per_second": 3.519,
"eval_wer": 0.516697804764129,
"step": 2500
},
{
"epoch": 1.9054878048780488,
"step": 2500,
"total_flos": 1.2777728251757648e+19,
"train_loss": 2.486965802001953,
"train_runtime": 5021.3633,
"train_samples_per_second": 3.983,
"train_steps_per_second": 0.498
}
],
"logging_steps": 100,
"max_steps": 2500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.2777728251757648e+19,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}