{
  "best_metric": Infinity,
  "best_model_checkpoint": null,
  "epoch": 1.9054878048780488,
  "eval_steps": 100,
  "global_step": 2500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07621951219512195,
      "grad_norm": 8.693363189697266,
      "learning_rate": 0.000285,
      "loss": 14.8448,
      "step": 100
    },
    {
      "epoch": 0.07621951219512195,
      "eval_loss": Infinity,
      "eval_runtime": 64.4055,
      "eval_samples_per_second": 14.222,
      "eval_steps_per_second": 3.556,
      "eval_wer": 1.0079402148528724,
      "step": 100
    },
    {
      "epoch": 0.1524390243902439,
      "grad_norm": 5.000373363494873,
      "learning_rate": 0.00028812499999999997,
      "loss": 6.2506,
      "step": 200
    },
    {
      "epoch": 0.1524390243902439,
      "eval_loss": Infinity,
      "eval_runtime": 64.2383,
      "eval_samples_per_second": 14.259,
      "eval_steps_per_second": 3.565,
      "eval_wer": 1.004203643157403,
      "step": 200
    },
    {
      "epoch": 0.22865853658536586,
      "grad_norm": 8.148813247680664,
      "learning_rate": 0.00027562499999999994,
      "loss": 5.5314,
      "step": 300
    },
    {
      "epoch": 0.22865853658536586,
      "eval_loss": Infinity,
      "eval_runtime": 64.4036,
      "eval_samples_per_second": 14.223,
      "eval_steps_per_second": 3.556,
      "eval_wer": 1.0269733769266698,
      "step": 300
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 5.414369106292725,
      "learning_rate": 0.00026312499999999996,
      "loss": 3.4418,
      "step": 400
    },
    {
      "epoch": 0.3048780487804878,
      "eval_loss": Infinity,
      "eval_runtime": 64.0954,
      "eval_samples_per_second": 14.291,
      "eval_steps_per_second": 3.573,
      "eval_wer": 0.5906118636151331,
      "step": 400
    },
    {
      "epoch": 0.38109756097560976,
      "grad_norm": 3.8319242000579834,
      "learning_rate": 0.000250625,
      "loss": 1.9396,
      "step": 500
    },
    {
      "epoch": 0.38109756097560976,
      "eval_loss": Infinity,
      "eval_runtime": 64.3684,
      "eval_samples_per_second": 14.231,
      "eval_steps_per_second": 3.558,
      "eval_wer": 0.5762494161606726,
      "step": 500
    },
    {
      "epoch": 0.4573170731707317,
      "grad_norm": 3.6439785957336426,
      "learning_rate": 0.00023812499999999997,
      "loss": 1.698,
      "step": 600
    },
    {
      "epoch": 0.4573170731707317,
      "eval_loss": Infinity,
      "eval_runtime": 63.934,
      "eval_samples_per_second": 14.327,
      "eval_steps_per_second": 3.582,
      "eval_wer": 0.5566324147594582,
      "step": 600
    },
    {
      "epoch": 0.5335365853658537,
      "grad_norm": 3.3120391368865967,
      "learning_rate": 0.00022562499999999997,
      "loss": 1.5483,
      "step": 700
    },
    {
      "epoch": 0.5335365853658537,
      "eval_loss": Infinity,
      "eval_runtime": 63.9483,
      "eval_samples_per_second": 14.324,
      "eval_steps_per_second": 3.581,
      "eval_wer": 0.5570994862213918,
      "step": 700
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 3.9880552291870117,
      "learning_rate": 0.000213125,
      "loss": 1.6501,
      "step": 800
    },
    {
      "epoch": 0.6097560975609756,
      "eval_loss": Infinity,
      "eval_runtime": 63.9976,
      "eval_samples_per_second": 14.313,
      "eval_steps_per_second": 3.578,
      "eval_wer": 0.5486921999065857,
      "step": 800
    },
    {
      "epoch": 0.6859756097560976,
      "grad_norm": 10.49134349822998,
      "learning_rate": 0.00020062499999999996,
      "loss": 1.5528,
      "step": 900
    },
    {
      "epoch": 0.6859756097560976,
      "eval_loss": Infinity,
      "eval_runtime": 64.4873,
      "eval_samples_per_second": 14.204,
      "eval_steps_per_second": 3.551,
      "eval_wer": 0.5470574497898179,
      "step": 900
    },
    {
      "epoch": 0.7621951219512195,
      "grad_norm": 8.717538833618164,
      "learning_rate": 0.00018812499999999998,
      "loss": 1.5398,
      "step": 1000
    },
    {
      "epoch": 0.7621951219512195,
      "eval_loss": Infinity,
      "eval_runtime": 63.7937,
      "eval_samples_per_second": 14.359,
      "eval_steps_per_second": 3.59,
      "eval_wer": 0.5478748248482018,
      "step": 1000
    },
    {
      "epoch": 0.8384146341463414,
      "grad_norm": 3.1688060760498047,
      "learning_rate": 0.000175625,
      "loss": 1.6413,
      "step": 1100
    },
    {
      "epoch": 0.8384146341463414,
      "eval_loss": Infinity,
      "eval_runtime": 63.725,
      "eval_samples_per_second": 14.374,
      "eval_steps_per_second": 3.594,
      "eval_wer": 0.5303596450256889,
      "step": 1100
    },
    {
      "epoch": 0.9146341463414634,
      "grad_norm": 4.943587779998779,
      "learning_rate": 0.00016312499999999997,
      "loss": 1.418,
      "step": 1200
    },
    {
      "epoch": 0.9146341463414634,
      "eval_loss": Infinity,
      "eval_runtime": 64.0453,
      "eval_samples_per_second": 14.302,
      "eval_steps_per_second": 3.576,
      "eval_wer": 0.5282578234469874,
      "step": 1200
    },
    {
      "epoch": 0.9908536585365854,
      "grad_norm": 4.093054294586182,
      "learning_rate": 0.000150625,
      "loss": 1.5625,
      "step": 1300
    },
    {
      "epoch": 0.9908536585365854,
      "eval_loss": Infinity,
      "eval_runtime": 64.5424,
      "eval_samples_per_second": 14.192,
      "eval_steps_per_second": 3.548,
      "eval_wer": 0.5265063054647361,
      "step": 1300
    },
    {
      "epoch": 1.0670731707317074,
      "grad_norm": 2.884342670440674,
      "learning_rate": 0.00013812499999999998,
      "loss": 1.4753,
      "step": 1400
    },
    {
      "epoch": 1.0670731707317074,
      "eval_loss": Infinity,
      "eval_runtime": 63.9509,
      "eval_samples_per_second": 14.323,
      "eval_steps_per_second": 3.581,
      "eval_wer": 0.5346800560485754,
      "step": 1400
    },
    {
      "epoch": 1.1432926829268293,
      "grad_norm": 2.9519801139831543,
      "learning_rate": 0.000125625,
      "loss": 1.616,
      "step": 1500
    },
    {
      "epoch": 1.1432926829268293,
      "eval_loss": Infinity,
      "eval_runtime": 64.3023,
      "eval_samples_per_second": 14.245,
      "eval_steps_per_second": 3.561,
      "eval_wer": 0.530943484353106,
      "step": 1500
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 4.795141696929932,
      "learning_rate": 0.00011312499999999999,
      "loss": 1.3802,
      "step": 1600
    },
    {
      "epoch": 1.2195121951219512,
      "eval_loss": Infinity,
      "eval_runtime": 64.1513,
      "eval_samples_per_second": 14.279,
      "eval_steps_per_second": 3.57,
      "eval_wer": 0.5246380196170014,
      "step": 1600
    },
    {
      "epoch": 1.295731707317073,
      "grad_norm": 3.19049334526062,
      "learning_rate": 0.00010074999999999998,
      "loss": 1.4105,
      "step": 1700
    },
    {
      "epoch": 1.295731707317073,
      "eval_loss": Infinity,
      "eval_runtime": 64.7009,
      "eval_samples_per_second": 14.157,
      "eval_steps_per_second": 3.539,
      "eval_wer": 0.5197337692666978,
      "step": 1700
    },
    {
      "epoch": 1.3719512195121952,
      "grad_norm": 3.021605968475342,
      "learning_rate": 8.825e-05,
      "loss": 1.3793,
      "step": 1800
    },
    {
      "epoch": 1.3719512195121952,
      "eval_loss": Infinity,
      "eval_runtime": 64.1507,
      "eval_samples_per_second": 14.279,
      "eval_steps_per_second": 3.57,
      "eval_wer": 0.5288416627744045,
      "step": 1800
    },
    {
      "epoch": 1.4481707317073171,
      "grad_norm": 5.46076774597168,
      "learning_rate": 7.575e-05,
      "loss": 1.3991,
      "step": 1900
    },
    {
      "epoch": 1.4481707317073171,
      "eval_loss": Infinity,
      "eval_runtime": 64.1838,
      "eval_samples_per_second": 14.272,
      "eval_steps_per_second": 3.568,
      "eval_wer": 0.5140121438580103,
      "step": 1900
    },
    {
      "epoch": 1.524390243902439,
      "grad_norm": 2.8810770511627197,
      "learning_rate": 6.324999999999999e-05,
      "loss": 1.5838,
      "step": 2000
    },
    {
      "epoch": 1.524390243902439,
      "eval_loss": Infinity,
      "eval_runtime": 64.1271,
      "eval_samples_per_second": 14.284,
      "eval_steps_per_second": 3.571,
      "eval_wer": 0.5239374124241009,
      "step": 2000
    },
    {
      "epoch": 1.600609756097561,
      "grad_norm": 2.776454448699951,
      "learning_rate": 5.0749999999999994e-05,
      "loss": 1.6283,
      "step": 2100
    },
    {
      "epoch": 1.600609756097561,
      "eval_loss": Infinity,
      "eval_runtime": 64.764,
      "eval_samples_per_second": 14.144,
      "eval_steps_per_second": 3.536,
      "eval_wer": 0.5143624474544606,
      "step": 2100
    },
    {
      "epoch": 1.6768292682926829,
      "grad_norm": 2.028964042663574,
      "learning_rate": 3.8249999999999995e-05,
      "loss": 1.4131,
      "step": 2200
    },
    {
      "epoch": 1.6768292682926829,
      "eval_loss": Infinity,
      "eval_runtime": 64.5071,
      "eval_samples_per_second": 14.2,
      "eval_steps_per_second": 3.55,
      "eval_wer": 0.5135450723960766,
      "step": 2200
    },
    {
      "epoch": 1.7530487804878048,
      "grad_norm": 5.2079386711120605,
      "learning_rate": 2.5749999999999996e-05,
      "loss": 1.388,
      "step": 2300
    },
    {
      "epoch": 1.7530487804878048,
      "eval_loss": Infinity,
      "eval_runtime": 64.3264,
      "eval_samples_per_second": 14.24,
      "eval_steps_per_second": 3.56,
      "eval_wer": 0.51366184026156,
      "step": 2300
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 2.5626211166381836,
      "learning_rate": 1.3249999999999999e-05,
      "loss": 1.3846,
      "step": 2400
    },
    {
      "epoch": 1.8292682926829267,
      "eval_loss": Infinity,
      "eval_runtime": 64.4764,
      "eval_samples_per_second": 14.207,
      "eval_steps_per_second": 3.552,
      "eval_wer": 0.514479215319944,
      "step": 2400
    },
    {
      "epoch": 1.9054878048780488,
      "grad_norm": 2.6917226314544678,
      "learning_rate": 7.499999999999999e-07,
      "loss": 1.497,
      "step": 2500
    },
    {
      "epoch": 1.9054878048780488,
      "eval_loss": Infinity,
      "eval_runtime": 65.0831,
      "eval_samples_per_second": 14.074,
      "eval_steps_per_second": 3.519,
      "eval_wer": 0.516697804764129,
      "step": 2500
    },
    {
      "epoch": 1.9054878048780488,
      "step": 2500,
      "total_flos": 1.2777728251757648e+19,
      "train_loss": 2.486965802001953,
      "train_runtime": 5021.3633,
      "train_samples_per_second": 3.983,
      "train_steps_per_second": 0.498
    }
  ],
  "logging_steps": 100,
  "max_steps": 2500,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 400,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.2777728251757648e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}