|
{ |
|
"best_metric": 1.1056212186813354, |
|
"best_model_checkpoint": "/kaggle/output/checkpoint-6000", |
|
"epoch": 240.0, |
|
"eval_steps": 1000, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.7777777777777777e-11, |
|
"loss": 1.5477, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.7750000000000004e-08, |
|
"loss": 1.2028, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.3387225548902196, |
|
"eval_loss": 1.1078879833221436, |
|
"eval_runtime": 54.1057, |
|
"eval_samples_per_second": 92.596, |
|
"eval_steps_per_second": 11.588, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 5.5527777777777784e-08, |
|
"loss": 1.1152, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.3349301397205589, |
|
"eval_loss": 1.1080944538116455, |
|
"eval_runtime": 54.2029, |
|
"eval_samples_per_second": 92.43, |
|
"eval_steps_per_second": 11.568, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 8.327777777777778e-08, |
|
"loss": 1.1036, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.3377245508982036, |
|
"eval_loss": 1.1081210374832153, |
|
"eval_runtime": 54.0467, |
|
"eval_samples_per_second": 92.698, |
|
"eval_steps_per_second": 11.601, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 1.1105555555555557e-07, |
|
"loss": 1.0895, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.33912175648702597, |
|
"eval_loss": 1.1072543859481812, |
|
"eval_runtime": 54.1318, |
|
"eval_samples_per_second": 92.552, |
|
"eval_steps_per_second": 11.583, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.3880555555555558e-07, |
|
"loss": 1.0748, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.3435129740518962, |
|
"eval_loss": 1.1071943044662476, |
|
"eval_runtime": 54.4121, |
|
"eval_samples_per_second": 92.075, |
|
"eval_steps_per_second": 11.523, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 1.6658333333333335e-07, |
|
"loss": 1.055, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.3459081836327345, |
|
"eval_loss": 1.1056212186813354, |
|
"eval_runtime": 54.092, |
|
"eval_samples_per_second": 92.62, |
|
"eval_steps_per_second": 11.591, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.9433333333333334e-07, |
|
"loss": 1.0352, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.3469061876247505, |
|
"eval_loss": 1.1121071577072144, |
|
"eval_runtime": 53.9318, |
|
"eval_samples_per_second": 92.895, |
|
"eval_steps_per_second": 11.626, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 2.2211111111111114e-07, |
|
"loss": 1.0085, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.3534930139720559, |
|
"eval_loss": 1.1201248168945312, |
|
"eval_runtime": 54.0094, |
|
"eval_samples_per_second": 92.762, |
|
"eval_steps_per_second": 11.609, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 2.4986111111111113e-07, |
|
"loss": 0.9772, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.3568862275449102, |
|
"eval_loss": 1.1434961557388306, |
|
"eval_runtime": 54.0639, |
|
"eval_samples_per_second": 92.668, |
|
"eval_steps_per_second": 11.597, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.7761111111111115e-07, |
|
"loss": 0.9323, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.36087824351297404, |
|
"eval_loss": 1.1742260456085205, |
|
"eval_runtime": 54.7624, |
|
"eval_samples_per_second": 91.486, |
|
"eval_steps_per_second": 11.449, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 3.053888888888889e-07, |
|
"loss": 0.8779, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.3568862275449102, |
|
"eval_loss": 1.2272026538848877, |
|
"eval_runtime": 54.0934, |
|
"eval_samples_per_second": 92.618, |
|
"eval_steps_per_second": 11.591, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 3.331388888888889e-07, |
|
"loss": 0.8276, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.3562874251497006, |
|
"eval_loss": 1.3099753856658936, |
|
"eval_runtime": 54.3996, |
|
"eval_samples_per_second": 92.096, |
|
"eval_steps_per_second": 11.526, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 3.608888888888889e-07, |
|
"loss": 0.7716, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.3588822355289421, |
|
"eval_loss": 1.4230743646621704, |
|
"eval_runtime": 54.193, |
|
"eval_samples_per_second": 92.447, |
|
"eval_steps_per_second": 11.57, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"learning_rate": 3.886666666666667e-07, |
|
"loss": 0.7162, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 224.0, |
|
"eval_accuracy": 0.35568862275449104, |
|
"eval_loss": 1.547025442123413, |
|
"eval_runtime": 54.8911, |
|
"eval_samples_per_second": 91.272, |
|
"eval_steps_per_second": 11.423, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"learning_rate": 4.164166666666667e-07, |
|
"loss": 0.6447, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 240.0, |
|
"eval_accuracy": 0.35528942115768464, |
|
"eval_loss": 1.6746374368667603, |
|
"eval_runtime": 55.1232, |
|
"eval_samples_per_second": 90.887, |
|
"eval_steps_per_second": 11.375, |
|
"step": 15000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 10000000, |
|
"num_train_epochs": 161291, |
|
"save_steps": 1000, |
|
"total_flos": 3.135532695552e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|