|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.974948758824869, |
|
"global_step": 10950, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.553734061930783e-06, |
|
"loss": 11.1934, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 0.7989015579223633, |
|
"eval_runtime": 121.671, |
|
"eval_samples_per_second": 20.523, |
|
"eval_steps_per_second": 1.29, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 9.107468123861566e-06, |
|
"loss": 0.6665, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.6362500190734863, |
|
"eval_runtime": 121.243, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 9.807120237981e-06, |
|
"loss": 0.5506, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_loss": 0.5699377059936523, |
|
"eval_runtime": 121.2617, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.567220036464831e-06, |
|
"loss": 0.5167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 0.5575143694877625, |
|
"eval_runtime": 121.2628, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.327319834948662e-06, |
|
"loss": 0.4963, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.5507959127426147, |
|
"eval_runtime": 121.2437, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 9.087419633432492e-06, |
|
"loss": 0.4732, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"eval_loss": 0.5411986708641052, |
|
"eval_runtime": 121.2586, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 8.847519431916323e-06, |
|
"loss": 0.4566, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.5381733179092407, |
|
"eval_runtime": 121.2512, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 8.607619230400154e-06, |
|
"loss": 0.4575, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 0.532073438167572, |
|
"eval_runtime": 121.2451, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 8.367719028883984e-06, |
|
"loss": 0.4518, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.532995343208313, |
|
"eval_runtime": 121.2499, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 8.127818827367817e-06, |
|
"loss": 0.4152, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.5357652902603149, |
|
"eval_runtime": 121.2507, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 7.887918625851645e-06, |
|
"loss": 0.421, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.5311329364776611, |
|
"eval_runtime": 121.2368, |
|
"eval_samples_per_second": 20.596, |
|
"eval_steps_per_second": 1.295, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 7.648018424335478e-06, |
|
"loss": 0.4169, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 0.528181791305542, |
|
"eval_runtime": 121.2514, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.408118222819308e-06, |
|
"loss": 0.4047, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.5281467437744141, |
|
"eval_runtime": 121.2515, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 7.168218021303138e-06, |
|
"loss": 0.3819, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 0.5296162962913513, |
|
"eval_runtime": 121.2446, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"learning_rate": 6.92831781978697e-06, |
|
"loss": 0.3811, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 0.527252733707428, |
|
"eval_runtime": 121.2481, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 6.688417618270799e-06, |
|
"loss": 0.3783, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 0.5292276740074158, |
|
"eval_runtime": 121.255, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.448517416754631e-06, |
|
"loss": 0.3883, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 0.5283324122428894, |
|
"eval_runtime": 121.2518, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 6.208617215238461e-06, |
|
"loss": 0.3692, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_loss": 0.5359126925468445, |
|
"eval_runtime": 121.2557, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 5.968717013722292e-06, |
|
"loss": 0.3572, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.5336561799049377, |
|
"eval_runtime": 121.261, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 5.728816812206123e-06, |
|
"loss": 0.3504, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 0.5326528549194336, |
|
"eval_runtime": 121.2567, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 5.488916610689954e-06, |
|
"loss": 0.3555, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 0.5350491404533386, |
|
"eval_runtime": 121.2561, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 5.249016409173784e-06, |
|
"loss": 0.3553, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"eval_loss": 0.5344789028167725, |
|
"eval_runtime": 121.2538, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 5.009116207657615e-06, |
|
"loss": 0.3278, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"eval_loss": 0.5418434739112854, |
|
"eval_runtime": 121.2532, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 4.769216006141446e-06, |
|
"loss": 0.3315, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"eval_loss": 0.5402191877365112, |
|
"eval_runtime": 121.2529, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 4.5293158046252765e-06, |
|
"loss": 0.3351, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 0.5378587245941162, |
|
"eval_runtime": 121.2535, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"learning_rate": 4.289415603109107e-06, |
|
"loss": 0.3349, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 5.92, |
|
"eval_loss": 0.5371122360229492, |
|
"eval_runtime": 121.2566, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 4.049515401592938e-06, |
|
"loss": 0.3217, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"eval_loss": 0.5460793375968933, |
|
"eval_runtime": 121.2515, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.8096152000767683e-06, |
|
"loss": 0.3177, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"eval_loss": 0.5452025532722473, |
|
"eval_runtime": 121.2643, |
|
"eval_samples_per_second": 20.591, |
|
"eval_steps_per_second": 1.295, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.569714998560599e-06, |
|
"loss": 0.3157, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 0.5441482663154602, |
|
"eval_runtime": 121.2598, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"learning_rate": 3.32981479704443e-06, |
|
"loss": 0.3187, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 6.83, |
|
"eval_loss": 0.5422244071960449, |
|
"eval_runtime": 121.2573, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"learning_rate": 3.0899145955282605e-06, |
|
"loss": 0.3138, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 7.06, |
|
"eval_loss": 0.5464943051338196, |
|
"eval_runtime": 121.2517, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"learning_rate": 2.850014394012091e-06, |
|
"loss": 0.3049, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.29, |
|
"eval_loss": 0.5488951206207275, |
|
"eval_runtime": 121.2598, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 2.6101141924959217e-06, |
|
"loss": 0.3021, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"eval_loss": 0.551296591758728, |
|
"eval_runtime": 121.2573, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 2.3702139909797523e-06, |
|
"loss": 0.3011, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"eval_loss": 0.5499754548072815, |
|
"eval_runtime": 121.2448, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 2.1303137894635834e-06, |
|
"loss": 0.3007, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"eval_loss": 0.5497844219207764, |
|
"eval_runtime": 121.2334, |
|
"eval_samples_per_second": 20.597, |
|
"eval_steps_per_second": 1.295, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 1.890413587947414e-06, |
|
"loss": 0.296, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"eval_loss": 0.5538543462753296, |
|
"eval_runtime": 121.2385, |
|
"eval_samples_per_second": 20.596, |
|
"eval_steps_per_second": 1.295, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 1.6505133864312448e-06, |
|
"loss": 0.2981, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"eval_loss": 0.5549352765083313, |
|
"eval_runtime": 121.2524, |
|
"eval_samples_per_second": 20.593, |
|
"eval_steps_per_second": 1.295, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"learning_rate": 1.4106131849150754e-06, |
|
"loss": 0.2931, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.65, |
|
"eval_loss": 0.5541805624961853, |
|
"eval_runtime": 121.2599, |
|
"eval_samples_per_second": 20.592, |
|
"eval_steps_per_second": 1.295, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 1.1707129833989062e-06, |
|
"loss": 0.2923, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"eval_loss": 0.554023802280426, |
|
"eval_runtime": 121.2441, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 9.308127818827369e-07, |
|
"loss": 0.2776, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"eval_loss": 0.5579211711883545, |
|
"eval_runtime": 121.2499, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"learning_rate": 6.909125803665675e-07, |
|
"loss": 0.2913, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 9.34, |
|
"eval_loss": 0.5575366020202637, |
|
"eval_runtime": 121.2491, |
|
"eval_samples_per_second": 20.594, |
|
"eval_steps_per_second": 1.295, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 4.5101237885039827e-07, |
|
"loss": 0.2856, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 0.5578790307044983, |
|
"eval_runtime": 121.2422, |
|
"eval_samples_per_second": 20.595, |
|
"eval_steps_per_second": 1.295, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"learning_rate": 2.1111217733422898e-07, |
|
"loss": 0.288, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"eval_loss": 0.5565530061721802, |
|
"eval_runtime": 121.2668, |
|
"eval_samples_per_second": 20.591, |
|
"eval_steps_per_second": 1.295, |
|
"step": 10750 |
|
} |
|
], |
|
"max_steps": 10970, |
|
"num_train_epochs": 10, |
|
"total_flos": 2.0607780963875328e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|