inst-qg-vinewsqa-vit5 / trainer_state.json
shnl's picture
'instructionqg'
07a3872
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.974948758824869,
"global_step": 10950,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.23,
"learning_rate": 4.553734061930783e-06,
"loss": 11.1934,
"step": 250
},
{
"epoch": 0.23,
"eval_loss": 0.7989015579223633,
"eval_runtime": 121.671,
"eval_samples_per_second": 20.523,
"eval_steps_per_second": 1.29,
"step": 250
},
{
"epoch": 0.46,
"learning_rate": 9.107468123861566e-06,
"loss": 0.6665,
"step": 500
},
{
"epoch": 0.46,
"eval_loss": 0.6362500190734863,
"eval_runtime": 121.243,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 500
},
{
"epoch": 0.68,
"learning_rate": 9.807120237981e-06,
"loss": 0.5506,
"step": 750
},
{
"epoch": 0.68,
"eval_loss": 0.5699377059936523,
"eval_runtime": 121.2617,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 750
},
{
"epoch": 0.91,
"learning_rate": 9.567220036464831e-06,
"loss": 0.5167,
"step": 1000
},
{
"epoch": 0.91,
"eval_loss": 0.5575143694877625,
"eval_runtime": 121.2628,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 1000
},
{
"epoch": 1.14,
"learning_rate": 9.327319834948662e-06,
"loss": 0.4963,
"step": 1250
},
{
"epoch": 1.14,
"eval_loss": 0.5507959127426147,
"eval_runtime": 121.2437,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 1250
},
{
"epoch": 1.37,
"learning_rate": 9.087419633432492e-06,
"loss": 0.4732,
"step": 1500
},
{
"epoch": 1.37,
"eval_loss": 0.5411986708641052,
"eval_runtime": 121.2586,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 1500
},
{
"epoch": 1.59,
"learning_rate": 8.847519431916323e-06,
"loss": 0.4566,
"step": 1750
},
{
"epoch": 1.59,
"eval_loss": 0.5381733179092407,
"eval_runtime": 121.2512,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 1750
},
{
"epoch": 1.82,
"learning_rate": 8.607619230400154e-06,
"loss": 0.4575,
"step": 2000
},
{
"epoch": 1.82,
"eval_loss": 0.532073438167572,
"eval_runtime": 121.2451,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 2000
},
{
"epoch": 2.05,
"learning_rate": 8.367719028883984e-06,
"loss": 0.4518,
"step": 2250
},
{
"epoch": 2.05,
"eval_loss": 0.532995343208313,
"eval_runtime": 121.2499,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 2250
},
{
"epoch": 2.28,
"learning_rate": 8.127818827367817e-06,
"loss": 0.4152,
"step": 2500
},
{
"epoch": 2.28,
"eval_loss": 0.5357652902603149,
"eval_runtime": 121.2507,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 2500
},
{
"epoch": 2.51,
"learning_rate": 7.887918625851645e-06,
"loss": 0.421,
"step": 2750
},
{
"epoch": 2.51,
"eval_loss": 0.5311329364776611,
"eval_runtime": 121.2368,
"eval_samples_per_second": 20.596,
"eval_steps_per_second": 1.295,
"step": 2750
},
{
"epoch": 2.73,
"learning_rate": 7.648018424335478e-06,
"loss": 0.4169,
"step": 3000
},
{
"epoch": 2.73,
"eval_loss": 0.528181791305542,
"eval_runtime": 121.2514,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 3000
},
{
"epoch": 2.96,
"learning_rate": 7.408118222819308e-06,
"loss": 0.4047,
"step": 3250
},
{
"epoch": 2.96,
"eval_loss": 0.5281467437744141,
"eval_runtime": 121.2515,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 3250
},
{
"epoch": 3.19,
"learning_rate": 7.168218021303138e-06,
"loss": 0.3819,
"step": 3500
},
{
"epoch": 3.19,
"eval_loss": 0.5296162962913513,
"eval_runtime": 121.2446,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 3500
},
{
"epoch": 3.42,
"learning_rate": 6.92831781978697e-06,
"loss": 0.3811,
"step": 3750
},
{
"epoch": 3.42,
"eval_loss": 0.527252733707428,
"eval_runtime": 121.2481,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 3750
},
{
"epoch": 3.64,
"learning_rate": 6.688417618270799e-06,
"loss": 0.3783,
"step": 4000
},
{
"epoch": 3.64,
"eval_loss": 0.5292276740074158,
"eval_runtime": 121.255,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 4000
},
{
"epoch": 3.87,
"learning_rate": 6.448517416754631e-06,
"loss": 0.3883,
"step": 4250
},
{
"epoch": 3.87,
"eval_loss": 0.5283324122428894,
"eval_runtime": 121.2518,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 4250
},
{
"epoch": 4.1,
"learning_rate": 6.208617215238461e-06,
"loss": 0.3692,
"step": 4500
},
{
"epoch": 4.1,
"eval_loss": 0.5359126925468445,
"eval_runtime": 121.2557,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 4500
},
{
"epoch": 4.33,
"learning_rate": 5.968717013722292e-06,
"loss": 0.3572,
"step": 4750
},
{
"epoch": 4.33,
"eval_loss": 0.5336561799049377,
"eval_runtime": 121.261,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 4750
},
{
"epoch": 4.55,
"learning_rate": 5.728816812206123e-06,
"loss": 0.3504,
"step": 5000
},
{
"epoch": 4.55,
"eval_loss": 0.5326528549194336,
"eval_runtime": 121.2567,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 5000
},
{
"epoch": 4.78,
"learning_rate": 5.488916610689954e-06,
"loss": 0.3555,
"step": 5250
},
{
"epoch": 4.78,
"eval_loss": 0.5350491404533386,
"eval_runtime": 121.2561,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 5250
},
{
"epoch": 5.01,
"learning_rate": 5.249016409173784e-06,
"loss": 0.3553,
"step": 5500
},
{
"epoch": 5.01,
"eval_loss": 0.5344789028167725,
"eval_runtime": 121.2538,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 5500
},
{
"epoch": 5.24,
"learning_rate": 5.009116207657615e-06,
"loss": 0.3278,
"step": 5750
},
{
"epoch": 5.24,
"eval_loss": 0.5418434739112854,
"eval_runtime": 121.2532,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 5750
},
{
"epoch": 5.47,
"learning_rate": 4.769216006141446e-06,
"loss": 0.3315,
"step": 6000
},
{
"epoch": 5.47,
"eval_loss": 0.5402191877365112,
"eval_runtime": 121.2529,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 6000
},
{
"epoch": 5.69,
"learning_rate": 4.5293158046252765e-06,
"loss": 0.3351,
"step": 6250
},
{
"epoch": 5.69,
"eval_loss": 0.5378587245941162,
"eval_runtime": 121.2535,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 6250
},
{
"epoch": 5.92,
"learning_rate": 4.289415603109107e-06,
"loss": 0.3349,
"step": 6500
},
{
"epoch": 5.92,
"eval_loss": 0.5371122360229492,
"eval_runtime": 121.2566,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 6500
},
{
"epoch": 6.15,
"learning_rate": 4.049515401592938e-06,
"loss": 0.3217,
"step": 6750
},
{
"epoch": 6.15,
"eval_loss": 0.5460793375968933,
"eval_runtime": 121.2515,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 6750
},
{
"epoch": 6.38,
"learning_rate": 3.8096152000767683e-06,
"loss": 0.3177,
"step": 7000
},
{
"epoch": 6.38,
"eval_loss": 0.5452025532722473,
"eval_runtime": 121.2643,
"eval_samples_per_second": 20.591,
"eval_steps_per_second": 1.295,
"step": 7000
},
{
"epoch": 6.6,
"learning_rate": 3.569714998560599e-06,
"loss": 0.3157,
"step": 7250
},
{
"epoch": 6.6,
"eval_loss": 0.5441482663154602,
"eval_runtime": 121.2598,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 7250
},
{
"epoch": 6.83,
"learning_rate": 3.32981479704443e-06,
"loss": 0.3187,
"step": 7500
},
{
"epoch": 6.83,
"eval_loss": 0.5422244071960449,
"eval_runtime": 121.2573,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 7500
},
{
"epoch": 7.06,
"learning_rate": 3.0899145955282605e-06,
"loss": 0.3138,
"step": 7750
},
{
"epoch": 7.06,
"eval_loss": 0.5464943051338196,
"eval_runtime": 121.2517,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 7750
},
{
"epoch": 7.29,
"learning_rate": 2.850014394012091e-06,
"loss": 0.3049,
"step": 8000
},
{
"epoch": 7.29,
"eval_loss": 0.5488951206207275,
"eval_runtime": 121.2598,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 8000
},
{
"epoch": 7.52,
"learning_rate": 2.6101141924959217e-06,
"loss": 0.3021,
"step": 8250
},
{
"epoch": 7.52,
"eval_loss": 0.551296591758728,
"eval_runtime": 121.2573,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 8250
},
{
"epoch": 7.74,
"learning_rate": 2.3702139909797523e-06,
"loss": 0.3011,
"step": 8500
},
{
"epoch": 7.74,
"eval_loss": 0.5499754548072815,
"eval_runtime": 121.2448,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 8500
},
{
"epoch": 7.97,
"learning_rate": 2.1303137894635834e-06,
"loss": 0.3007,
"step": 8750
},
{
"epoch": 7.97,
"eval_loss": 0.5497844219207764,
"eval_runtime": 121.2334,
"eval_samples_per_second": 20.597,
"eval_steps_per_second": 1.295,
"step": 8750
},
{
"epoch": 8.2,
"learning_rate": 1.890413587947414e-06,
"loss": 0.296,
"step": 9000
},
{
"epoch": 8.2,
"eval_loss": 0.5538543462753296,
"eval_runtime": 121.2385,
"eval_samples_per_second": 20.596,
"eval_steps_per_second": 1.295,
"step": 9000
},
{
"epoch": 8.43,
"learning_rate": 1.6505133864312448e-06,
"loss": 0.2981,
"step": 9250
},
{
"epoch": 8.43,
"eval_loss": 0.5549352765083313,
"eval_runtime": 121.2524,
"eval_samples_per_second": 20.593,
"eval_steps_per_second": 1.295,
"step": 9250
},
{
"epoch": 8.65,
"learning_rate": 1.4106131849150754e-06,
"loss": 0.2931,
"step": 9500
},
{
"epoch": 8.65,
"eval_loss": 0.5541805624961853,
"eval_runtime": 121.2599,
"eval_samples_per_second": 20.592,
"eval_steps_per_second": 1.295,
"step": 9500
},
{
"epoch": 8.88,
"learning_rate": 1.1707129833989062e-06,
"loss": 0.2923,
"step": 9750
},
{
"epoch": 8.88,
"eval_loss": 0.554023802280426,
"eval_runtime": 121.2441,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 9750
},
{
"epoch": 9.11,
"learning_rate": 9.308127818827369e-07,
"loss": 0.2776,
"step": 10000
},
{
"epoch": 9.11,
"eval_loss": 0.5579211711883545,
"eval_runtime": 121.2499,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 10000
},
{
"epoch": 9.34,
"learning_rate": 6.909125803665675e-07,
"loss": 0.2913,
"step": 10250
},
{
"epoch": 9.34,
"eval_loss": 0.5575366020202637,
"eval_runtime": 121.2491,
"eval_samples_per_second": 20.594,
"eval_steps_per_second": 1.295,
"step": 10250
},
{
"epoch": 9.57,
"learning_rate": 4.5101237885039827e-07,
"loss": 0.2856,
"step": 10500
},
{
"epoch": 9.57,
"eval_loss": 0.5578790307044983,
"eval_runtime": 121.2422,
"eval_samples_per_second": 20.595,
"eval_steps_per_second": 1.295,
"step": 10500
},
{
"epoch": 9.79,
"learning_rate": 2.1111217733422898e-07,
"loss": 0.288,
"step": 10750
},
{
"epoch": 9.79,
"eval_loss": 0.5565530061721802,
"eval_runtime": 121.2668,
"eval_samples_per_second": 20.591,
"eval_steps_per_second": 1.295,
"step": 10750
}
],
"max_steps": 10970,
"num_train_epochs": 10,
"total_flos": 2.0607780963875328e+17,
"trial_name": null,
"trial_params": null
}