{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.974948758824869, "global_step": 10950, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.23, "learning_rate": 4.553734061930783e-06, "loss": 11.1934, "step": 250 }, { "epoch": 0.23, "eval_loss": 0.7989015579223633, "eval_runtime": 121.671, "eval_samples_per_second": 20.523, "eval_steps_per_second": 1.29, "step": 250 }, { "epoch": 0.46, "learning_rate": 9.107468123861566e-06, "loss": 0.6665, "step": 500 }, { "epoch": 0.46, "eval_loss": 0.6362500190734863, "eval_runtime": 121.243, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 500 }, { "epoch": 0.68, "learning_rate": 9.807120237981e-06, "loss": 0.5506, "step": 750 }, { "epoch": 0.68, "eval_loss": 0.5699377059936523, "eval_runtime": 121.2617, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 750 }, { "epoch": 0.91, "learning_rate": 9.567220036464831e-06, "loss": 0.5167, "step": 1000 }, { "epoch": 0.91, "eval_loss": 0.5575143694877625, "eval_runtime": 121.2628, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 1000 }, { "epoch": 1.14, "learning_rate": 9.327319834948662e-06, "loss": 0.4963, "step": 1250 }, { "epoch": 1.14, "eval_loss": 0.5507959127426147, "eval_runtime": 121.2437, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 1250 }, { "epoch": 1.37, "learning_rate": 9.087419633432492e-06, "loss": 0.4732, "step": 1500 }, { "epoch": 1.37, "eval_loss": 0.5411986708641052, "eval_runtime": 121.2586, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 1500 }, { "epoch": 1.59, "learning_rate": 8.847519431916323e-06, "loss": 0.4566, "step": 1750 }, { "epoch": 1.59, "eval_loss": 0.5381733179092407, "eval_runtime": 121.2512, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 1750 }, { "epoch": 1.82, "learning_rate": 8.607619230400154e-06, "loss": 0.4575, "step": 2000 }, { "epoch": 1.82, "eval_loss": 0.532073438167572, "eval_runtime": 121.2451, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 2000 }, { "epoch": 2.05, "learning_rate": 8.367719028883984e-06, "loss": 0.4518, "step": 2250 }, { "epoch": 2.05, "eval_loss": 0.532995343208313, "eval_runtime": 121.2499, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 2250 }, { "epoch": 2.28, "learning_rate": 8.127818827367817e-06, "loss": 0.4152, "step": 2500 }, { "epoch": 2.28, "eval_loss": 0.5357652902603149, "eval_runtime": 121.2507, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 2500 }, { "epoch": 2.51, "learning_rate": 7.887918625851645e-06, "loss": 0.421, "step": 2750 }, { "epoch": 2.51, "eval_loss": 0.5311329364776611, "eval_runtime": 121.2368, "eval_samples_per_second": 20.596, "eval_steps_per_second": 1.295, "step": 2750 }, { "epoch": 2.73, "learning_rate": 7.648018424335478e-06, "loss": 0.4169, "step": 3000 }, { "epoch": 2.73, "eval_loss": 0.528181791305542, "eval_runtime": 121.2514, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 3000 }, { "epoch": 2.96, "learning_rate": 7.408118222819308e-06, "loss": 0.4047, "step": 3250 }, { "epoch": 2.96, "eval_loss": 0.5281467437744141, "eval_runtime": 121.2515, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 3250 }, { "epoch": 3.19, "learning_rate": 7.168218021303138e-06, "loss": 0.3819, "step": 3500 }, { "epoch": 3.19, "eval_loss": 0.5296162962913513, "eval_runtime": 121.2446, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 3500 }, { "epoch": 3.42, "learning_rate": 6.92831781978697e-06, "loss": 0.3811, "step": 3750 }, { "epoch": 3.42, "eval_loss": 0.527252733707428, "eval_runtime": 121.2481, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 3750 }, { "epoch": 3.64, "learning_rate": 6.688417618270799e-06, "loss": 0.3783, "step": 4000 }, { "epoch": 3.64, "eval_loss": 0.5292276740074158, "eval_runtime": 121.255, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 4000 }, { "epoch": 3.87, "learning_rate": 6.448517416754631e-06, "loss": 0.3883, "step": 4250 }, { "epoch": 3.87, "eval_loss": 0.5283324122428894, "eval_runtime": 121.2518, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 4250 }, { "epoch": 4.1, "learning_rate": 6.208617215238461e-06, "loss": 0.3692, "step": 4500 }, { "epoch": 4.1, "eval_loss": 0.5359126925468445, "eval_runtime": 121.2557, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 4500 }, { "epoch": 4.33, "learning_rate": 5.968717013722292e-06, "loss": 0.3572, "step": 4750 }, { "epoch": 4.33, "eval_loss": 0.5336561799049377, "eval_runtime": 121.261, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 4750 }, { "epoch": 4.55, "learning_rate": 5.728816812206123e-06, "loss": 0.3504, "step": 5000 }, { "epoch": 4.55, "eval_loss": 0.5326528549194336, "eval_runtime": 121.2567, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 5000 }, { "epoch": 4.78, "learning_rate": 5.488916610689954e-06, "loss": 0.3555, "step": 5250 }, { "epoch": 4.78, "eval_loss": 0.5350491404533386, "eval_runtime": 121.2561, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 5250 }, { "epoch": 5.01, "learning_rate": 5.249016409173784e-06, "loss": 0.3553, "step": 5500 }, { "epoch": 5.01, "eval_loss": 0.5344789028167725, "eval_runtime": 121.2538, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 5500 }, { "epoch": 5.24, "learning_rate": 5.009116207657615e-06, "loss": 0.3278, "step": 5750 }, { "epoch": 5.24, "eval_loss": 0.5418434739112854, "eval_runtime": 121.2532, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 5750 }, { "epoch": 5.47, "learning_rate": 4.769216006141446e-06, "loss": 0.3315, "step": 6000 }, { "epoch": 5.47, "eval_loss": 0.5402191877365112, "eval_runtime": 121.2529, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 6000 }, { "epoch": 5.69, "learning_rate": 4.5293158046252765e-06, "loss": 0.3351, "step": 6250 }, { "epoch": 5.69, "eval_loss": 0.5378587245941162, "eval_runtime": 121.2535, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 6250 }, { "epoch": 5.92, "learning_rate": 4.289415603109107e-06, "loss": 0.3349, "step": 6500 }, { "epoch": 5.92, "eval_loss": 0.5371122360229492, "eval_runtime": 121.2566, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 6500 }, { "epoch": 6.15, "learning_rate": 4.049515401592938e-06, "loss": 0.3217, "step": 6750 }, { "epoch": 6.15, "eval_loss": 0.5460793375968933, "eval_runtime": 121.2515, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 6750 }, { "epoch": 6.38, "learning_rate": 3.8096152000767683e-06, "loss": 0.3177, "step": 7000 }, { "epoch": 6.38, "eval_loss": 0.5452025532722473, "eval_runtime": 121.2643, "eval_samples_per_second": 20.591, "eval_steps_per_second": 1.295, "step": 7000 }, { "epoch": 6.6, "learning_rate": 3.569714998560599e-06, "loss": 0.3157, "step": 7250 }, { "epoch": 6.6, "eval_loss": 0.5441482663154602, "eval_runtime": 121.2598, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 7250 }, { "epoch": 6.83, "learning_rate": 3.32981479704443e-06, "loss": 0.3187, "step": 7500 }, { "epoch": 6.83, "eval_loss": 0.5422244071960449, "eval_runtime": 121.2573, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 7500 }, { "epoch": 7.06, "learning_rate": 3.0899145955282605e-06, "loss": 0.3138, "step": 7750 }, { "epoch": 7.06, "eval_loss": 0.5464943051338196, "eval_runtime": 121.2517, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 7750 }, { "epoch": 7.29, "learning_rate": 2.850014394012091e-06, "loss": 0.3049, "step": 8000 }, { "epoch": 7.29, "eval_loss": 0.5488951206207275, "eval_runtime": 121.2598, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 8000 }, { "epoch": 7.52, "learning_rate": 2.6101141924959217e-06, "loss": 0.3021, "step": 8250 }, { "epoch": 7.52, "eval_loss": 0.551296591758728, "eval_runtime": 121.2573, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 8250 }, { "epoch": 7.74, "learning_rate": 2.3702139909797523e-06, "loss": 0.3011, "step": 8500 }, { "epoch": 7.74, "eval_loss": 0.5499754548072815, "eval_runtime": 121.2448, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 8500 }, { "epoch": 7.97, "learning_rate": 2.1303137894635834e-06, "loss": 0.3007, "step": 8750 }, { "epoch": 7.97, "eval_loss": 0.5497844219207764, "eval_runtime": 121.2334, "eval_samples_per_second": 20.597, "eval_steps_per_second": 1.295, "step": 8750 }, { "epoch": 8.2, "learning_rate": 1.890413587947414e-06, "loss": 0.296, "step": 9000 }, { "epoch": 8.2, "eval_loss": 0.5538543462753296, "eval_runtime": 121.2385, "eval_samples_per_second": 20.596, "eval_steps_per_second": 1.295, "step": 9000 }, { "epoch": 8.43, "learning_rate": 1.6505133864312448e-06, "loss": 0.2981, "step": 9250 }, { "epoch": 8.43, "eval_loss": 0.5549352765083313, "eval_runtime": 121.2524, "eval_samples_per_second": 20.593, "eval_steps_per_second": 1.295, "step": 9250 }, { "epoch": 8.65, "learning_rate": 1.4106131849150754e-06, "loss": 0.2931, "step": 9500 }, { "epoch": 8.65, "eval_loss": 0.5541805624961853, "eval_runtime": 121.2599, "eval_samples_per_second": 20.592, "eval_steps_per_second": 1.295, "step": 9500 }, { "epoch": 8.88, "learning_rate": 1.1707129833989062e-06, "loss": 0.2923, "step": 9750 }, { "epoch": 8.88, "eval_loss": 0.554023802280426, "eval_runtime": 121.2441, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 9750 }, { "epoch": 9.11, "learning_rate": 9.308127818827369e-07, "loss": 0.2776, "step": 10000 }, { "epoch": 9.11, "eval_loss": 0.5579211711883545, "eval_runtime": 121.2499, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 10000 }, { "epoch": 9.34, "learning_rate": 6.909125803665675e-07, "loss": 0.2913, "step": 10250 }, { "epoch": 9.34, "eval_loss": 0.5575366020202637, "eval_runtime": 121.2491, "eval_samples_per_second": 20.594, "eval_steps_per_second": 1.295, "step": 10250 }, { "epoch": 9.57, "learning_rate": 4.5101237885039827e-07, "loss": 0.2856, "step": 10500 }, { "epoch": 9.57, "eval_loss": 0.5578790307044983, "eval_runtime": 121.2422, "eval_samples_per_second": 20.595, "eval_steps_per_second": 1.295, "step": 10500 }, { "epoch": 9.79, "learning_rate": 2.1111217733422898e-07, "loss": 0.288, "step": 10750 }, { "epoch": 9.79, "eval_loss": 0.5565530061721802, "eval_runtime": 121.2668, "eval_samples_per_second": 20.591, "eval_steps_per_second": 1.295, "step": 10750 } ], "max_steps": 10970, "num_train_epochs": 10, "total_flos": 2.0607780963875328e+17, "trial_name": null, "trial_params": null }