{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.974948758824869,
  "global_step": 10950,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.23, "learning_rate": 4.553734061930783e-05, "loss": 4.5455, "step": 250 },
    { "epoch": 0.23, "eval_loss": 0.06012005731463432, "eval_runtime": 125.7302, "eval_samples_per_second": 19.86, "eval_steps_per_second": 1.249, "step": 250 },
    { "epoch": 0.46, "learning_rate": 9.107468123861566e-05, "loss": 0.0471, "step": 500 },
    { "epoch": 0.46, "eval_loss": 0.05808735638856888, "eval_runtime": 125.3179, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 500 },
    { "epoch": 0.68, "learning_rate": 9.807120237981e-05, "loss": 0.0443, "step": 750 },
    { "epoch": 0.68, "eval_loss": 0.05481741577386856, "eval_runtime": 125.3239, "eval_samples_per_second": 19.924, "eval_steps_per_second": 1.253, "step": 750 },
    { "epoch": 0.91, "learning_rate": 9.567220036464831e-05, "loss": 0.0428, "step": 1000 },
    { "epoch": 0.91, "eval_loss": 0.05039665102958679, "eval_runtime": 125.3169, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 1000 },
    { "epoch": 1.14, "learning_rate": 9.327319834948663e-05, "loss": 0.0329, "step": 1250 },
    { "epoch": 1.14, "eval_loss": 0.05058171600103378, "eval_runtime": 125.3095, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 1250 },
    { "epoch": 1.37, "learning_rate": 9.087419633432492e-05, "loss": 0.0299, "step": 1500 },
    { "epoch": 1.37, "eval_loss": 0.04842585325241089, "eval_runtime": 125.3053, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 1500 },
    { "epoch": 1.59, "learning_rate": 8.847519431916324e-05, "loss": 0.0295, "step": 1750 },
    { "epoch": 1.59, "eval_loss": 0.049905285239219666, "eval_runtime": 125.3158, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 1750 },
    { "epoch": 1.82, "learning_rate": 8.607619230400153e-05, "loss": 0.0304, "step": 2000 },
    { "epoch": 1.82, "eval_loss": 0.047520652413368225, "eval_runtime": 125.3111, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 2000 },
    { "epoch": 2.05, "learning_rate": 8.367719028883985e-05, "loss": 0.0277, "step": 2250 },
    { "epoch": 2.05, "eval_loss": 0.04981054365634918, "eval_runtime": 125.3207, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 2250 },
    { "epoch": 2.28, "learning_rate": 8.127818827367816e-05, "loss": 0.0186, "step": 2500 },
    { "epoch": 2.28, "eval_loss": 0.04637761414051056, "eval_runtime": 125.3226, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 2500 },
    { "epoch": 2.51, "learning_rate": 7.887918625851645e-05, "loss": 0.0199, "step": 2750 },
    { "epoch": 2.51, "eval_loss": 0.04944201186299324, "eval_runtime": 125.3132, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 2750 },
    { "epoch": 2.73, "learning_rate": 7.648018424335477e-05, "loss": 0.0205, "step": 3000 },
    { "epoch": 2.73, "eval_loss": 0.045388150960206985, "eval_runtime": 125.3269, "eval_samples_per_second": 19.924, "eval_steps_per_second": 1.253, "step": 3000 },
    { "epoch": 2.96, "learning_rate": 7.408118222819308e-05, "loss": 0.0202, "step": 3250 },
    { "epoch": 2.96, "eval_loss": 0.04644118994474411, "eval_runtime": 125.317, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 3250 },
    { "epoch": 3.19, "learning_rate": 7.168218021303138e-05, "loss": 0.015, "step": 3500 },
    { "epoch": 3.19, "eval_loss": 0.0492834635078907, "eval_runtime": 125.3122, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 3500 },
    { "epoch": 3.42, "learning_rate": 6.928317819786969e-05, "loss": 0.0136, "step": 3750 },
    { "epoch": 3.42, "eval_loss": 0.050950221717357635, "eval_runtime": 125.3101, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 3750 },
    { "epoch": 3.64, "learning_rate": 6.6884176182708e-05, "loss": 0.0139, "step": 4000 },
    { "epoch": 3.64, "eval_loss": 0.0521300733089447, "eval_runtime": 125.3091, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 4000 },
    { "epoch": 3.87, "learning_rate": 6.44851741675463e-05, "loss": 0.0149, "step": 4250 },
    { "epoch": 3.87, "eval_loss": 0.049005962908267975, "eval_runtime": 125.2924, "eval_samples_per_second": 19.929, "eval_steps_per_second": 1.253, "step": 4250 },
    { "epoch": 4.1, "learning_rate": 6.208617215238462e-05, "loss": 0.012, "step": 4500 },
    { "epoch": 4.1, "eval_loss": 0.05201614275574684, "eval_runtime": 125.3236, "eval_samples_per_second": 19.924, "eval_steps_per_second": 1.253, "step": 4500 },
    { "epoch": 4.33, "learning_rate": 5.968717013722291e-05, "loss": 0.0094, "step": 4750 },
    { "epoch": 4.33, "eval_loss": 0.052882954478263855, "eval_runtime": 125.3182, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 4750 },
    { "epoch": 4.55, "learning_rate": 5.7288168122061226e-05, "loss": 0.0104, "step": 5000 },
    { "epoch": 4.55, "eval_loss": 0.05443257838487625, "eval_runtime": 125.308, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 5000 },
    { "epoch": 4.78, "learning_rate": 5.488916610689954e-05, "loss": 0.0095, "step": 5250 },
    { "epoch": 4.78, "eval_loss": 0.0512896366417408, "eval_runtime": 125.3186, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 5250 },
    { "epoch": 5.01, "learning_rate": 5.249016409173784e-05, "loss": 0.0099, "step": 5500 },
    { "epoch": 5.01, "eval_loss": 0.05250149220228195, "eval_runtime": 125.3202, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 5500 },
    { "epoch": 5.24, "learning_rate": 5.009116207657615e-05, "loss": 0.0067, "step": 5750 },
    { "epoch": 5.24, "eval_loss": 0.05294517055153847, "eval_runtime": 125.3036, "eval_samples_per_second": 19.928, "eval_steps_per_second": 1.253, "step": 5750 },
    { "epoch": 5.47, "learning_rate": 4.769216006141446e-05, "loss": 0.0064, "step": 6000 },
    { "epoch": 5.47, "eval_loss": 0.05718787759542465, "eval_runtime": 125.3174, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 6000 },
    { "epoch": 5.69, "learning_rate": 4.5293158046252756e-05, "loss": 0.0062, "step": 6250 },
    { "epoch": 5.69, "eval_loss": 0.0588238462805748, "eval_runtime": 125.32, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 6250 },
    { "epoch": 5.92, "learning_rate": 4.289415603109107e-05, "loss": 0.0066, "step": 6500 },
    { "epoch": 5.92, "eval_loss": 0.055590804666280746, "eval_runtime": 125.32, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 6500 },
    { "epoch": 6.15, "learning_rate": 4.0495154015929375e-05, "loss": 0.0049, "step": 6750 },
    { "epoch": 6.15, "eval_loss": 0.060405001044273376, "eval_runtime": 125.3175, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 6750 },
    { "epoch": 6.38, "learning_rate": 3.809615200076768e-05, "loss": 0.0044, "step": 7000 },
    { "epoch": 6.38, "eval_loss": 0.0592646524310112, "eval_runtime": 125.3136, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 7000 },
    { "epoch": 6.6, "learning_rate": 3.569714998560599e-05, "loss": 0.0042, "step": 7250 },
    { "epoch": 6.6, "eval_loss": 0.059081513434648514, "eval_runtime": 125.2994, "eval_samples_per_second": 19.928, "eval_steps_per_second": 1.253, "step": 7250 },
    { "epoch": 6.83, "learning_rate": 3.32981479704443e-05, "loss": 0.0048, "step": 7500 },
    { "epoch": 6.83, "eval_loss": 0.06123210862278938, "eval_runtime": 125.3056, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 7500 },
    { "epoch": 7.06, "learning_rate": 3.0899145955282606e-05, "loss": 0.004, "step": 7750 },
    { "epoch": 7.06, "eval_loss": 0.060906291007995605, "eval_runtime": 125.3113, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 7750 },
    { "epoch": 7.29, "learning_rate": 2.850014394012091e-05, "loss": 0.003, "step": 8000 },
    { "epoch": 7.29, "eval_loss": 0.06742047518491745, "eval_runtime": 125.3117, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 8000 },
    { "epoch": 7.52, "learning_rate": 2.6101141924959215e-05, "loss": 0.003, "step": 8250 },
    { "epoch": 7.52, "eval_loss": 0.0640687569975853, "eval_runtime": 125.31, "eval_samples_per_second": 19.927, "eval_steps_per_second": 1.253, "step": 8250 },
    { "epoch": 7.74, "learning_rate": 2.3702139909797524e-05, "loss": 0.0027, "step": 8500 },
    { "epoch": 7.74, "eval_loss": 0.06774434447288513, "eval_runtime": 125.316, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 8500 },
    { "epoch": 7.97, "learning_rate": 2.1303137894635834e-05, "loss": 0.0028, "step": 8750 },
    { "epoch": 7.97, "eval_loss": 0.06737840920686722, "eval_runtime": 125.323, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 8750 },
    { "epoch": 8.2, "learning_rate": 1.890413587947414e-05, "loss": 0.0021, "step": 9000 },
    { "epoch": 8.2, "eval_loss": 0.06941425800323486, "eval_runtime": 125.3147, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 9000 },
    { "epoch": 8.43, "learning_rate": 1.6505133864312446e-05, "loss": 0.0018, "step": 9250 },
    { "epoch": 8.43, "eval_loss": 0.07149343937635422, "eval_runtime": 125.3155, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 9250 },
    { "epoch": 8.65, "learning_rate": 1.4106131849150753e-05, "loss": 0.0021, "step": 9500 },
    { "epoch": 8.65, "eval_loss": 0.06807977706193924, "eval_runtime": 125.3164, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 9500 },
    { "epoch": 8.88, "learning_rate": 1.1707129833989061e-05, "loss": 0.0017, "step": 9750 },
    { "epoch": 8.88, "eval_loss": 0.07044515013694763, "eval_runtime": 125.3307, "eval_samples_per_second": 19.923, "eval_steps_per_second": 1.253, "step": 9750 },
    { "epoch": 9.11, "learning_rate": 9.308127818827369e-06, "loss": 0.0014, "step": 10000 },
    { "epoch": 9.11, "eval_loss": 0.07252407819032669, "eval_runtime": 125.329, "eval_samples_per_second": 19.924, "eval_steps_per_second": 1.253, "step": 10000 },
    { "epoch": 9.34, "learning_rate": 6.909125803665675e-06, "loss": 0.0012, "step": 10250 },
    { "epoch": 9.34, "eval_loss": 0.07298342883586884, "eval_runtime": 125.3163, "eval_samples_per_second": 19.926, "eval_steps_per_second": 1.253, "step": 10250 },
    { "epoch": 9.57, "learning_rate": 4.510123788503983e-06, "loss": 0.0012, "step": 10500 },
    { "epoch": 9.57, "eval_loss": 0.07320970296859741, "eval_runtime": 125.318, "eval_samples_per_second": 19.925, "eval_steps_per_second": 1.253, "step": 10500 },
    { "epoch": 9.79, "learning_rate": 2.11112177334229e-06, "loss": 0.0013, "step": 10750 },
    { "epoch": 9.79, "eval_loss": 0.07287949323654175, "eval_runtime": 125.3257, "eval_samples_per_second": 19.924, "eval_steps_per_second": 1.253, "step": 10750 }
  ],
  "max_steps": 10970,
  "num_train_epochs": 10,
  "total_flos": 2.0454829414889472e+17,
  "trial_name": null,
  "trial_params": null
}