{
  "best_metric": 1.1375267505645752,
  "best_model_checkpoint": "./outputs/checkpoint-4000",
  "epoch": 2.9143897996357016,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.2671,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.1813676357269287,
      "eval_runtime": 549.6637,
      "eval_samples_per_second": 11.414,
      "eval_steps_per_second": 1.428,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 2.1547,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 2.131509304046631,
      "eval_runtime": 549.883,
      "eval_samples_per_second": 11.41,
      "eval_steps_per_second": 1.428,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 2.1088,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 2.0894575119018555,
      "eval_runtime": 550.3941,
      "eval_samples_per_second": 11.399,
      "eval_steps_per_second": 1.426,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 2.0681,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 2.0435657501220703,
      "eval_runtime": 550.6916,
      "eval_samples_per_second": 11.393,
      "eval_steps_per_second": 1.425,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 2.0241,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 2.0031208992004395,
      "eval_runtime": 550.5855,
      "eval_samples_per_second": 11.395,
      "eval_steps_per_second": 1.426,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 1.9755,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 1.9635978937149048,
      "eval_runtime": 552.4575,
      "eval_samples_per_second": 11.357,
      "eval_steps_per_second": 1.421,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 1.9447,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.9280308485031128,
      "eval_runtime": 552.7859,
      "eval_samples_per_second": 11.35,
      "eval_steps_per_second": 1.42,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 1.9024,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 1.8917897939682007,
      "eval_runtime": 553.0559,
      "eval_samples_per_second": 11.344,
      "eval_steps_per_second": 1.419,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 1.8626,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.8584532737731934,
      "eval_runtime": 554.4522,
      "eval_samples_per_second": 11.316,
      "eval_steps_per_second": 1.416,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 1.8397,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 1.8282623291015625,
      "eval_runtime": 554.0635,
      "eval_samples_per_second": 11.324,
      "eval_steps_per_second": 1.417,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 1.8218,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.7944462299346924,
      "eval_runtime": 554.2451,
      "eval_samples_per_second": 11.32,
      "eval_steps_per_second": 1.416,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 1.7698,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.7612203359603882,
      "eval_runtime": 554.1779,
      "eval_samples_per_second": 11.321,
      "eval_steps_per_second": 1.417,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 1.7685,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 1.730454921722412,
      "eval_runtime": 554.4409,
      "eval_samples_per_second": 11.316,
      "eval_steps_per_second": 1.416,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 1.6896,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.691584587097168,
      "eval_runtime": 554.1975,
      "eval_samples_per_second": 11.321,
      "eval_steps_per_second": 1.416,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 1.6213,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 1.6627237796783447,
      "eval_runtime": 554.2183,
      "eval_samples_per_second": 11.32,
      "eval_steps_per_second": 1.416,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 1.6016,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 1.6322499513626099,
      "eval_runtime": 554.3251,
      "eval_samples_per_second": 11.318,
      "eval_steps_per_second": 1.416,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 1.5939,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 1.6027921438217163,
      "eval_runtime": 555.8406,
      "eval_samples_per_second": 11.287,
      "eval_steps_per_second": 1.412,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 1.5416,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.5798810720443726,
      "eval_runtime": 555.8321,
      "eval_samples_per_second": 11.288,
      "eval_steps_per_second": 1.412,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 1.5149,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 1.5527715682983398,
      "eval_runtime": 556.317,
      "eval_samples_per_second": 11.278,
      "eval_steps_per_second": 1.411,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 1.5057,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 1.5266777276992798,
      "eval_runtime": 556.4442,
      "eval_samples_per_second": 11.275,
      "eval_steps_per_second": 1.411,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 1.4851,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 1.5026806592941284,
      "eval_runtime": 556.1829,
      "eval_samples_per_second": 11.28,
      "eval_steps_per_second": 1.411,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 1.4473,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 1.4737541675567627,
      "eval_runtime": 555.4267,
      "eval_samples_per_second": 11.296,
      "eval_steps_per_second": 1.413,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 1.426,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 1.4537303447723389,
      "eval_runtime": 555.9884,
      "eval_samples_per_second": 11.284,
      "eval_steps_per_second": 1.412,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 1.4171,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 1.431217908859253,
      "eval_runtime": 557.2566,
      "eval_samples_per_second": 11.259,
      "eval_steps_per_second": 1.409,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 1.3827,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 1.4048686027526855,
      "eval_runtime": 555.8745,
      "eval_samples_per_second": 11.287,
      "eval_steps_per_second": 1.412,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 1.3655,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 1.3847733736038208,
      "eval_runtime": 556.7868,
      "eval_samples_per_second": 11.268,
      "eval_steps_per_second": 1.41,
      "step": 2600
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002,
      "loss": 1.3487,
      "step": 2700
    },
    {
      "epoch": 1.97,
      "eval_loss": 1.3603800535202026,
      "eval_runtime": 556.2959,
      "eval_samples_per_second": 11.278,
      "eval_steps_per_second": 1.411,
      "step": 2700
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0002,
      "loss": 1.2769,
      "step": 2800
    },
    {
      "epoch": 2.04,
      "eval_loss": 1.3475805521011353,
      "eval_runtime": 556.1265,
      "eval_samples_per_second": 11.282,
      "eval_steps_per_second": 1.412,
      "step": 2800
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.0002,
      "loss": 1.226,
      "step": 2900
    },
    {
      "epoch": 2.11,
      "eval_loss": 1.3265427350997925,
      "eval_runtime": 555.0977,
      "eval_samples_per_second": 11.303,
      "eval_steps_per_second": 1.414,
      "step": 2900
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0002,
      "loss": 1.2435,
      "step": 3000
    },
    {
      "epoch": 2.19,
      "eval_loss": 1.3085519075393677,
      "eval_runtime": 554.9705,
      "eval_samples_per_second": 11.305,
      "eval_steps_per_second": 1.414,
      "step": 3000
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.0002,
      "loss": 1.2063,
      "step": 3100
    },
    {
      "epoch": 2.26,
      "eval_loss": 1.2864030599594116,
      "eval_runtime": 555.8466,
      "eval_samples_per_second": 11.287,
      "eval_steps_per_second": 1.412,
      "step": 3100
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0002,
      "loss": 1.2078,
      "step": 3200
    },
    {
      "epoch": 2.33,
      "eval_loss": 1.2733838558197021,
      "eval_runtime": 556.0489,
      "eval_samples_per_second": 11.283,
      "eval_steps_per_second": 1.412,
      "step": 3200
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0002,
      "loss": 1.1763,
      "step": 3300
    },
    {
      "epoch": 2.4,
      "eval_loss": 1.2529035806655884,
      "eval_runtime": 555.7551,
      "eval_samples_per_second": 11.289,
      "eval_steps_per_second": 1.412,
      "step": 3300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0002,
      "loss": 1.1648,
      "step": 3400
    },
    {
      "epoch": 2.48,
      "eval_loss": 1.2390271425247192,
      "eval_runtime": 555.5992,
      "eval_samples_per_second": 11.292,
      "eval_steps_per_second": 1.413,
      "step": 3400
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002,
      "loss": 1.1497,
      "step": 3500
    },
    {
      "epoch": 2.55,
      "eval_loss": 1.221109390258789,
      "eval_runtime": 555.5623,
      "eval_samples_per_second": 11.293,
      "eval_steps_per_second": 1.413,
      "step": 3500
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.0002,
      "loss": 1.1428,
      "step": 3600
    },
    {
      "epoch": 2.62,
      "eval_loss": 1.205322027206421,
      "eval_runtime": 555.5155,
      "eval_samples_per_second": 11.294,
      "eval_steps_per_second": 1.413,
      "step": 3600
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.0002,
      "loss": 1.1142,
      "step": 3700
    },
    {
      "epoch": 2.7,
      "eval_loss": 1.1857892274856567,
      "eval_runtime": 556.0273,
      "eval_samples_per_second": 11.284,
      "eval_steps_per_second": 1.412,
      "step": 3700
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.0002,
      "loss": 1.0989,
      "step": 3800
    },
    {
      "epoch": 2.77,
      "eval_loss": 1.1746346950531006,
      "eval_runtime": 556.1788,
      "eval_samples_per_second": 11.281,
      "eval_steps_per_second": 1.411,
      "step": 3800
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002,
      "loss": 1.0885,
      "step": 3900
    },
    {
      "epoch": 2.84,
      "eval_loss": 1.1546565294265747,
      "eval_runtime": 556.3052,
      "eval_samples_per_second": 11.278,
      "eval_steps_per_second": 1.411,
      "step": 3900
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.0002,
      "loss": 1.0863,
      "step": 4000
    },
    {
      "epoch": 2.91,
      "eval_loss": 1.1375267505645752,
      "eval_runtime": 556.442,
      "eval_samples_per_second": 11.275,
      "eval_steps_per_second": 1.411,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 1.0412060628345815e+18,
  "trial_name": null,
  "trial_params": null
}