|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 135.59322033898306, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0196, |
|
"loss": 4.115, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 0.0192, |
|
"loss": 4.0422, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.14, |
|
"learning_rate": 0.0188, |
|
"loss": 3.7797, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 10.85, |
|
"learning_rate": 0.0184, |
|
"loss": 3.204, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 0.018000000000000002, |
|
"loss": 2.7285, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.27, |
|
"learning_rate": 0.0176, |
|
"loss": 2.1524, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.98, |
|
"learning_rate": 0.0172, |
|
"loss": 1.6875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.69, |
|
"learning_rate": 0.0168, |
|
"loss": 1.2613, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.41, |
|
"learning_rate": 0.016399999999999998, |
|
"loss": 0.9464, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 0.016, |
|
"loss": 0.734, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 29.83, |
|
"learning_rate": 0.015600000000000001, |
|
"loss": 0.5502, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 32.54, |
|
"learning_rate": 0.0152, |
|
"loss": 0.4353, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 35.25, |
|
"learning_rate": 0.0148, |
|
"loss": 0.3286, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 37.97, |
|
"learning_rate": 0.0144, |
|
"loss": 0.2814, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 40.68, |
|
"learning_rate": 0.013999999999999999, |
|
"loss": 0.2337, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 43.39, |
|
"learning_rate": 0.013600000000000001, |
|
"loss": 0.1949, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 46.1, |
|
"learning_rate": 0.013200000000000002, |
|
"loss": 0.1482, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 48.81, |
|
"learning_rate": 0.0128, |
|
"loss": 0.136, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 51.53, |
|
"learning_rate": 0.0124, |
|
"loss": 0.1175, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 54.24, |
|
"learning_rate": 0.012, |
|
"loss": 0.0995, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 56.95, |
|
"learning_rate": 0.0116, |
|
"loss": 0.0841, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 59.66, |
|
"learning_rate": 0.011200000000000002, |
|
"loss": 0.07, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 62.37, |
|
"learning_rate": 0.0108, |
|
"loss": 0.0672, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 65.08, |
|
"learning_rate": 0.010400000000000001, |
|
"loss": 0.058, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 67.8, |
|
"learning_rate": 0.01, |
|
"loss": 0.0546, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 70.51, |
|
"learning_rate": 0.0096, |
|
"loss": 0.0503, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 73.22, |
|
"learning_rate": 0.0092, |
|
"loss": 0.0471, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 75.93, |
|
"learning_rate": 0.0088, |
|
"loss": 0.0422, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 78.64, |
|
"learning_rate": 0.0084, |
|
"loss": 0.0352, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 81.36, |
|
"learning_rate": 0.008, |
|
"loss": 0.0374, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 84.07, |
|
"learning_rate": 0.0076, |
|
"loss": 0.033, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 86.78, |
|
"learning_rate": 0.0072, |
|
"loss": 0.0332, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 89.49, |
|
"learning_rate": 0.0068000000000000005, |
|
"loss": 0.0292, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 92.2, |
|
"learning_rate": 0.0064, |
|
"loss": 0.0298, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 94.92, |
|
"learning_rate": 0.006, |
|
"loss": 0.0258, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 97.63, |
|
"learning_rate": 0.005600000000000001, |
|
"loss": 0.0263, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 100.34, |
|
"learning_rate": 0.005200000000000001, |
|
"loss": 0.0252, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 103.05, |
|
"learning_rate": 0.0048, |
|
"loss": 0.0249, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 105.76, |
|
"learning_rate": 0.0044, |
|
"loss": 0.0225, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 108.47, |
|
"learning_rate": 0.004, |
|
"loss": 0.0219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 111.19, |
|
"learning_rate": 0.0036, |
|
"loss": 0.0224, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 113.9, |
|
"learning_rate": 0.0032, |
|
"loss": 0.0238, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 116.61, |
|
"learning_rate": 0.0028000000000000004, |
|
"loss": 0.0196, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 119.32, |
|
"learning_rate": 0.0024, |
|
"loss": 0.0207, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 122.03, |
|
"learning_rate": 0.002, |
|
"loss": 0.0208, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 124.75, |
|
"learning_rate": 0.0016, |
|
"loss": 0.0204, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 127.46, |
|
"learning_rate": 0.0012, |
|
"loss": 0.0207, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 130.17, |
|
"learning_rate": 0.0008, |
|
"loss": 0.0206, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 132.88, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0203, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 135.59, |
|
"learning_rate": 0.0, |
|
"loss": 0.021, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 167, |
|
"total_flos": 6.9331442466816e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|