{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.34569480334286873,
  "global_step": 40000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.9956788149582146e-05,
      "loss": 2.3181,
      "step": 500
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.991357629916428e-05,
      "loss": 2.2625,
      "step": 1000
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9870364448746425e-05,
      "loss": 2.1859,
      "step": 1500
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.982715259832857e-05,
      "loss": 2.1855,
      "step": 2000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9783940747910704e-05,
      "loss": 2.1539,
      "step": 2500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.974072889749285e-05,
      "loss": 2.1454,
      "step": 3000
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.969751704707499e-05,
      "loss": 2.1311,
      "step": 3500
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.9654305196657134e-05,
      "loss": 2.1232,
      "step": 4000
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.961109334623927e-05,
      "loss": 2.0882,
      "step": 4500
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.956788149582141e-05,
      "loss": 2.1255,
      "step": 5000
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.9524669645403556e-05,
      "loss": 2.1109,
      "step": 5500
    },
    {
      "epoch": 0.05,
      "learning_rate": 4.94814577949857e-05,
      "loss": 2.0932,
      "step": 6000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.943824594456784e-05,
      "loss": 2.0806,
      "step": 6500
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.9395034094149986e-05,
      "loss": 2.108,
      "step": 7000
    },
    {
      "epoch": 0.06,
      "learning_rate": 4.935182224373212e-05,
      "loss": 2.1137,
      "step": 7500
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.9308610393314265e-05,
      "loss": 2.0663,
      "step": 8000
    },
    {
      "epoch": 0.07,
      "learning_rate": 4.926539854289641e-05,
      "loss": 2.0729,
      "step": 8500
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.922218669247855e-05,
      "loss": 2.0571,
      "step": 9000
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.917897484206069e-05,
      "loss": 2.1231,
      "step": 9500
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.913576299164283e-05,
      "loss": 2.0631,
      "step": 10000
    },
    {
      "epoch": 0.09,
      "learning_rate": 4.9092551141224974e-05,
      "loss": 2.0889,
      "step": 10500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.904933929080711e-05,
      "loss": 2.0952,
      "step": 11000
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.9006127440389254e-05,
      "loss": 2.0885,
      "step": 11500
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.89629155899714e-05,
      "loss": 2.0642,
      "step": 12000
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.891970373955353e-05,
      "loss": 2.0385,
      "step": 12500
    },
    {
      "epoch": 0.11,
      "learning_rate": 4.8876491889135676e-05,
      "loss": 2.0664,
      "step": 13000
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.883328003871782e-05,
      "loss": 2.0678,
      "step": 13500
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.879006818829996e-05,
      "loss": 2.0487,
      "step": 14000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.87468563378821e-05,
      "loss": 2.0119,
      "step": 14500
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.870364448746424e-05,
      "loss": 2.0541,
      "step": 15000
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.8660432637046385e-05,
      "loss": 2.0105,
      "step": 15500
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.861722078662853e-05,
      "loss": 2.0374,
      "step": 16000
    },
    {
      "epoch": 0.14,
      "learning_rate": 4.857400893621067e-05,
      "loss": 2.0205,
      "step": 16500
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.8530797085792814e-05,
      "loss": 2.0243,
      "step": 17000
    },
    {
      "epoch": 0.15,
      "learning_rate": 4.848758523537495e-05,
      "loss": 2.0473,
      "step": 17500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.8444373384957094e-05,
      "loss": 2.0331,
      "step": 18000
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.840116153453924e-05,
      "loss": 2.0622,
      "step": 18500
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.835794968412137e-05,
      "loss": 2.0277,
      "step": 19000
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.8314737833703516e-05,
      "loss": 2.0355,
      "step": 19500
    },
    {
      "epoch": 0.17,
      "learning_rate": 4.827152598328566e-05,
      "loss": 2.0415,
      "step": 20000
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.82283141328678e-05,
      "loss": 2.0355,
      "step": 20500
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.818510228244994e-05,
      "loss": 1.9982,
      "step": 21000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.814189043203208e-05,
      "loss": 2.0237,
      "step": 21500
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.8098678581614225e-05,
      "loss": 2.0248,
      "step": 22000
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.805546673119636e-05,
      "loss": 2.0038,
      "step": 22500
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.8012254880778505e-05,
      "loss": 2.0252,
      "step": 23000
    },
    {
      "epoch": 0.2,
      "learning_rate": 4.796904303036065e-05,
      "loss": 1.9888,
      "step": 23500
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.7925831179942784e-05,
      "loss": 2.0121,
      "step": 24000
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.788261932952493e-05,
      "loss": 1.9794,
      "step": 24500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.783940747910707e-05,
      "loss": 1.9893,
      "step": 25000
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.7796195628689213e-05,
      "loss": 1.9933,
      "step": 25500
    },
    {
      "epoch": 0.22,
      "learning_rate": 4.7752983778271357e-05,
      "loss": 1.9897,
      "step": 26000
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.77097719278535e-05,
      "loss": 2.0199,
      "step": 26500
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.766656007743564e-05,
      "loss": 2.0092,
      "step": 27000
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.762334822701778e-05,
      "loss": 1.9775,
      "step": 27500
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.758013637659992e-05,
      "loss": 1.9796,
      "step": 28000
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.7536924526182065e-05,
      "loss": 1.9855,
      "step": 28500
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.74937126757642e-05,
      "loss": 2.0539,
      "step": 29000
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.7450500825346345e-05,
      "loss": 2.0053,
      "step": 29500
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.740728897492849e-05,
      "loss": 1.9682,
      "step": 30000
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.736407712451063e-05,
      "loss": 2.0241,
      "step": 30500
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.732086527409277e-05,
      "loss": 1.9906,
      "step": 31000
    },
    {
      "epoch": 0.27,
      "learning_rate": 4.727765342367491e-05,
      "loss": 1.999,
      "step": 31500
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.7234441573257054e-05,
      "loss": 2.0129,
      "step": 32000
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.719122972283919e-05,
      "loss": 2.0016,
      "step": 32500
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.714801787242133e-05,
      "loss": 1.9669,
      "step": 33000
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.7104806022003476e-05,
      "loss": 2.0073,
      "step": 33500
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.706159417158561e-05,
      "loss": 1.9432,
      "step": 34000
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.7018382321167756e-05,
      "loss": 1.9661,
      "step": 34500
    },
    {
      "epoch": 0.3,
      "learning_rate": 4.69751704707499e-05,
      "loss": 1.9904,
      "step": 35000
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.693195862033204e-05,
      "loss": 1.9749,
      "step": 35500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.6888746769914185e-05,
      "loss": 1.9963,
      "step": 36000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.684553491949633e-05,
      "loss": 1.9604,
      "step": 36500
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.680232306907847e-05,
      "loss": 2.0006,
      "step": 37000
    },
    {
      "epoch": 0.32,
      "learning_rate": 4.675911121866061e-05,
      "loss": 1.9637,
      "step": 37500
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.671589936824275e-05,
      "loss": 1.9799,
      "step": 38000
    },
    {
      "epoch": 0.33,
      "learning_rate": 4.6672687517824894e-05,
      "loss": 1.9626,
      "step": 38500
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.662947566740703e-05,
      "loss": 1.9352,
      "step": 39000
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.6586263816989173e-05,
      "loss": 1.9919,
      "step": 39500
    },
    {
      "epoch": 0.35,
      "learning_rate": 4.6543051966571317e-05,
      "loss": 1.9473,
      "step": 40000
    }
  ],
  "max_steps": 578545,
  "num_train_epochs": 5,
  "total_flos": 5.842369833311232e+16,
  "trial_name": null,
  "trial_params": null
}