{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 22.0,
  "global_step": 79002,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5e-09,
      "loss": 10.5192,
      "step": 1
    },
    {
      "epoch": 0.14,
      "learning_rate": 2.5e-06,
      "loss": 9.4826,
      "step": 500
    },
    {
      "epoch": 0.28,
      "learning_rate": 5e-06,
      "loss": 7.9029,
      "step": 1000
    },
    {
      "epoch": 0.42,
      "learning_rate": 7.5e-06,
      "loss": 6.8971,
      "step": 1500
    },
    {
      "epoch": 0.56,
      "learning_rate": 1e-05,
      "loss": 6.6133,
      "step": 2000
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.25e-05,
      "loss": 6.4578,
      "step": 2500
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.5e-05,
      "loss": 6.3502,
      "step": 3000
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.75e-05,
      "loss": 6.2558,
      "step": 3500
    },
    {
      "epoch": 1.11,
      "learning_rate": 2e-05,
      "loss": 6.1784,
      "step": 4000
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.25e-05,
      "loss": 6.1171,
      "step": 4500
    },
    {
      "epoch": 1.39,
      "learning_rate": 2.5e-05,
      "loss": 6.0613,
      "step": 5000
    },
    {
      "epoch": 1.53,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 6.0146,
      "step": 5500
    },
    {
      "epoch": 1.67,
      "learning_rate": 3e-05,
      "loss": 5.9732,
      "step": 6000
    },
    {
      "epoch": 1.81,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 5.9385,
      "step": 6500
    },
    {
      "epoch": 1.95,
      "learning_rate": 3.5e-05,
      "loss": 5.9006,
      "step": 7000
    },
    {
      "epoch": 2.09,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 5.8725,
      "step": 7500
    },
    {
      "epoch": 2.23,
      "learning_rate": 4e-05,
      "loss": 5.8389,
      "step": 8000
    },
    {
      "epoch": 2.37,
      "learning_rate": 4.2495e-05,
      "loss": 5.8149,
      "step": 8500
    },
    {
      "epoch": 2.51,
      "learning_rate": 4.4995000000000005e-05,
      "loss": 5.7907,
      "step": 9000
    },
    {
      "epoch": 2.65,
      "learning_rate": 4.7495e-05,
      "loss": 5.771,
      "step": 9500
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.9995000000000005e-05,
      "loss": 5.7541,
      "step": 10000
    },
    {
      "epoch": 2.92,
      "learning_rate": 4.998254346606843e-05,
      "loss": 5.7367,
      "step": 10500
    },
    {
      "epoch": 3.06,
      "learning_rate": 4.9965016825574876e-05,
      "loss": 5.7192,
      "step": 11000
    },
    {
      "epoch": 3.2,
      "learning_rate": 4.994749018508132e-05,
      "loss": 5.7026,
      "step": 11500
    },
    {
      "epoch": 3.34,
      "learning_rate": 4.9929963544587775e-05,
      "loss": 5.6922,
      "step": 12000
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.991247195737521e-05,
      "loss": 5.6796,
      "step": 12500
    },
    {
      "epoch": 3.62,
      "learning_rate": 4.9894945316881665e-05,
      "loss": 5.67,
      "step": 13000
    },
    {
      "epoch": 3.76,
      "learning_rate": 4.987741867638811e-05,
      "loss": 5.6599,
      "step": 13500
    },
    {
      "epoch": 3.9,
      "learning_rate": 4.9859892035894564e-05,
      "loss": 5.6487,
      "step": 14000
    },
    {
      "epoch": 4.04,
      "learning_rate": 4.9842400448681994e-05,
      "loss": 5.6437,
      "step": 14500
    },
    {
      "epoch": 4.18,
      "learning_rate": 4.982487380818845e-05,
      "loss": 5.6326,
      "step": 15000
    },
    {
      "epoch": 4.32,
      "learning_rate": 4.98073471676949e-05,
      "loss": 5.6252,
      "step": 15500
    },
    {
      "epoch": 4.46,
      "learning_rate": 4.978982052720135e-05,
      "loss": 5.6177,
      "step": 16000
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.9772328939988783e-05,
      "loss": 5.6131,
      "step": 16500
    },
    {
      "epoch": 4.73,
      "learning_rate": 4.975483735277622e-05,
      "loss": 5.6043,
      "step": 17000
    },
    {
      "epoch": 4.87,
      "learning_rate": 4.9737310712282666e-05,
      "loss": 5.5996,
      "step": 17500
    },
    {
      "epoch": 5.01,
      "learning_rate": 4.971978407178912e-05,
      "loss": 5.5909,
      "step": 18000
    },
    {
      "epoch": 5.15,
      "learning_rate": 4.970225743129557e-05,
      "loss": 5.5874,
      "step": 18500
    },
    {
      "epoch": 5.29,
      "learning_rate": 4.968473079080202e-05,
      "loss": 5.5822,
      "step": 19000
    },
    {
      "epoch": 5.43,
      "learning_rate": 4.966723920358946e-05,
      "loss": 5.5773,
      "step": 19500
    },
    {
      "epoch": 5.57,
      "learning_rate": 4.964971256309591e-05,
      "loss": 5.5722,
      "step": 20000
    },
    {
      "epoch": 5.71,
      "learning_rate": 4.963218592260236e-05,
      "loss": 5.5707,
      "step": 20500
    },
    {
      "epoch": 5.85,
      "learning_rate": 4.961465928210881e-05,
      "loss": 5.5626,
      "step": 21000
    },
    {
      "epoch": 5.99,
      "learning_rate": 4.9597132641615254e-05,
      "loss": 5.5599,
      "step": 21500
    },
    {
      "epoch": 6.13,
      "learning_rate": 4.957964105440269e-05,
      "loss": 5.5536,
      "step": 22000
    },
    {
      "epoch": 6.27,
      "learning_rate": 4.9562114413909144e-05,
      "loss": 5.5496,
      "step": 22500
    },
    {
      "epoch": 6.4,
      "learning_rate": 4.95445877734156e-05,
      "loss": 5.5485,
      "step": 23000
    },
    {
      "epoch": 6.54,
      "learning_rate": 4.952706113292204e-05,
      "loss": 5.5436,
      "step": 23500
    },
    {
      "epoch": 6.68,
      "learning_rate": 4.950956954570948e-05,
      "loss": 5.5428,
      "step": 24000
    },
    {
      "epoch": 6.82,
      "learning_rate": 4.9492042905215926e-05,
      "loss": 5.5355,
      "step": 24500
    },
    {
      "epoch": 6.96,
      "learning_rate": 4.947451626472238e-05,
      "loss": 5.5348,
      "step": 25000
    },
    {
      "epoch": 7.1,
      "learning_rate": 4.9456989624228826e-05,
      "loss": 5.5275,
      "step": 25500
    },
    {
      "epoch": 7.24,
      "learning_rate": 4.943949803701627e-05,
      "loss": 5.5286,
      "step": 26000
    },
    {
      "epoch": 7.38,
      "learning_rate": 4.9421971396522716e-05,
      "loss": 5.5239,
      "step": 26500
    },
    {
      "epoch": 7.52,
      "learning_rate": 4.940444475602917e-05,
      "loss": 5.5223,
      "step": 27000
    },
    {
      "epoch": 7.66,
      "learning_rate": 4.9386953168816605e-05,
      "loss": 5.52,
      "step": 27500
    },
    {
      "epoch": 7.8,
      "learning_rate": 4.936942652832305e-05,
      "loss": 5.5177,
      "step": 28000
    },
    {
      "epoch": 7.94,
      "learning_rate": 4.9351899887829505e-05,
      "loss": 5.5147,
      "step": 28500
    },
    {
      "epoch": 8.08,
      "learning_rate": 4.933437324733595e-05,
      "loss": 5.5059,
      "step": 29000
    },
    {
      "epoch": 8.21,
      "learning_rate": 4.9316846606842404e-05,
      "loss": 5.5046,
      "step": 29500
    },
    {
      "epoch": 8.35,
      "learning_rate": 4.929931996634885e-05,
      "loss": 5.5033,
      "step": 30000
    },
    {
      "epoch": 8.49,
      "learning_rate": 4.92817933258553e-05,
      "loss": 5.5019,
      "step": 30500
    },
    {
      "epoch": 8.63,
      "learning_rate": 4.9264266685361756e-05,
      "loss": 5.4969,
      "step": 31000
    },
    {
      "epoch": 8.77,
      "learning_rate": 4.92467400448682e-05,
      "loss": 5.4955,
      "step": 31500
    },
    {
      "epoch": 8.91,
      "learning_rate": 4.922924845765564e-05,
      "loss": 5.4978,
      "step": 32000
    },
    {
      "epoch": 9.05,
      "learning_rate": 4.9211721817162086e-05,
      "loss": 5.4951,
      "step": 32500
    },
    {
      "epoch": 9.19,
      "learning_rate": 4.919419517666854e-05,
      "loss": 5.4846,
      "step": 33000
    },
    {
      "epoch": 9.33,
      "learning_rate": 4.9176668536174985e-05,
      "loss": 5.4871,
      "step": 33500
    },
    {
      "epoch": 9.47,
      "learning_rate": 4.915917694896243e-05,
      "loss": 5.4838,
      "step": 34000
    },
    {
      "epoch": 9.61,
      "learning_rate": 4.9141650308468875e-05,
      "loss": 5.4853,
      "step": 34500
    },
    {
      "epoch": 9.75,
      "learning_rate": 4.912412366797533e-05,
      "loss": 5.4807,
      "step": 35000
    },
    {
      "epoch": 9.89,
      "learning_rate": 4.9106597027481774e-05,
      "loss": 5.4789,
      "step": 35500
    },
    {
      "epoch": 10.03,
      "learning_rate": 4.908910544026921e-05,
      "loss": 5.4754,
      "step": 36000
    },
    {
      "epoch": 10.16,
      "learning_rate": 4.9071578799775664e-05,
      "loss": 5.4706,
      "step": 36500
    },
    {
      "epoch": 10.3,
      "learning_rate": 4.905405215928211e-05,
      "loss": 5.332,
      "step": 37000
    },
    {
      "epoch": 10.44,
      "learning_rate": 4.903652551878856e-05,
      "loss": 5.1202,
      "step": 37500
    },
    {
      "epoch": 10.58,
      "learning_rate": 4.901899887829501e-05,
      "loss": 4.9543,
      "step": 38000
    },
    {
      "epoch": 10.72,
      "learning_rate": 4.900147223780146e-05,
      "loss": 4.7848,
      "step": 38500
    },
    {
      "epoch": 10.86,
      "learning_rate": 4.898394559730791e-05,
      "loss": 4.6248,
      "step": 39000
    },
    {
      "epoch": 11.0,
      "learning_rate": 4.8966418956814355e-05,
      "loss": 4.4711,
      "step": 39500
    },
    {
      "epoch": 11.14,
      "learning_rate": 4.894889231632081e-05,
      "loss": 4.3324,
      "step": 40000
    },
    {
      "epoch": 11.28,
      "learning_rate": 4.8931400729108245e-05,
      "loss": 4.1831,
      "step": 40500
    },
    {
      "epoch": 11.42,
      "learning_rate": 4.89138740886147e-05,
      "loss": 3.845,
      "step": 41000
    },
    {
      "epoch": 11.56,
      "learning_rate": 4.889634744812115e-05,
      "loss": 3.3852,
      "step": 41500
    },
    {
      "epoch": 11.7,
      "learning_rate": 4.88788208076276e-05,
      "loss": 2.749,
      "step": 42000
    },
    {
      "epoch": 11.84,
      "learning_rate": 4.886136427369602e-05,
      "loss": 2.4339,
      "step": 42500
    },
    {
      "epoch": 11.97,
      "learning_rate": 4.884383763320247e-05,
      "loss": 2.2505,
      "step": 43000
    },
    {
      "epoch": 12.11,
      "learning_rate": 4.882631099270892e-05,
      "loss": 2.1255,
      "step": 43500
    },
    {
      "epoch": 12.25,
      "learning_rate": 4.880878435221537e-05,
      "loss": 2.037,
      "step": 44000
    },
    {
      "epoch": 12.39,
      "learning_rate": 4.879125771172182e-05,
      "loss": 1.9662,
      "step": 44500
    },
    {
      "epoch": 12.53,
      "learning_rate": 4.877376612450926e-05,
      "loss": 1.909,
      "step": 45000
    },
    {
      "epoch": 12.67,
      "learning_rate": 4.8756239484015706e-05,
      "loss": 1.8613,
      "step": 45500
    },
    {
      "epoch": 12.81,
      "learning_rate": 4.873871284352215e-05,
      "loss": 1.8191,
      "step": 46000
    },
    {
      "epoch": 12.95,
      "learning_rate": 4.8721186203028606e-05,
      "loss": 1.7808,
      "step": 46500
    },
    {
      "epoch": 13.09,
      "learning_rate": 4.870369461581604e-05,
      "loss": 1.745,
      "step": 47000
    },
    {
      "epoch": 13.23,
      "learning_rate": 4.8686167975322495e-05,
      "loss": 1.7123,
      "step": 47500
    },
    {
      "epoch": 13.37,
      "learning_rate": 4.866864133482894e-05,
      "loss": 1.6834,
      "step": 48000
    },
    {
      "epoch": 13.51,
      "learning_rate": 4.8651114694335395e-05,
      "loss": 1.6541,
      "step": 48500
    },
    {
      "epoch": 13.65,
      "learning_rate": 4.8633623107122825e-05,
      "loss": 1.6308,
      "step": 49000
    },
    {
      "epoch": 13.78,
      "learning_rate": 4.861609646662928e-05,
      "loss": 1.6093,
      "step": 49500
    },
    {
      "epoch": 13.92,
      "learning_rate": 4.859856982613573e-05,
      "loss": 1.5854,
      "step": 50000
    },
    {
      "epoch": 14.06,
      "learning_rate": 4.858104318564218e-05,
      "loss": 1.5645,
      "step": 50500
    },
    {
      "epoch": 14.2,
      "learning_rate": 4.856351654514863e-05,
      "loss": 1.5449,
      "step": 51000
    },
    {
      "epoch": 14.34,
      "learning_rate": 4.854602495793607e-05,
      "loss": 1.5306,
      "step": 51500
    },
    {
      "epoch": 14.48,
      "learning_rate": 4.852849831744252e-05,
      "loss": 1.5134,
      "step": 52000
    },
    {
      "epoch": 14.62,
      "learning_rate": 4.8510971676948966e-05,
      "loss": 1.4979,
      "step": 52500
    },
    {
      "epoch": 14.76,
      "learning_rate": 4.849344503645541e-05,
      "loss": 1.4835,
      "step": 53000
    },
    {
      "epoch": 14.9,
      "learning_rate": 4.847595344924285e-05,
      "loss": 1.468,
      "step": 53500
    },
    {
      "epoch": 15.04,
      "learning_rate": 4.84584268087493e-05,
      "loss": 1.4542,
      "step": 54000
    },
    {
      "epoch": 15.18,
      "learning_rate": 4.844090016825575e-05,
      "loss": 1.4416,
      "step": 54500
    },
    {
      "epoch": 15.32,
      "learning_rate": 4.842340858104319e-05,
      "loss": 1.4277,
      "step": 55000
    },
    {
      "epoch": 15.46,
      "learning_rate": 4.840588194054964e-05,
      "loss": 1.4185,
      "step": 55500
    },
    {
      "epoch": 15.59,
      "learning_rate": 4.8388355300056085e-05,
      "loss": 1.405,
      "step": 56000
    },
    {
      "epoch": 15.73,
      "learning_rate": 4.837082865956254e-05,
      "loss": 1.3956,
      "step": 56500
    },
    {
      "epoch": 15.87,
      "learning_rate": 4.8353302019068984e-05,
      "loss": 1.384,
      "step": 57000
    },
    {
      "epoch": 16.01,
      "learning_rate": 4.833581043185643e-05,
      "loss": 1.3745,
      "step": 57500
    },
    {
      "epoch": 16.15,
      "learning_rate": 4.8318283791362874e-05,
      "loss": 1.3639,
      "step": 58000
    },
    {
      "epoch": 16.29,
      "learning_rate": 4.830075715086933e-05,
      "loss": 1.3543,
      "step": 58500
    },
    {
      "epoch": 16.43,
      "learning_rate": 4.828323051037577e-05,
      "loss": 1.347,
      "step": 59000
    },
    {
      "epoch": 16.57,
      "learning_rate": 4.8265703869882226e-05,
      "loss": 1.3377,
      "step": 59500
    },
    {
      "epoch": 16.71,
      "learning_rate": 4.8248212282669656e-05,
      "loss": 1.3291,
      "step": 60000
    },
    {
      "epoch": 16.85,
      "learning_rate": 4.823068564217611e-05,
      "loss": 1.3214,
      "step": 60500
    },
    {
      "epoch": 16.99,
      "learning_rate": 4.821315900168256e-05,
      "loss": 1.3146,
      "step": 61000
    },
    {
      "epoch": 17.13,
      "learning_rate": 4.819563236118901e-05,
      "loss": 1.3041,
      "step": 61500
    },
    {
      "epoch": 17.27,
      "learning_rate": 4.817810572069546e-05,
      "loss": 1.2975,
      "step": 62000
    },
    {
      "epoch": 17.4,
      "learning_rate": 4.8160579080201915e-05,
      "loss": 1.2908,
      "step": 62500
    },
    {
      "epoch": 17.54,
      "learning_rate": 4.814305243970836e-05,
      "loss": 1.2835,
      "step": 63000
    },
    {
      "epoch": 17.68,
      "learning_rate": 4.812556085249579e-05,
      "loss": 1.2766,
      "step": 63500
    },
    {
      "epoch": 17.82,
      "learning_rate": 4.8108034212002244e-05,
      "loss": 1.2715,
      "step": 64000
    },
    {
      "epoch": 17.96,
      "learning_rate": 4.80905075715087e-05,
      "loss": 1.2655,
      "step": 64500
    },
    {
      "epoch": 18.1,
      "learning_rate": 4.807298093101514e-05,
      "loss": 1.2565,
      "step": 65000
    },
    {
      "epoch": 18.24,
      "learning_rate": 4.805548934380259e-05,
      "loss": 1.2506,
      "step": 65500
    },
    {
      "epoch": 18.38,
      "learning_rate": 4.803796270330903e-05,
      "loss": 1.2443,
      "step": 66000
    },
    {
      "epoch": 18.52,
      "learning_rate": 4.802043606281548e-05,
      "loss": 1.2386,
      "step": 66500
    },
    {
      "epoch": 18.66,
      "learning_rate": 4.800290942232193e-05,
      "loss": 1.2338,
      "step": 67000
    },
    {
      "epoch": 18.8,
      "learning_rate": 4.798541783510937e-05,
      "loss": 1.2287,
      "step": 67500
    },
    {
      "epoch": 18.94,
      "learning_rate": 4.7967926247896806e-05,
      "loss": 1.2233,
      "step": 68000
    },
    {
      "epoch": 19.08,
      "learning_rate": 4.795039960740326e-05,
      "loss": 1.2185,
      "step": 68500
    },
    {
      "epoch": 19.21,
      "learning_rate": 4.7932872966909705e-05,
      "loss": 1.2133,
      "step": 69000
    },
    {
      "epoch": 19.35,
      "learning_rate": 4.791534632641616e-05,
      "loss": 1.2088,
      "step": 69500
    },
    {
      "epoch": 19.49,
      "learning_rate": 4.7897819685922604e-05,
      "loss": 1.2036,
      "step": 70000
    },
    {
      "epoch": 19.63,
      "learning_rate": 4.788029304542905e-05,
      "loss": 1.1979,
      "step": 70500
    },
    {
      "epoch": 19.77,
      "learning_rate": 4.7862766404935504e-05,
      "loss": 1.1919,
      "step": 71000
    },
    {
      "epoch": 19.91,
      "learning_rate": 4.784527481772294e-05,
      "loss": 1.1893,
      "step": 71500
    },
    {
      "epoch": 20.05,
      "learning_rate": 4.7827748177229394e-05,
      "loss": 1.1863,
      "step": 72000
    },
    {
      "epoch": 20.19,
      "learning_rate": 4.781022153673584e-05,
      "loss": 1.1801,
      "step": 72500
    },
    {
      "epoch": 20.33,
      "learning_rate": 4.779269489624229e-05,
      "loss": 1.1777,
      "step": 73000
    },
    {
      "epoch": 20.47,
      "learning_rate": 4.777516825574874e-05,
      "loss": 1.1714,
      "step": 73500
    },
    {
      "epoch": 20.61,
      "learning_rate": 4.7757676668536176e-05,
      "loss": 1.1684,
      "step": 74000
    },
    {
      "epoch": 20.75,
      "learning_rate": 4.774015002804263e-05,
      "loss": 1.165,
      "step": 74500
    },
    {
      "epoch": 20.89,
      "learning_rate": 4.7722623387549075e-05,
      "loss": 1.1602,
      "step": 75000
    },
    {
      "epoch": 21.02,
      "learning_rate": 4.770509674705553e-05,
      "loss": 1.1567,
      "step": 75500
    },
    {
      "epoch": 21.16,
      "learning_rate": 4.7687570106561975e-05,
      "loss": 1.1524,
      "step": 76000
    },
    {
      "epoch": 21.3,
      "learning_rate": 4.767007851934942e-05,
      "loss": 1.1502,
      "step": 76500
    },
    {
      "epoch": 21.44,
      "learning_rate": 4.7652551878855864e-05,
      "loss": 1.1442,
      "step": 77000
    },
    {
      "epoch": 21.58,
      "learning_rate": 4.763502523836231e-05,
      "loss": 1.1422,
      "step": 77500
    },
    {
      "epoch": 21.72,
      "learning_rate": 4.7617498597868764e-05,
      "loss": 1.1384,
      "step": 78000
    },
    {
      "epoch": 21.86,
      "learning_rate": 4.759997195737521e-05,
      "loss": 1.1361,
      "step": 78500
    },
    {
      "epoch": 22.0,
      "learning_rate": 4.758244531688166e-05,
      "loss": 1.1318,
      "step": 79000
    }
  ],
  "max_steps": 1436400,
  "num_train_epochs": 400,
  "total_flos": 2.129238171230562e+19,
  "trial_name": null,
  "trial_params": null
}