|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 113200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 4.97791519434629e-05, |
|
"loss": 0.9761, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.95583038869258e-05, |
|
"loss": 0.6349, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.93374558303887e-05, |
|
"loss": 0.5803, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 4.9116607773851593e-05, |
|
"loss": 0.5568, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 4.889575971731449e-05, |
|
"loss": 0.5413, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 4.867491166077739e-05, |
|
"loss": 0.5304, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 4.8454063604240283e-05, |
|
"loss": 0.5222, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 4.823321554770318e-05, |
|
"loss": 0.5149, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 4.8012367491166086e-05, |
|
"loss": 0.5104, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 4.779151943462898e-05, |
|
"loss": 0.5041, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 4.7570671378091875e-05, |
|
"loss": 0.5012, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 4.734982332155477e-05, |
|
"loss": 0.4968, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.74, |
|
"learning_rate": 4.712897526501767e-05, |
|
"loss": 0.4941, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 4.690812720848057e-05, |
|
"loss": 0.4911, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.63, |
|
"learning_rate": 4.6687279151943466e-05, |
|
"loss": 0.488, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 4.646643109540637e-05, |
|
"loss": 0.4861, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"learning_rate": 4.624558303886926e-05, |
|
"loss": 0.4828, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 4.6024734982332156e-05, |
|
"loss": 0.4821, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 4.580388692579505e-05, |
|
"loss": 0.4785, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 4.558303886925796e-05, |
|
"loss": 0.4781, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"learning_rate": 4.536219081272085e-05, |
|
"loss": 0.4754, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 4.514134275618375e-05, |
|
"loss": 0.4745, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 4.492049469964665e-05, |
|
"loss": 0.4726, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 10.6, |
|
"learning_rate": 4.469964664310954e-05, |
|
"loss": 0.471, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 4.4478798586572437e-05, |
|
"loss": 0.4704, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 11.48, |
|
"learning_rate": 4.425795053003534e-05, |
|
"loss": 0.4678, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 11.93, |
|
"learning_rate": 4.403710247349824e-05, |
|
"loss": 0.4681, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 12.37, |
|
"learning_rate": 4.381625441696113e-05, |
|
"loss": 0.4649, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 12.81, |
|
"learning_rate": 4.359540636042403e-05, |
|
"loss": 0.4654, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 13.25, |
|
"learning_rate": 4.337455830388692e-05, |
|
"loss": 0.4632, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 13.69, |
|
"learning_rate": 4.315371024734983e-05, |
|
"loss": 0.4627, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 14.13, |
|
"learning_rate": 4.2932862190812724e-05, |
|
"loss": 0.4616, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 14.58, |
|
"learning_rate": 4.271201413427562e-05, |
|
"loss": 0.4601, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 15.02, |
|
"learning_rate": 4.249116607773852e-05, |
|
"loss": 0.4607, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 15.46, |
|
"learning_rate": 4.2270318021201414e-05, |
|
"loss": 0.4572, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 15.9, |
|
"learning_rate": 4.204946996466431e-05, |
|
"loss": 0.4587, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 16.34, |
|
"learning_rate": 4.182862190812721e-05, |
|
"loss": 0.4557, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 16.78, |
|
"learning_rate": 4.160777385159011e-05, |
|
"loss": 0.4565, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 17.23, |
|
"learning_rate": 4.1386925795053005e-05, |
|
"loss": 0.4547, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 17.67, |
|
"learning_rate": 4.11660777385159e-05, |
|
"loss": 0.4541, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 18.11, |
|
"learning_rate": 4.09452296819788e-05, |
|
"loss": 0.4539, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 18.55, |
|
"learning_rate": 4.07243816254417e-05, |
|
"loss": 0.452, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"learning_rate": 4.0503533568904596e-05, |
|
"loss": 0.4531, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 19.43, |
|
"learning_rate": 4.028268551236749e-05, |
|
"loss": 0.4496, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 19.88, |
|
"learning_rate": 4.006183745583039e-05, |
|
"loss": 0.4513, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 20.32, |
|
"learning_rate": 3.9840989399293286e-05, |
|
"loss": 0.4488, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 20.76, |
|
"learning_rate": 3.962014134275618e-05, |
|
"loss": 0.4493, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 21.2, |
|
"learning_rate": 3.939929328621909e-05, |
|
"loss": 0.4481, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 21.64, |
|
"learning_rate": 3.917844522968198e-05, |
|
"loss": 0.4475, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 22.08, |
|
"learning_rate": 3.895759717314488e-05, |
|
"loss": 0.4475, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 22.53, |
|
"learning_rate": 3.873674911660777e-05, |
|
"loss": 0.4453, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 22.97, |
|
"learning_rate": 3.851590106007067e-05, |
|
"loss": 0.447, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 23.41, |
|
"learning_rate": 3.8295053003533574e-05, |
|
"loss": 0.4435, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 23.85, |
|
"learning_rate": 3.807420494699647e-05, |
|
"loss": 0.4454, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 24.29, |
|
"learning_rate": 3.785335689045937e-05, |
|
"loss": 0.443, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 24.73, |
|
"learning_rate": 3.7632508833922264e-05, |
|
"loss": 0.4436, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 25.18, |
|
"learning_rate": 3.741166077738516e-05, |
|
"loss": 0.4427, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 25.62, |
|
"learning_rate": 3.719081272084805e-05, |
|
"loss": 0.4418, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 3.696996466431096e-05, |
|
"loss": 0.4426, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 26.5, |
|
"learning_rate": 3.6749116607773855e-05, |
|
"loss": 0.4398, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 26.94, |
|
"learning_rate": 3.652826855123675e-05, |
|
"loss": 0.4417, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 27.39, |
|
"learning_rate": 3.630742049469965e-05, |
|
"loss": 0.4386, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 27.83, |
|
"learning_rate": 3.6086572438162545e-05, |
|
"loss": 0.4401, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 28.27, |
|
"learning_rate": 3.586572438162544e-05, |
|
"loss": 0.4382, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 28.71, |
|
"learning_rate": 3.564487632508834e-05, |
|
"loss": 0.4386, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 29.15, |
|
"learning_rate": 3.542402826855124e-05, |
|
"loss": 0.4381, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 29.59, |
|
"learning_rate": 3.5203180212014136e-05, |
|
"loss": 0.4369, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 30.04, |
|
"learning_rate": 3.498233215547703e-05, |
|
"loss": 0.438, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 30.48, |
|
"learning_rate": 3.476148409893993e-05, |
|
"loss": 0.435, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 30.92, |
|
"learning_rate": 3.454063604240283e-05, |
|
"loss": 0.4372, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 31.36, |
|
"learning_rate": 3.431978798586573e-05, |
|
"loss": 0.4343, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 31.8, |
|
"learning_rate": 3.409893992932862e-05, |
|
"loss": 0.4358, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 32.24, |
|
"learning_rate": 3.387809187279152e-05, |
|
"loss": 0.4341, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 32.69, |
|
"learning_rate": 3.365724381625442e-05, |
|
"loss": 0.4343, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 33.13, |
|
"learning_rate": 3.343639575971731e-05, |
|
"loss": 0.4343, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 33.57, |
|
"learning_rate": 3.321554770318021e-05, |
|
"loss": 0.4326, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 34.01, |
|
"learning_rate": 3.2994699646643114e-05, |
|
"loss": 0.4343, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 34.45, |
|
"learning_rate": 3.277385159010601e-05, |
|
"loss": 0.4309, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 34.89, |
|
"learning_rate": 3.25530035335689e-05, |
|
"loss": 0.4332, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 35.34, |
|
"learning_rate": 3.2332155477031804e-05, |
|
"loss": 0.4306, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 35.78, |
|
"learning_rate": 3.2111307420494705e-05, |
|
"loss": 0.4318, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 36.22, |
|
"learning_rate": 3.18904593639576e-05, |
|
"loss": 0.4306, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 36.66, |
|
"learning_rate": 3.1669611307420494e-05, |
|
"loss": 0.4304, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 37.1, |
|
"learning_rate": 3.1448763250883395e-05, |
|
"loss": 0.4307, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 37.54, |
|
"learning_rate": 3.122791519434629e-05, |
|
"loss": 0.4289, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 37.99, |
|
"learning_rate": 3.1007067137809184e-05, |
|
"loss": 0.431, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 38.43, |
|
"learning_rate": 3.078621908127209e-05, |
|
"loss": 0.4276, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 38.87, |
|
"learning_rate": 3.0565371024734986e-05, |
|
"loss": 0.4296, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 39.31, |
|
"learning_rate": 3.034452296819788e-05, |
|
"loss": 0.4274, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 39.75, |
|
"learning_rate": 3.0123674911660775e-05, |
|
"loss": 0.4285, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 40.19, |
|
"learning_rate": 2.990282685512368e-05, |
|
"loss": 0.4276, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 40.64, |
|
"learning_rate": 2.9681978798586574e-05, |
|
"loss": 0.427, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 41.08, |
|
"learning_rate": 2.9461130742049468e-05, |
|
"loss": 0.4278, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 41.52, |
|
"learning_rate": 2.9240282685512373e-05, |
|
"loss": 0.4256, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 41.96, |
|
"learning_rate": 2.9019434628975267e-05, |
|
"loss": 0.4277, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 42.4, |
|
"learning_rate": 2.879858657243816e-05, |
|
"loss": 0.4246, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 42.84, |
|
"learning_rate": 2.857773851590106e-05, |
|
"loss": 0.4266, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 43.29, |
|
"learning_rate": 2.835689045936396e-05, |
|
"loss": 0.4246, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 43.73, |
|
"learning_rate": 2.8136042402826858e-05, |
|
"loss": 0.4254, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 44.17, |
|
"learning_rate": 2.7915194346289753e-05, |
|
"loss": 0.4248, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 44.61, |
|
"learning_rate": 2.7694346289752654e-05, |
|
"loss": 0.424, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 45.05, |
|
"learning_rate": 2.747349823321555e-05, |
|
"loss": 0.4251, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 45.49, |
|
"learning_rate": 2.7252650176678446e-05, |
|
"loss": 0.4227, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 45.94, |
|
"learning_rate": 2.703180212014134e-05, |
|
"loss": 0.4248, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 46.38, |
|
"learning_rate": 2.6810954063604245e-05, |
|
"loss": 0.4221, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 46.82, |
|
"learning_rate": 2.659010600706714e-05, |
|
"loss": 0.4237, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 47.26, |
|
"learning_rate": 2.6369257950530034e-05, |
|
"loss": 0.4221, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 47.7, |
|
"learning_rate": 2.6148409893992938e-05, |
|
"loss": 0.4225, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 48.14, |
|
"learning_rate": 2.5927561837455833e-05, |
|
"loss": 0.4225, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 48.59, |
|
"learning_rate": 2.5706713780918727e-05, |
|
"loss": 0.4214, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 49.03, |
|
"learning_rate": 2.5485865724381625e-05, |
|
"loss": 0.4228, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 49.47, |
|
"learning_rate": 2.5265017667844526e-05, |
|
"loss": 0.42, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 49.91, |
|
"learning_rate": 2.5044169611307424e-05, |
|
"loss": 0.4222, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 50.35, |
|
"learning_rate": 2.4823321554770318e-05, |
|
"loss": 0.4197, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 50.8, |
|
"learning_rate": 2.4602473498233216e-05, |
|
"loss": 0.4211, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 51.24, |
|
"learning_rate": 2.4381625441696117e-05, |
|
"loss": 0.42, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 51.68, |
|
"learning_rate": 2.416077738515901e-05, |
|
"loss": 0.4201, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 52.12, |
|
"learning_rate": 2.393992932862191e-05, |
|
"loss": 0.4203, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 52.56, |
|
"learning_rate": 2.3719081272084807e-05, |
|
"loss": 0.419, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"learning_rate": 2.3498233215547705e-05, |
|
"loss": 0.4207, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 53.45, |
|
"learning_rate": 2.32773851590106e-05, |
|
"loss": 0.4176, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 53.89, |
|
"learning_rate": 2.30565371024735e-05, |
|
"loss": 0.4199, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 54.33, |
|
"learning_rate": 2.2835689045936398e-05, |
|
"loss": 0.4179, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 54.77, |
|
"learning_rate": 2.2614840989399292e-05, |
|
"loss": 0.4188, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 55.21, |
|
"learning_rate": 2.2393992932862194e-05, |
|
"loss": 0.4181, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 55.65, |
|
"learning_rate": 2.2173144876325088e-05, |
|
"loss": 0.4178, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 56.1, |
|
"learning_rate": 2.195229681978799e-05, |
|
"loss": 0.4184, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 56.54, |
|
"learning_rate": 2.1731448763250883e-05, |
|
"loss": 0.4169, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 56.98, |
|
"learning_rate": 2.151060070671378e-05, |
|
"loss": 0.4185, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 57.42, |
|
"learning_rate": 2.128975265017668e-05, |
|
"loss": 0.4159, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 57.86, |
|
"learning_rate": 2.1068904593639577e-05, |
|
"loss": 0.4177, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 58.3, |
|
"learning_rate": 2.0848056537102475e-05, |
|
"loss": 0.4159, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 58.75, |
|
"learning_rate": 2.0627208480565372e-05, |
|
"loss": 0.4169, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 59.19, |
|
"learning_rate": 2.040636042402827e-05, |
|
"loss": 0.4163, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 59.63, |
|
"learning_rate": 2.0185512367491165e-05, |
|
"loss": 0.4159, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 60.07, |
|
"learning_rate": 1.9964664310954066e-05, |
|
"loss": 0.4167, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 60.51, |
|
"learning_rate": 1.974381625441696e-05, |
|
"loss": 0.4148, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 60.95, |
|
"learning_rate": 1.9522968197879858e-05, |
|
"loss": 0.4165, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 61.4, |
|
"learning_rate": 1.930212014134276e-05, |
|
"loss": 0.4142, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 61.84, |
|
"learning_rate": 1.9081272084805653e-05, |
|
"loss": 0.4157, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 62.28, |
|
"learning_rate": 1.8860424028268555e-05, |
|
"loss": 0.4144, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 62.72, |
|
"learning_rate": 1.863957597173145e-05, |
|
"loss": 0.4149, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 63.16, |
|
"learning_rate": 1.8418727915194347e-05, |
|
"loss": 0.4147, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 63.6, |
|
"learning_rate": 1.8197879858657244e-05, |
|
"loss": 0.4139, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 64.05, |
|
"learning_rate": 1.7977031802120142e-05, |
|
"loss": 0.4151, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 64.49, |
|
"learning_rate": 1.775618374558304e-05, |
|
"loss": 0.413, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 64.93, |
|
"learning_rate": 1.7535335689045938e-05, |
|
"loss": 0.4147, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 65.37, |
|
"learning_rate": 1.7314487632508836e-05, |
|
"loss": 0.4127, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 65.81, |
|
"learning_rate": 1.709363957597173e-05, |
|
"loss": 0.4139, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 66.25, |
|
"learning_rate": 1.687279151943463e-05, |
|
"loss": 0.4128, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 66.7, |
|
"learning_rate": 1.6651943462897526e-05, |
|
"loss": 0.4131, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 67.14, |
|
"learning_rate": 1.6431095406360427e-05, |
|
"loss": 0.4133, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 67.58, |
|
"learning_rate": 1.6210247349823324e-05, |
|
"loss": 0.4123, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 68.02, |
|
"learning_rate": 1.598939929328622e-05, |
|
"loss": 0.4136, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 68.46, |
|
"learning_rate": 1.576855123674912e-05, |
|
"loss": 0.4114, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 68.9, |
|
"learning_rate": 1.5547703180212014e-05, |
|
"loss": 0.413, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 69.35, |
|
"learning_rate": 1.5326855123674912e-05, |
|
"loss": 0.4113, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 69.79, |
|
"learning_rate": 1.5106007067137808e-05, |
|
"loss": 0.4123, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"learning_rate": 1.4885159010600708e-05, |
|
"loss": 0.4116, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 70.67, |
|
"learning_rate": 1.4664310954063604e-05, |
|
"loss": 0.4116, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 71.11, |
|
"learning_rate": 1.4443462897526502e-05, |
|
"loss": 0.412, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 71.55, |
|
"learning_rate": 1.4222614840989401e-05, |
|
"loss": 0.4107, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 1.4001766784452297e-05, |
|
"loss": 0.4122, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 72.44, |
|
"learning_rate": 1.3780918727915197e-05, |
|
"loss": 0.41, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 72.88, |
|
"learning_rate": 1.3560070671378091e-05, |
|
"loss": 0.4114, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 73.32, |
|
"learning_rate": 1.333922261484099e-05, |
|
"loss": 0.4102, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 73.76, |
|
"learning_rate": 1.3118374558303886e-05, |
|
"loss": 0.4108, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 74.2, |
|
"learning_rate": 1.2897526501766786e-05, |
|
"loss": 0.4104, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 74.65, |
|
"learning_rate": 1.2676678445229684e-05, |
|
"loss": 0.4101, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 75.09, |
|
"learning_rate": 1.245583038869258e-05, |
|
"loss": 0.4106, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 75.53, |
|
"learning_rate": 1.2234982332155478e-05, |
|
"loss": 0.4094, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 75.97, |
|
"learning_rate": 1.2014134275618374e-05, |
|
"loss": 0.4107, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 76.41, |
|
"learning_rate": 1.1793286219081273e-05, |
|
"loss": 0.4088, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 76.86, |
|
"learning_rate": 1.1572438162544171e-05, |
|
"loss": 0.4101, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 77.3, |
|
"learning_rate": 1.1351590106007069e-05, |
|
"loss": 0.4089, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 77.74, |
|
"learning_rate": 1.1130742049469965e-05, |
|
"loss": 0.4095, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 78.18, |
|
"learning_rate": 1.0909893992932863e-05, |
|
"loss": 0.4092, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 78.62, |
|
"learning_rate": 1.068904593639576e-05, |
|
"loss": 0.4087, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 79.06, |
|
"learning_rate": 1.0468197879858656e-05, |
|
"loss": 0.4094, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 79.51, |
|
"learning_rate": 1.0247349823321556e-05, |
|
"loss": 0.4081, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 79.95, |
|
"learning_rate": 1.0026501766784454e-05, |
|
"loss": 0.4092, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 80.39, |
|
"learning_rate": 9.805653710247351e-06, |
|
"loss": 0.4078, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 80.83, |
|
"learning_rate": 9.584805653710247e-06, |
|
"loss": 0.4087, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 81.27, |
|
"learning_rate": 9.363957597173145e-06, |
|
"loss": 0.4078, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 81.71, |
|
"learning_rate": 9.143109540636043e-06, |
|
"loss": 0.4081, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 82.16, |
|
"learning_rate": 8.922261484098939e-06, |
|
"loss": 0.4082, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 82.6, |
|
"learning_rate": 8.701413427561837e-06, |
|
"loss": 0.4075, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 83.04, |
|
"learning_rate": 8.480565371024736e-06, |
|
"loss": 0.4082, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 83.48, |
|
"learning_rate": 8.259717314487634e-06, |
|
"loss": 0.4069, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 83.92, |
|
"learning_rate": 8.03886925795053e-06, |
|
"loss": 0.4081, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 84.36, |
|
"learning_rate": 7.818021201413428e-06, |
|
"loss": 0.4068, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 84.81, |
|
"learning_rate": 7.597173144876325e-06, |
|
"loss": 0.4075, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 85.25, |
|
"learning_rate": 7.376325088339223e-06, |
|
"loss": 0.407, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 85.69, |
|
"learning_rate": 7.15547703180212e-06, |
|
"loss": 0.407, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 86.13, |
|
"learning_rate": 6.934628975265017e-06, |
|
"loss": 0.4071, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 86.57, |
|
"learning_rate": 6.713780918727916e-06, |
|
"loss": 0.4064, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 87.01, |
|
"learning_rate": 6.492932862190814e-06, |
|
"loss": 0.4073, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 87.46, |
|
"learning_rate": 6.272084805653711e-06, |
|
"loss": 0.406, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 87.9, |
|
"learning_rate": 6.051236749116608e-06, |
|
"loss": 0.4068, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 88.34, |
|
"learning_rate": 5.830388692579505e-06, |
|
"loss": 0.4059, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 88.78, |
|
"learning_rate": 5.609540636042403e-06, |
|
"loss": 0.4063, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 89.22, |
|
"learning_rate": 5.388692579505301e-06, |
|
"loss": 0.4061, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 89.66, |
|
"learning_rate": 5.167844522968198e-06, |
|
"loss": 0.4059, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 90.11, |
|
"learning_rate": 4.946996466431096e-06, |
|
"loss": 0.4061, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 90.55, |
|
"learning_rate": 4.726148409893993e-06, |
|
"loss": 0.4056, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 90.99, |
|
"learning_rate": 4.505300353356891e-06, |
|
"loss": 0.406, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 91.43, |
|
"learning_rate": 4.284452296819788e-06, |
|
"loss": 0.4051, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 91.87, |
|
"learning_rate": 4.063604240282685e-06, |
|
"loss": 0.4057, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"learning_rate": 3.842756183745584e-06, |
|
"loss": 0.4052, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 92.76, |
|
"learning_rate": 3.621908127208481e-06, |
|
"loss": 0.4054, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 93.2, |
|
"learning_rate": 3.401060070671378e-06, |
|
"loss": 0.4051, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 93.64, |
|
"learning_rate": 3.1802120141342753e-06, |
|
"loss": 0.4049, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 94.08, |
|
"learning_rate": 2.959363957597173e-06, |
|
"loss": 0.4053, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 94.52, |
|
"learning_rate": 2.738515901060071e-06, |
|
"loss": 0.4047, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 94.96, |
|
"learning_rate": 2.517667844522968e-06, |
|
"loss": 0.405, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 95.41, |
|
"learning_rate": 2.296819787985866e-06, |
|
"loss": 0.4044, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 95.85, |
|
"learning_rate": 2.0759717314487633e-06, |
|
"loss": 0.4047, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 96.29, |
|
"learning_rate": 1.8551236749116609e-06, |
|
"loss": 0.4044, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 96.73, |
|
"learning_rate": 1.6342756183745586e-06, |
|
"loss": 0.4044, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 97.17, |
|
"learning_rate": 1.4134275618374558e-06, |
|
"loss": 0.4044, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 97.61, |
|
"learning_rate": 1.1925795053003533e-06, |
|
"loss": 0.4042, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 98.06, |
|
"learning_rate": 9.71731448763251e-07, |
|
"loss": 0.4044, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 98.5, |
|
"learning_rate": 7.508833922261484e-07, |
|
"loss": 0.404, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 98.94, |
|
"learning_rate": 5.30035335689046e-07, |
|
"loss": 0.4041, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 99.38, |
|
"learning_rate": 3.091872791519435e-07, |
|
"loss": 0.4039, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 99.82, |
|
"learning_rate": 8.833922261484099e-08, |
|
"loss": 0.4039, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 113200, |
|
"total_flos": 8.087110710389113e+18, |
|
"train_loss": 0.43426066813115094, |
|
"train_runtime": 194605.8141, |
|
"train_samples_per_second": 814.294, |
|
"train_steps_per_second": 0.582 |
|
} |
|
], |
|
"max_steps": 113200, |
|
"num_train_epochs": 100, |
|
"total_flos": 8.087110710389113e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|