|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.9405478794478936, |
|
"global_step": 1100000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.8208841872978234e-05, |
|
"loss": 0.0755, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6417683745956466e-05, |
|
"loss": 0.0721, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.06494459509849548, |
|
"eval_runtime": 754.7473, |
|
"eval_samples_per_second": 82.19, |
|
"eval_steps_per_second": 20.549, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.462652561893469e-05, |
|
"loss": 0.0704, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.2835367491912923e-05, |
|
"loss": 0.0701, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.06424280256032944, |
|
"eval_runtime": 676.9024, |
|
"eval_samples_per_second": 91.642, |
|
"eval_steps_per_second": 22.912, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.104420936489115e-05, |
|
"loss": 0.0713, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.925305123786938e-05, |
|
"loss": 0.0712, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 0.0624563954770565, |
|
"eval_runtime": 678.8069, |
|
"eval_samples_per_second": 91.385, |
|
"eval_steps_per_second": 22.847, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.746189311084761e-05, |
|
"loss": 0.0698, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.5670734983825844e-05, |
|
"loss": 0.0667, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 0.06348983943462372, |
|
"eval_runtime": 675.6836, |
|
"eval_samples_per_second": 91.808, |
|
"eval_steps_per_second": 22.953, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.3879576856804076e-05, |
|
"loss": 0.0657, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.20884187297823e-05, |
|
"loss": 0.0665, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 0.06186612322926521, |
|
"eval_runtime": 676.1915, |
|
"eval_samples_per_second": 91.739, |
|
"eval_steps_per_second": 22.936, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.0297260602760537e-05, |
|
"loss": 0.0657, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.8506102475738766e-05, |
|
"loss": 0.0653, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_loss": 0.061661478132009506, |
|
"eval_runtime": 677.4648, |
|
"eval_samples_per_second": 91.566, |
|
"eval_steps_per_second": 22.893, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.6714944348716997e-05, |
|
"loss": 0.0658, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4923786221695226e-05, |
|
"loss": 0.0656, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_loss": 0.06098250672221184, |
|
"eval_runtime": 677.3485, |
|
"eval_samples_per_second": 91.582, |
|
"eval_steps_per_second": 22.897, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.3132628094673455e-05, |
|
"loss": 0.0649, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.1341469967651687e-05, |
|
"loss": 0.0648, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.06077565252780914, |
|
"eval_runtime": 677.1708, |
|
"eval_samples_per_second": 91.606, |
|
"eval_steps_per_second": 22.903, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9550311840629915e-05, |
|
"loss": 0.0642, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.7759153713608147e-05, |
|
"loss": 0.0649, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 0.05941811203956604, |
|
"eval_runtime": 677.9109, |
|
"eval_samples_per_second": 91.506, |
|
"eval_steps_per_second": 22.878, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5967995586586376e-05, |
|
"loss": 0.0652, |
|
"step": 190000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.4176837459564608e-05, |
|
"loss": 0.0638, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 0.06042506918311119, |
|
"eval_runtime": 678.1922, |
|
"eval_samples_per_second": 91.468, |
|
"eval_steps_per_second": 22.868, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.2385679332542836e-05, |
|
"loss": 0.0621, |
|
"step": 210000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.0594521205521066e-05, |
|
"loss": 0.0643, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.059569716453552246, |
|
"eval_runtime": 688.4187, |
|
"eval_samples_per_second": 90.109, |
|
"eval_steps_per_second": 22.528, |
|
"step": 220000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 8.803363078499297e-06, |
|
"loss": 0.0623, |
|
"step": 230000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.012204951477527e-06, |
|
"loss": 0.0646, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_loss": 0.059120796620845795, |
|
"eval_runtime": 715.8659, |
|
"eval_samples_per_second": 86.654, |
|
"eval_steps_per_second": 21.665, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.5522104682445576e-05, |
|
"loss": 0.0634, |
|
"step": 250000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.53429888697434e-05, |
|
"loss": 0.0648, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_loss": 0.06051425263285637, |
|
"eval_runtime": 694.96, |
|
"eval_samples_per_second": 89.261, |
|
"eval_steps_per_second": 22.316, |
|
"step": 260000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.516387305704122e-05, |
|
"loss": 0.0644, |
|
"step": 270000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.4984757244339046e-05, |
|
"loss": 0.0655, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.06069951504468918, |
|
"eval_runtime": 679.0968, |
|
"eval_samples_per_second": 91.346, |
|
"eval_steps_per_second": 22.838, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.4805641431636866e-05, |
|
"loss": 0.0652, |
|
"step": 290000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.462652561893469e-05, |
|
"loss": 0.0631, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.06102127209305763, |
|
"eval_runtime": 705.9715, |
|
"eval_samples_per_second": 87.869, |
|
"eval_steps_per_second": 21.968, |
|
"step": 300000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 4.444740980623252e-05, |
|
"loss": 0.064, |
|
"step": 310000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 4.426829399353034e-05, |
|
"loss": 0.0643, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.05971848592162132, |
|
"eval_runtime": 698.4495, |
|
"eval_samples_per_second": 88.815, |
|
"eval_steps_per_second": 22.205, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.408917818082816e-05, |
|
"loss": 0.0638, |
|
"step": 330000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.391006236812599e-05, |
|
"loss": 0.064, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.05945688858628273, |
|
"eval_runtime": 700.9514, |
|
"eval_samples_per_second": 88.498, |
|
"eval_steps_per_second": 22.126, |
|
"step": 340000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.373094655542381e-05, |
|
"loss": 0.0613, |
|
"step": 350000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 4.355183074272163e-05, |
|
"loss": 0.0635, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.06002034246921539, |
|
"eval_runtime": 697.4531, |
|
"eval_samples_per_second": 88.942, |
|
"eval_steps_per_second": 22.237, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.337271493001945e-05, |
|
"loss": 0.0635, |
|
"step": 370000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.319359911731728e-05, |
|
"loss": 0.0627, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 0.0593414306640625, |
|
"eval_runtime": 720.1404, |
|
"eval_samples_per_second": 86.14, |
|
"eval_steps_per_second": 21.536, |
|
"step": 380000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.30144833046151e-05, |
|
"loss": 0.0629, |
|
"step": 390000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.2835367491912923e-05, |
|
"loss": 0.0642, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.059706129133701324, |
|
"eval_runtime": 706.6838, |
|
"eval_samples_per_second": 87.78, |
|
"eval_steps_per_second": 21.946, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.265625167921074e-05, |
|
"loss": 0.062, |
|
"step": 410000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 4.247713586650857e-05, |
|
"loss": 0.0616, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_loss": 0.058674536645412445, |
|
"eval_runtime": 740.0142, |
|
"eval_samples_per_second": 83.827, |
|
"eval_steps_per_second": 20.958, |
|
"step": 420000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 4.2298020053806394e-05, |
|
"loss": 0.0611, |
|
"step": 430000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.211890424110422e-05, |
|
"loss": 0.0618, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 0.05948682501912117, |
|
"eval_runtime": 698.4731, |
|
"eval_samples_per_second": 88.812, |
|
"eval_steps_per_second": 22.204, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 4.193978842840204e-05, |
|
"loss": 0.062, |
|
"step": 450000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 4.176067261569986e-05, |
|
"loss": 0.0603, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 0.059506043791770935, |
|
"eval_runtime": 699.0665, |
|
"eval_samples_per_second": 88.737, |
|
"eval_steps_per_second": 22.185, |
|
"step": 460000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.1581556802997684e-05, |
|
"loss": 0.0606, |
|
"step": 470000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 4.1402440990295504e-05, |
|
"loss": 0.0596, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 0.06113377958536148, |
|
"eval_runtime": 687.5695, |
|
"eval_samples_per_second": 90.221, |
|
"eval_steps_per_second": 22.556, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 4.122332517759333e-05, |
|
"loss": 0.0604, |
|
"step": 490000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 4.104420936489115e-05, |
|
"loss": 0.0602, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 0.058628179132938385, |
|
"eval_runtime": 685.1917, |
|
"eval_samples_per_second": 90.534, |
|
"eval_steps_per_second": 22.635, |
|
"step": 500000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.0865093552188975e-05, |
|
"loss": 0.0588, |
|
"step": 510000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 4.0685977739486794e-05, |
|
"loss": 0.0603, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.058195825666189194, |
|
"eval_runtime": 684.8426, |
|
"eval_samples_per_second": 90.58, |
|
"eval_steps_per_second": 22.646, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.050686192678462e-05, |
|
"loss": 0.0606, |
|
"step": 530000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 4.0327746114082445e-05, |
|
"loss": 0.0603, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_loss": 0.059178441762924194, |
|
"eval_runtime": 686.71, |
|
"eval_samples_per_second": 90.334, |
|
"eval_steps_per_second": 22.584, |
|
"step": 540000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 4.014863030138027e-05, |
|
"loss": 0.0614, |
|
"step": 550000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 3.996951448867809e-05, |
|
"loss": 0.0596, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"eval_loss": 0.05809653550386429, |
|
"eval_runtime": 686.1393, |
|
"eval_samples_per_second": 90.409, |
|
"eval_steps_per_second": 22.603, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.9790398675975916e-05, |
|
"loss": 0.0587, |
|
"step": 570000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.9611282863273736e-05, |
|
"loss": 0.0584, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.05719467252492905, |
|
"eval_runtime": 686.9168, |
|
"eval_samples_per_second": 90.306, |
|
"eval_steps_per_second": 22.578, |
|
"step": 580000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 3.943216705057156e-05, |
|
"loss": 0.0592, |
|
"step": 590000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 3.925305123786938e-05, |
|
"loss": 0.0592, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"eval_loss": 0.05839799717068672, |
|
"eval_runtime": 691.5693, |
|
"eval_samples_per_second": 89.699, |
|
"eval_steps_per_second": 22.426, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 3.9073935425167206e-05, |
|
"loss": 0.0583, |
|
"step": 610000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 3.8894819612465026e-05, |
|
"loss": 0.0598, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.05699307098984718, |
|
"eval_runtime": 703.0844, |
|
"eval_samples_per_second": 88.23, |
|
"eval_steps_per_second": 22.059, |
|
"step": 620000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 3.871570379976285e-05, |
|
"loss": 0.0587, |
|
"step": 630000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.853658798706067e-05, |
|
"loss": 0.058, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 0.05786525830626488, |
|
"eval_runtime": 719.4112, |
|
"eval_samples_per_second": 86.227, |
|
"eval_steps_per_second": 21.558, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 3.8357472174358497e-05, |
|
"loss": 0.0579, |
|
"step": 650000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.817835636165632e-05, |
|
"loss": 0.0576, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.05708477646112442, |
|
"eval_runtime": 740.1092, |
|
"eval_samples_per_second": 83.816, |
|
"eval_steps_per_second": 20.955, |
|
"step": 660000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.799924054895415e-05, |
|
"loss": 0.0578, |
|
"step": 670000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.782012473625197e-05, |
|
"loss": 0.0586, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_loss": 0.05821482464671135, |
|
"eval_runtime": 697.0334, |
|
"eval_samples_per_second": 88.996, |
|
"eval_steps_per_second": 22.25, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 3.764100892354979e-05, |
|
"loss": 0.0579, |
|
"step": 690000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.746189311084761e-05, |
|
"loss": 0.0579, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_loss": 0.057255882769823074, |
|
"eval_runtime": 693.1254, |
|
"eval_samples_per_second": 89.498, |
|
"eval_steps_per_second": 22.375, |
|
"step": 700000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 3.728277729814544e-05, |
|
"loss": 0.0586, |
|
"step": 710000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 3.710366148544326e-05, |
|
"loss": 0.0583, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_loss": 0.0572069026529789, |
|
"eval_runtime": 691.2275, |
|
"eval_samples_per_second": 89.743, |
|
"eval_steps_per_second": 22.437, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 3.6924545672741083e-05, |
|
"loss": 0.0591, |
|
"step": 730000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 3.67454298600389e-05, |
|
"loss": 0.0573, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_loss": 0.05713275447487831, |
|
"eval_runtime": 696.2667, |
|
"eval_samples_per_second": 89.094, |
|
"eval_steps_per_second": 22.275, |
|
"step": 740000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 3.656631404733673e-05, |
|
"loss": 0.0559, |
|
"step": 750000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 3.638719823463455e-05, |
|
"loss": 0.0571, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.056393299251794815, |
|
"eval_runtime": 683.9753, |
|
"eval_samples_per_second": 90.695, |
|
"eval_steps_per_second": 22.675, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 3.6208082421932374e-05, |
|
"loss": 0.0572, |
|
"step": 770000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 3.60289666092302e-05, |
|
"loss": 0.0571, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_loss": 0.057615023106336594, |
|
"eval_runtime": 683.0895, |
|
"eval_samples_per_second": 90.812, |
|
"eval_steps_per_second": 22.704, |
|
"step": 780000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.5849850796528025e-05, |
|
"loss": 0.0575, |
|
"step": 790000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 3.5670734983825844e-05, |
|
"loss": 0.0556, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_loss": 0.055679138749837875, |
|
"eval_runtime": 682.7397, |
|
"eval_samples_per_second": 90.859, |
|
"eval_steps_per_second": 22.716, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 3.549161917112367e-05, |
|
"loss": 0.057, |
|
"step": 810000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.531250335842149e-05, |
|
"loss": 0.057, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 0.056380968540906906, |
|
"eval_runtime": 682.9026, |
|
"eval_samples_per_second": 90.837, |
|
"eval_steps_per_second": 22.71, |
|
"step": 820000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 3.5133387545719315e-05, |
|
"loss": 0.0558, |
|
"step": 830000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 3.4954271733017135e-05, |
|
"loss": 0.0561, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"eval_loss": 0.05610540881752968, |
|
"eval_runtime": 684.8005, |
|
"eval_samples_per_second": 90.586, |
|
"eval_steps_per_second": 22.647, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 3.477515592031496e-05, |
|
"loss": 0.0558, |
|
"step": 850000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 3.459604010761278e-05, |
|
"loss": 0.0552, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"eval_loss": 0.05571727082133293, |
|
"eval_runtime": 684.9047, |
|
"eval_samples_per_second": 90.572, |
|
"eval_steps_per_second": 22.644, |
|
"step": 860000 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 3.4416924294910605e-05, |
|
"loss": 0.0566, |
|
"step": 870000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"learning_rate": 3.4237808482208425e-05, |
|
"loss": 0.0572, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"eval_loss": 0.05569184198975563, |
|
"eval_runtime": 685.6335, |
|
"eval_samples_per_second": 90.475, |
|
"eval_steps_per_second": 22.62, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 3.405869266950625e-05, |
|
"loss": 0.0558, |
|
"step": 890000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.3879576856804076e-05, |
|
"loss": 0.0559, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"eval_loss": 0.055385466665029526, |
|
"eval_runtime": 689.1891, |
|
"eval_samples_per_second": 90.009, |
|
"eval_steps_per_second": 22.503, |
|
"step": 900000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 3.37004610441019e-05, |
|
"loss": 0.0557, |
|
"step": 910000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 3.352134523139972e-05, |
|
"loss": 0.0548, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 0.05598163977265358, |
|
"eval_runtime": 684.1191, |
|
"eval_samples_per_second": 90.676, |
|
"eval_steps_per_second": 22.67, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 3.334222941869754e-05, |
|
"loss": 0.0552, |
|
"step": 930000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 3.3163113605995366e-05, |
|
"loss": 0.0565, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 0.055193424224853516, |
|
"eval_runtime": 732.5988, |
|
"eval_samples_per_second": 84.675, |
|
"eval_steps_per_second": 21.17, |
|
"step": 940000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.2983997793293186e-05, |
|
"loss": 0.0557, |
|
"step": 950000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 3.280488198059101e-05, |
|
"loss": 0.055, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 0.05571094900369644, |
|
"eval_runtime": 724.8884, |
|
"eval_samples_per_second": 85.576, |
|
"eval_steps_per_second": 21.395, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 3.262576616788883e-05, |
|
"loss": 0.0556, |
|
"step": 970000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 3.2446650355186657e-05, |
|
"loss": 0.0563, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 0.05553479492664337, |
|
"eval_runtime": 714.2985, |
|
"eval_samples_per_second": 86.845, |
|
"eval_steps_per_second": 21.712, |
|
"step": 980000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 3.2267534542484476e-05, |
|
"loss": 0.0552, |
|
"step": 990000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 3.20884187297823e-05, |
|
"loss": 0.0553, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"eval_loss": 0.05587638169527054, |
|
"eval_runtime": 709.3024, |
|
"eval_samples_per_second": 87.456, |
|
"eval_steps_per_second": 21.865, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 3.190930291708013e-05, |
|
"loss": 0.0544, |
|
"step": 1010000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.1730187104377953e-05, |
|
"loss": 0.0545, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.05536632239818573, |
|
"eval_runtime": 696.888, |
|
"eval_samples_per_second": 89.014, |
|
"eval_steps_per_second": 22.255, |
|
"step": 1020000 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 3.155107129167577e-05, |
|
"loss": 0.0543, |
|
"step": 1030000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 3.13719554789736e-05, |
|
"loss": 0.0544, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"eval_loss": 0.05515935644507408, |
|
"eval_runtime": 706.1745, |
|
"eval_samples_per_second": 87.844, |
|
"eval_steps_per_second": 21.962, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.119283966627142e-05, |
|
"loss": 0.0534, |
|
"step": 1050000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.1013723853569243e-05, |
|
"loss": 0.0552, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.054923560470342636, |
|
"eval_runtime": 716.4597, |
|
"eval_samples_per_second": 86.583, |
|
"eval_steps_per_second": 21.647, |
|
"step": 1060000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 3.083460804086706e-05, |
|
"loss": 0.055, |
|
"step": 1070000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 3.065549222816489e-05, |
|
"loss": 0.0547, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 0.05532824248075485, |
|
"eval_runtime": 713.1552, |
|
"eval_samples_per_second": 86.984, |
|
"eval_steps_per_second": 21.747, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 3.047637641546271e-05, |
|
"loss": 0.0526, |
|
"step": 1090000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 3.0297260602760537e-05, |
|
"loss": 0.0536, |
|
"step": 1100000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 0.05594659596681595, |
|
"eval_runtime": 765.7749, |
|
"eval_samples_per_second": 81.007, |
|
"eval_steps_per_second": 20.253, |
|
"step": 1100000 |
|
} |
|
], |
|
"max_steps": 2791490, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.350678561491497e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|