|
{
  "best_metric": 1.6783268451690674,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.2770083102493075,
  "eval_steps": 100,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002770083102493075,
      "grad_norm": 0.9283442497253418,
      "learning_rate": 5e-06,
      "loss": 2.05,
      "step": 1
    },
    {
      "epoch": 0.002770083102493075,
      "eval_loss": 2.1240499019622803,
      "eval_runtime": 95.1756,
      "eval_samples_per_second": 6.388,
      "eval_steps_per_second": 1.597,
      "step": 1
    },
    {
      "epoch": 0.00554016620498615,
      "grad_norm": 0.955880343914032,
      "learning_rate": 1e-05,
      "loss": 2.0261,
      "step": 2
    },
    {
      "epoch": 0.008310249307479225,
      "grad_norm": 0.9946913123130798,
      "learning_rate": 1.5e-05,
      "loss": 2.0557,
      "step": 3
    },
    {
      "epoch": 0.0110803324099723,
      "grad_norm": 0.9017006158828735,
      "learning_rate": 2e-05,
      "loss": 2.0127,
      "step": 4
    },
    {
      "epoch": 0.013850415512465374,
      "grad_norm": 1.0481677055358887,
      "learning_rate": 2.5e-05,
      "loss": 1.9824,
      "step": 5
    },
    {
      "epoch": 0.01662049861495845,
      "grad_norm": 0.7929118275642395,
      "learning_rate": 3e-05,
      "loss": 2.0085,
      "step": 6
    },
    {
      "epoch": 0.019390581717451522,
      "grad_norm": 0.7500401139259338,
      "learning_rate": 3.5e-05,
      "loss": 1.9579,
      "step": 7
    },
    {
      "epoch": 0.0221606648199446,
      "grad_norm": 0.6779894232749939,
      "learning_rate": 4e-05,
      "loss": 1.9187,
      "step": 8
    },
    {
      "epoch": 0.024930747922437674,
      "grad_norm": 0.644287645816803,
      "learning_rate": 4.5e-05,
      "loss": 1.9291,
      "step": 9
    },
    {
      "epoch": 0.027700831024930747,
      "grad_norm": 0.6950892806053162,
      "learning_rate": 5e-05,
      "loss": 1.9033,
      "step": 10
    },
    {
      "epoch": 0.030470914127423823,
      "grad_norm": 0.6530030369758606,
      "learning_rate": 5.500000000000001e-05,
      "loss": 1.849,
      "step": 11
    },
    {
      "epoch": 0.0332409972299169,
      "grad_norm": 0.6152698993682861,
      "learning_rate": 6e-05,
      "loss": 1.8331,
      "step": 12
    },
    {
      "epoch": 0.036011080332409975,
      "grad_norm": 0.638900637626648,
      "learning_rate": 6.500000000000001e-05,
      "loss": 1.7434,
      "step": 13
    },
    {
      "epoch": 0.038781163434903045,
      "grad_norm": 0.6423549056053162,
      "learning_rate": 7e-05,
      "loss": 1.809,
      "step": 14
    },
    {
      "epoch": 0.04155124653739612,
      "grad_norm": 0.6707618236541748,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.8022,
      "step": 15
    },
    {
      "epoch": 0.0443213296398892,
      "grad_norm": 0.6206409335136414,
      "learning_rate": 8e-05,
      "loss": 1.7494,
      "step": 16
    },
    {
      "epoch": 0.04709141274238227,
      "grad_norm": 0.5846340656280518,
      "learning_rate": 8.5e-05,
      "loss": 1.7886,
      "step": 17
    },
    {
      "epoch": 0.04986149584487535,
      "grad_norm": 0.5503649115562439,
      "learning_rate": 9e-05,
      "loss": 1.7373,
      "step": 18
    },
    {
      "epoch": 0.05263157894736842,
      "grad_norm": 0.55040043592453,
      "learning_rate": 9.5e-05,
      "loss": 1.7224,
      "step": 19
    },
    {
      "epoch": 0.055401662049861494,
      "grad_norm": 0.5535205006599426,
      "learning_rate": 0.0001,
      "loss": 1.6901,
      "step": 20
    },
    {
      "epoch": 0.05817174515235457,
      "grad_norm": 0.559658408164978,
      "learning_rate": 9.999787808528638e-05,
      "loss": 1.6755,
      "step": 21
    },
    {
      "epoch": 0.060941828254847646,
      "grad_norm": 0.5673468708992004,
      "learning_rate": 9.999151252124639e-05,
      "loss": 1.6903,
      "step": 22
    },
    {
      "epoch": 0.06371191135734072,
      "grad_norm": 0.5480718612670898,
      "learning_rate": 9.99809038481674e-05,
      "loss": 1.8207,
      "step": 23
    },
    {
      "epoch": 0.0664819944598338,
      "grad_norm": 0.5410633087158203,
      "learning_rate": 9.996605296647737e-05,
      "loss": 1.7156,
      "step": 24
    },
    {
      "epoch": 0.06925207756232687,
      "grad_norm": 0.5444090366363525,
      "learning_rate": 9.99469611366685e-05,
      "loss": 1.695,
      "step": 25
    },
    {
      "epoch": 0.07202216066481995,
      "grad_norm": 0.5443681478500366,
      "learning_rate": 9.992362997919016e-05,
      "loss": 1.6809,
      "step": 26
    },
    {
      "epoch": 0.07479224376731301,
      "grad_norm": 0.5565460920333862,
      "learning_rate": 9.98960614743114e-05,
      "loss": 1.7007,
      "step": 27
    },
    {
      "epoch": 0.07756232686980609,
      "grad_norm": 0.5411592721939087,
      "learning_rate": 9.986425796195287e-05,
      "loss": 1.7413,
      "step": 28
    },
    {
      "epoch": 0.08033240997229917,
      "grad_norm": 0.5616833567619324,
      "learning_rate": 9.982822214148821e-05,
      "loss": 1.6731,
      "step": 29
    },
    {
      "epoch": 0.08310249307479224,
      "grad_norm": 0.5728862881660461,
      "learning_rate": 9.978795707151492e-05,
      "loss": 1.7152,
      "step": 30
    },
    {
      "epoch": 0.08587257617728532,
      "grad_norm": 0.5711526274681091,
      "learning_rate": 9.974346616959476e-05,
      "loss": 1.6633,
      "step": 31
    },
    {
      "epoch": 0.0886426592797784,
      "grad_norm": 0.5852300524711609,
      "learning_rate": 9.969475321196373e-05,
      "loss": 1.6699,
      "step": 32
    },
    {
      "epoch": 0.09141274238227147,
      "grad_norm": 0.5722000598907471,
      "learning_rate": 9.96418223332115e-05,
      "loss": 1.6235,
      "step": 33
    },
    {
      "epoch": 0.09418282548476455,
      "grad_norm": 0.6049830317497253,
      "learning_rate": 9.958467802593046e-05,
      "loss": 1.7123,
      "step": 34
    },
    {
      "epoch": 0.09695290858725762,
      "grad_norm": 0.6205933690071106,
      "learning_rate": 9.952332514033447e-05,
      "loss": 1.6483,
      "step": 35
    },
    {
      "epoch": 0.0997229916897507,
      "grad_norm": 0.5876198410987854,
      "learning_rate": 9.94577688838472e-05,
      "loss": 1.6032,
      "step": 36
    },
    {
      "epoch": 0.10249307479224377,
      "grad_norm": 0.568473756313324,
      "learning_rate": 9.938801482065998e-05,
      "loss": 1.6591,
      "step": 37
    },
    {
      "epoch": 0.10526315789473684,
      "grad_norm": 0.5977758765220642,
      "learning_rate": 9.93140688712598e-05,
      "loss": 1.5954,
      "step": 38
    },
    {
      "epoch": 0.10803324099722991,
      "grad_norm": 0.6168347597122192,
      "learning_rate": 9.923593731192655e-05,
      "loss": 1.7199,
      "step": 39
    },
    {
      "epoch": 0.11080332409972299,
      "grad_norm": 0.567383348941803,
      "learning_rate": 9.915362677420044e-05,
      "loss": 1.5559,
      "step": 40
    },
    {
      "epoch": 0.11357340720221606,
      "grad_norm": 0.5804698467254639,
      "learning_rate": 9.906714424431913e-05,
      "loss": 1.595,
      "step": 41
    },
    {
      "epoch": 0.11634349030470914,
      "grad_norm": 0.6087197661399841,
      "learning_rate": 9.897649706262473e-05,
      "loss": 1.6415,
      "step": 42
    },
    {
      "epoch": 0.11911357340720222,
      "grad_norm": 0.6603733897209167,
      "learning_rate": 9.888169292294076e-05,
      "loss": 1.7308,
      "step": 43
    },
    {
      "epoch": 0.12188365650969529,
      "grad_norm": 0.597120463848114,
      "learning_rate": 9.87827398719192e-05,
      "loss": 1.651,
      "step": 44
    },
    {
      "epoch": 0.12465373961218837,
      "grad_norm": 0.563897967338562,
      "learning_rate": 9.867964630835743e-05,
      "loss": 1.5197,
      "step": 45
    },
    {
      "epoch": 0.12742382271468145,
      "grad_norm": 0.5994200706481934,
      "learning_rate": 9.857242098248542e-05,
      "loss": 1.6037,
      "step": 46
    },
    {
      "epoch": 0.13019390581717452,
      "grad_norm": 0.643892765045166,
      "learning_rate": 9.846107299522304e-05,
      "loss": 1.6028,
      "step": 47
    },
    {
      "epoch": 0.1329639889196676,
      "grad_norm": 0.6362243294715881,
      "learning_rate": 9.834561179740762e-05,
      "loss": 1.7313,
      "step": 48
    },
    {
      "epoch": 0.13573407202216067,
      "grad_norm": 0.8057113885879517,
      "learning_rate": 9.82260471889917e-05,
      "loss": 1.8693,
      "step": 49
    },
    {
      "epoch": 0.13850415512465375,
      "grad_norm": 1.2995071411132812,
      "learning_rate": 9.810238931821138e-05,
      "loss": 2.3462,
      "step": 50
    },
    {
      "epoch": 0.14127423822714683,
      "grad_norm": 0.9156427383422852,
      "learning_rate": 9.797464868072488e-05,
      "loss": 1.9039,
      "step": 51
    },
    {
      "epoch": 0.1440443213296399,
      "grad_norm": 0.7253287434577942,
      "learning_rate": 9.784283611872169e-05,
      "loss": 1.7885,
      "step": 52
    },
    {
      "epoch": 0.14681440443213298,
      "grad_norm": 0.5801952481269836,
      "learning_rate": 9.770696282000244e-05,
      "loss": 1.7586,
      "step": 53
    },
    {
      "epoch": 0.14958448753462603,
      "grad_norm": 0.48791196942329407,
      "learning_rate": 9.756704031702918e-05,
      "loss": 1.7201,
      "step": 54
    },
    {
      "epoch": 0.1523545706371191,
      "grad_norm": 0.47324320673942566,
      "learning_rate": 9.742308048594665e-05,
      "loss": 1.7283,
      "step": 55
    },
    {
      "epoch": 0.15512465373961218,
      "grad_norm": 0.44494226574897766,
      "learning_rate": 9.727509554557417e-05,
      "loss": 1.7024,
      "step": 56
    },
    {
      "epoch": 0.15789473684210525,
      "grad_norm": 0.45924824476242065,
      "learning_rate": 9.712309805636863e-05,
      "loss": 1.6947,
      "step": 57
    },
    {
      "epoch": 0.16066481994459833,
      "grad_norm": 0.5117136240005493,
      "learning_rate": 9.696710091935841e-05,
      "loss": 1.7821,
      "step": 58
    },
    {
      "epoch": 0.1634349030470914,
      "grad_norm": 0.4841713607311249,
      "learning_rate": 9.68071173750483e-05,
      "loss": 1.6528,
      "step": 59
    },
    {
      "epoch": 0.16620498614958448,
      "grad_norm": 0.4830092787742615,
      "learning_rate": 9.664316100229578e-05,
      "loss": 1.7166,
      "step": 60
    },
    {
      "epoch": 0.16897506925207756,
      "grad_norm": 0.4221424162387848,
      "learning_rate": 9.647524571715843e-05,
      "loss": 1.6441,
      "step": 61
    },
    {
      "epoch": 0.17174515235457063,
      "grad_norm": 0.4262312650680542,
      "learning_rate": 9.630338577171282e-05,
      "loss": 1.6742,
      "step": 62
    },
    {
      "epoch": 0.1745152354570637,
      "grad_norm": 0.4470296800136566,
      "learning_rate": 9.612759575284483e-05,
      "loss": 1.6873,
      "step": 63
    },
    {
      "epoch": 0.1772853185595568,
      "grad_norm": 0.43828487396240234,
      "learning_rate": 9.594789058101153e-05,
      "loss": 1.6717,
      "step": 64
    },
    {
      "epoch": 0.18005540166204986,
      "grad_norm": 0.44721511006355286,
      "learning_rate": 9.576428550897489e-05,
      "loss": 1.6437,
      "step": 65
    },
    {
      "epoch": 0.18282548476454294,
      "grad_norm": 0.4734845459461212,
      "learning_rate": 9.557679612050708e-05,
      "loss": 1.617,
      "step": 66
    },
    {
      "epoch": 0.18559556786703602,
      "grad_norm": 0.4750541150569916,
      "learning_rate": 9.538543832906773e-05,
      "loss": 1.6971,
      "step": 67
    },
    {
      "epoch": 0.1883656509695291,
      "grad_norm": 0.45159047842025757,
      "learning_rate": 9.519022837645337e-05,
      "loss": 1.6612,
      "step": 68
    },
    {
      "epoch": 0.19113573407202217,
      "grad_norm": 0.45829257369041443,
      "learning_rate": 9.499118283141887e-05,
      "loss": 1.5972,
      "step": 69
    },
    {
      "epoch": 0.19390581717451524,
      "grad_norm": 0.49154049158096313,
      "learning_rate": 9.478831858827104e-05,
      "loss": 1.6616,
      "step": 70
    },
    {
      "epoch": 0.19667590027700832,
      "grad_norm": 0.47941017150878906,
      "learning_rate": 9.458165286543476e-05,
      "loss": 1.6312,
      "step": 71
    },
    {
      "epoch": 0.1994459833795014,
      "grad_norm": 0.47044363617897034,
      "learning_rate": 9.437120320399158e-05,
      "loss": 1.6031,
      "step": 72
    },
    {
      "epoch": 0.20221606648199447,
      "grad_norm": 0.4906218349933624,
      "learning_rate": 9.415698746619079e-05,
      "loss": 1.6558,
      "step": 73
    },
    {
      "epoch": 0.20498614958448755,
      "grad_norm": 0.46305832266807556,
      "learning_rate": 9.393902383393347e-05,
      "loss": 1.626,
      "step": 74
    },
    {
      "epoch": 0.2077562326869806,
      "grad_norm": 0.4860275983810425,
      "learning_rate": 9.371733080722911e-05,
      "loss": 1.6518,
      "step": 75
    },
    {
      "epoch": 0.21052631578947367,
      "grad_norm": 0.49080756306648254,
      "learning_rate": 9.349192720262555e-05,
      "loss": 1.5951,
      "step": 76
    },
    {
      "epoch": 0.21329639889196675,
      "grad_norm": 0.512401282787323,
      "learning_rate": 9.326283215161178e-05,
      "loss": 1.5436,
      "step": 77
    },
    {
      "epoch": 0.21606648199445982,
      "grad_norm": 0.48431679606437683,
      "learning_rate": 9.303006509899418e-05,
      "loss": 1.693,
      "step": 78
    },
    {
      "epoch": 0.2188365650969529,
      "grad_norm": 0.5012890696525574,
      "learning_rate": 9.279364580124614e-05,
      "loss": 1.6496,
      "step": 79
    },
    {
      "epoch": 0.22160664819944598,
      "grad_norm": 0.5362542867660522,
      "learning_rate": 9.255359432483105e-05,
      "loss": 1.5378,
      "step": 80
    },
    {
      "epoch": 0.22437673130193905,
      "grad_norm": 0.5153225660324097,
      "learning_rate": 9.230993104449939e-05,
      "loss": 1.5482,
      "step": 81
    },
    {
      "epoch": 0.22714681440443213,
      "grad_norm": 0.5314464569091797,
      "learning_rate": 9.206267664155907e-05,
      "loss": 1.6165,
      "step": 82
    },
    {
      "epoch": 0.2299168975069252,
      "grad_norm": 0.5320023894309998,
      "learning_rate": 9.181185210212034e-05,
      "loss": 1.5757,
      "step": 83
    },
    {
      "epoch": 0.23268698060941828,
      "grad_norm": 0.542969286441803,
      "learning_rate": 9.155747871531444e-05,
      "loss": 1.5714,
      "step": 84
    },
    {
      "epoch": 0.23545706371191136,
      "grad_norm": 0.5345393419265747,
      "learning_rate": 9.129957807148666e-05,
      "loss": 1.5333,
      "step": 85
    },
    {
      "epoch": 0.23822714681440443,
      "grad_norm": 0.5255215167999268,
      "learning_rate": 9.103817206036382e-05,
      "loss": 1.5549,
      "step": 86
    },
    {
      "epoch": 0.2409972299168975,
      "grad_norm": 0.5266215801239014,
      "learning_rate": 9.077328286919638e-05,
      "loss": 1.5857,
      "step": 87
    },
    {
      "epoch": 0.24376731301939059,
      "grad_norm": 0.5361652374267578,
      "learning_rate": 9.050493298087523e-05,
      "loss": 1.5605,
      "step": 88
    },
    {
      "epoch": 0.24653739612188366,
      "grad_norm": 0.5425163507461548,
      "learning_rate": 9.02331451720234e-05,
      "loss": 1.6093,
      "step": 89
    },
    {
      "epoch": 0.24930747922437674,
      "grad_norm": 0.5160465240478516,
      "learning_rate": 8.995794251106295e-05,
      "loss": 1.5442,
      "step": 90
    },
    {
      "epoch": 0.2520775623268698,
      "grad_norm": 0.5372593998908997,
      "learning_rate": 8.967934835625689e-05,
      "loss": 1.5728,
      "step": 91
    },
    {
      "epoch": 0.2548476454293629,
      "grad_norm": 0.5655612349510193,
      "learning_rate": 8.939738635372664e-05,
      "loss": 1.5368,
      "step": 92
    },
    {
      "epoch": 0.25761772853185594,
      "grad_norm": 0.5733644366264343,
      "learning_rate": 8.911208043544513e-05,
      "loss": 1.5212,
      "step": 93
    },
    {
      "epoch": 0.26038781163434904,
      "grad_norm": 0.5658868551254272,
      "learning_rate": 8.882345481720533e-05,
      "loss": 1.5177,
      "step": 94
    },
    {
      "epoch": 0.2631578947368421,
      "grad_norm": 0.5318059325218201,
      "learning_rate": 8.853153399656513e-05,
      "loss": 1.5302,
      "step": 95
    },
    {
      "epoch": 0.2659279778393352,
      "grad_norm": 0.5699280500411987,
      "learning_rate": 8.823634275076791e-05,
      "loss": 1.544,
      "step": 96
    },
    {
      "epoch": 0.26869806094182824,
      "grad_norm": 0.6027292013168335,
      "learning_rate": 8.793790613463955e-05,
      "loss": 1.5461,
      "step": 97
    },
    {
      "epoch": 0.27146814404432135,
      "grad_norm": 0.6108666062355042,
      "learning_rate": 8.763624947846195e-05,
      "loss": 1.5956,
      "step": 98
    },
    {
      "epoch": 0.2742382271468144,
      "grad_norm": 0.7130281329154968,
      "learning_rate": 8.7331398385823e-05,
      "loss": 1.6505,
      "step": 99
    },
    {
      "epoch": 0.2770083102493075,
      "grad_norm": 1.1027400493621826,
      "learning_rate": 8.702337873144343e-05,
      "loss": 2.2798,
      "step": 100
    },
    {
      "epoch": 0.2770083102493075,
      "eval_loss": 1.6783268451690674,
      "eval_runtime": 96.3073,
      "eval_samples_per_second": 6.313,
      "eval_steps_per_second": 1.578,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 361,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1.427158308225024e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|