{
  "best_metric": 1.0647395849227905,
  "best_model_checkpoint": "autotrain-qs45q-p9syi/checkpoint-453",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 453,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013245033112582781,
      "grad_norm": 3.4723896980285645,
      "learning_rate": 1.3157894736842106e-06,
      "loss": 3.0146,
      "step": 2
    },
    {
      "epoch": 0.026490066225165563,
      "grad_norm": 3.5154001712799072,
      "learning_rate": 2.631578947368421e-06,
      "loss": 3.0404,
      "step": 4
    },
    {
      "epoch": 0.039735099337748346,
      "grad_norm": 3.4036054611206055,
      "learning_rate": 3.9473684210526315e-06,
      "loss": 3.0648,
      "step": 6
    },
    {
      "epoch": 0.052980132450331126,
      "grad_norm": 3.3698582649230957,
      "learning_rate": 5.263157894736842e-06,
      "loss": 3.0555,
      "step": 8
    },
    {
      "epoch": 0.06622516556291391,
      "grad_norm": 3.074321985244751,
      "learning_rate": 6.578947368421053e-06,
      "loss": 3.0161,
      "step": 10
    },
    {
      "epoch": 0.07947019867549669,
      "grad_norm": 3.69840407371521,
      "learning_rate": 7.894736842105263e-06,
      "loss": 3.0226,
      "step": 12
    },
    {
      "epoch": 0.09271523178807947,
      "grad_norm": 3.0229387283325195,
      "learning_rate": 9.210526315789474e-06,
      "loss": 3.0117,
      "step": 14
    },
    {
      "epoch": 0.10596026490066225,
      "grad_norm": 3.633251190185547,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 2.9922,
      "step": 16
    },
    {
      "epoch": 0.11920529801324503,
      "grad_norm": 3.404634952545166,
      "learning_rate": 1.1842105263157895e-05,
      "loss": 2.9323,
      "step": 18
    },
    {
      "epoch": 0.13245033112582782,
      "grad_norm": 4.926875114440918,
      "learning_rate": 1.3157894736842106e-05,
      "loss": 2.9597,
      "step": 20
    },
    {
      "epoch": 0.1456953642384106,
      "grad_norm": 3.341071367263794,
      "learning_rate": 1.4473684210526317e-05,
      "loss": 2.927,
      "step": 22
    },
    {
      "epoch": 0.15894039735099338,
      "grad_norm": 3.596402645111084,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 2.8622,
      "step": 24
    },
    {
      "epoch": 0.17218543046357615,
      "grad_norm": 3.554511547088623,
      "learning_rate": 1.7105263157894737e-05,
      "loss": 2.8552,
      "step": 26
    },
    {
      "epoch": 0.18543046357615894,
      "grad_norm": 3.871518135070801,
      "learning_rate": 1.8421052631578947e-05,
      "loss": 2.8545,
      "step": 28
    },
    {
      "epoch": 0.1986754966887417,
      "grad_norm": 4.008641719818115,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 2.8275,
      "step": 30
    },
    {
      "epoch": 0.2119205298013245,
      "grad_norm": 4.181613445281982,
      "learning_rate": 2.105263157894737e-05,
      "loss": 2.7814,
      "step": 32
    },
    {
      "epoch": 0.2251655629139073,
      "grad_norm": 4.396944999694824,
      "learning_rate": 2.236842105263158e-05,
      "loss": 2.6354,
      "step": 34
    },
    {
      "epoch": 0.23841059602649006,
      "grad_norm": 4.029453754425049,
      "learning_rate": 2.368421052631579e-05,
      "loss": 2.5842,
      "step": 36
    },
    {
      "epoch": 0.25165562913907286,
      "grad_norm": 3.9177920818328857,
      "learning_rate": 2.5e-05,
      "loss": 2.6199,
      "step": 38
    },
    {
      "epoch": 0.26490066225165565,
      "grad_norm": 5.803523063659668,
      "learning_rate": 2.6315789473684212e-05,
      "loss": 2.5609,
      "step": 40
    },
    {
      "epoch": 0.2781456953642384,
      "grad_norm": 6.542681694030762,
      "learning_rate": 2.7631578947368426e-05,
      "loss": 2.2103,
      "step": 42
    },
    {
      "epoch": 0.2913907284768212,
      "grad_norm": 4.970069408416748,
      "learning_rate": 2.8947368421052634e-05,
      "loss": 2.5223,
      "step": 44
    },
    {
      "epoch": 0.304635761589404,
      "grad_norm": 7.797888278961182,
      "learning_rate": 3.0263157894736844e-05,
      "loss": 2.2236,
      "step": 46
    },
    {
      "epoch": 0.31788079470198677,
      "grad_norm": 5.971277713775635,
      "learning_rate": 3.157894736842105e-05,
      "loss": 2.6381,
      "step": 48
    },
    {
      "epoch": 0.33112582781456956,
      "grad_norm": 6.939202785491943,
      "learning_rate": 3.289473684210527e-05,
      "loss": 2.3483,
      "step": 50
    },
    {
      "epoch": 0.3443708609271523,
      "grad_norm": 5.877003192901611,
      "learning_rate": 3.421052631578947e-05,
      "loss": 1.9744,
      "step": 52
    },
    {
      "epoch": 0.3576158940397351,
      "grad_norm": 5.517603397369385,
      "learning_rate": 3.5526315789473684e-05,
      "loss": 1.9791,
      "step": 54
    },
    {
      "epoch": 0.3708609271523179,
      "grad_norm": 7.6599440574646,
      "learning_rate": 3.6842105263157895e-05,
      "loss": 2.0475,
      "step": 56
    },
    {
      "epoch": 0.3841059602649007,
      "grad_norm": 6.77654504776001,
      "learning_rate": 3.815789473684211e-05,
      "loss": 1.923,
      "step": 58
    },
    {
      "epoch": 0.3973509933774834,
      "grad_norm": 6.017172336578369,
      "learning_rate": 3.9473684210526316e-05,
      "loss": 1.9688,
      "step": 60
    },
    {
      "epoch": 0.4105960264900662,
      "grad_norm": 5.819124221801758,
      "learning_rate": 4.078947368421053e-05,
      "loss": 2.2777,
      "step": 62
    },
    {
      "epoch": 0.423841059602649,
      "grad_norm": 5.240574359893799,
      "learning_rate": 4.210526315789474e-05,
      "loss": 1.7219,
      "step": 64
    },
    {
      "epoch": 0.4370860927152318,
      "grad_norm": 7.376441478729248,
      "learning_rate": 4.342105263157895e-05,
      "loss": 1.5784,
      "step": 66
    },
    {
      "epoch": 0.4503311258278146,
      "grad_norm": 5.029400825500488,
      "learning_rate": 4.473684210526316e-05,
      "loss": 1.6895,
      "step": 68
    },
    {
      "epoch": 0.46357615894039733,
      "grad_norm": 10.123241424560547,
      "learning_rate": 4.605263157894737e-05,
      "loss": 1.6955,
      "step": 70
    },
    {
      "epoch": 0.4768211920529801,
      "grad_norm": 6.67560338973999,
      "learning_rate": 4.736842105263158e-05,
      "loss": 1.9591,
      "step": 72
    },
    {
      "epoch": 0.4900662251655629,
      "grad_norm": 8.467432975769043,
      "learning_rate": 4.868421052631579e-05,
      "loss": 2.0633,
      "step": 74
    },
    {
      "epoch": 0.5033112582781457,
      "grad_norm": 5.2431535720825195,
      "learning_rate": 5e-05,
      "loss": 1.3436,
      "step": 76
    },
    {
      "epoch": 0.5165562913907285,
      "grad_norm": 7.770199298858643,
      "learning_rate": 4.985272459499264e-05,
      "loss": 1.9379,
      "step": 78
    },
    {
      "epoch": 0.5298013245033113,
      "grad_norm": 5.734556198120117,
      "learning_rate": 4.9705449189985276e-05,
      "loss": 1.7279,
      "step": 80
    },
    {
      "epoch": 0.543046357615894,
      "grad_norm": 6.759193420410156,
      "learning_rate": 4.955817378497791e-05,
      "loss": 1.4606,
      "step": 82
    },
    {
      "epoch": 0.5562913907284768,
      "grad_norm": 10.67810344696045,
      "learning_rate": 4.941089837997055e-05,
      "loss": 1.3069,
      "step": 84
    },
    {
      "epoch": 0.5695364238410596,
      "grad_norm": 7.0206098556518555,
      "learning_rate": 4.926362297496319e-05,
      "loss": 1.8698,
      "step": 86
    },
    {
      "epoch": 0.5827814569536424,
      "grad_norm": 4.365483283996582,
      "learning_rate": 4.911634756995582e-05,
      "loss": 1.5244,
      "step": 88
    },
    {
      "epoch": 0.5960264900662252,
      "grad_norm": 8.567715644836426,
      "learning_rate": 4.8969072164948454e-05,
      "loss": 1.7503,
      "step": 90
    },
    {
      "epoch": 0.609271523178808,
      "grad_norm": 7.213935852050781,
      "learning_rate": 4.882179675994109e-05,
      "loss": 1.6514,
      "step": 92
    },
    {
      "epoch": 0.6225165562913907,
      "grad_norm": 5.607529163360596,
      "learning_rate": 4.867452135493373e-05,
      "loss": 1.0184,
      "step": 94
    },
    {
      "epoch": 0.6357615894039735,
      "grad_norm": 9.907747268676758,
      "learning_rate": 4.8527245949926364e-05,
      "loss": 1.7225,
      "step": 96
    },
    {
      "epoch": 0.6490066225165563,
      "grad_norm": 13.226700782775879,
      "learning_rate": 4.8379970544919e-05,
      "loss": 1.9432,
      "step": 98
    },
    {
      "epoch": 0.6622516556291391,
      "grad_norm": 11.118176460266113,
      "learning_rate": 4.823269513991164e-05,
      "loss": 1.416,
      "step": 100
    },
    {
      "epoch": 0.6754966887417219,
      "grad_norm": 10.29113483428955,
      "learning_rate": 4.8085419734904275e-05,
      "loss": 1.6586,
      "step": 102
    },
    {
      "epoch": 0.6887417218543046,
      "grad_norm": 9.081689834594727,
      "learning_rate": 4.793814432989691e-05,
      "loss": 1.305,
      "step": 104
    },
    {
      "epoch": 0.7019867549668874,
      "grad_norm": 6.368233680725098,
      "learning_rate": 4.779086892488955e-05,
      "loss": 1.1645,
      "step": 106
    },
    {
      "epoch": 0.7152317880794702,
      "grad_norm": 12.125802040100098,
      "learning_rate": 4.764359351988218e-05,
      "loss": 0.8825,
      "step": 108
    },
    {
      "epoch": 0.7284768211920529,
      "grad_norm": 14.471185684204102,
      "learning_rate": 4.7496318114874815e-05,
      "loss": 1.3133,
      "step": 110
    },
    {
      "epoch": 0.7417218543046358,
      "grad_norm": 10.047285079956055,
      "learning_rate": 4.734904270986745e-05,
      "loss": 1.0271,
      "step": 112
    },
    {
      "epoch": 0.7549668874172185,
      "grad_norm": 10.376700401306152,
      "learning_rate": 4.720176730486009e-05,
      "loss": 1.5487,
      "step": 114
    },
    {
      "epoch": 0.7682119205298014,
      "grad_norm": 9.097859382629395,
      "learning_rate": 4.7054491899852726e-05,
      "loss": 1.7211,
      "step": 116
    },
    {
      "epoch": 0.7814569536423841,
      "grad_norm": 11.168022155761719,
      "learning_rate": 4.690721649484536e-05,
      "loss": 1.3699,
      "step": 118
    },
    {
      "epoch": 0.7947019867549668,
      "grad_norm": 7.571075439453125,
      "learning_rate": 4.6759941089838e-05,
      "loss": 0.7628,
      "step": 120
    },
    {
      "epoch": 0.8079470198675497,
      "grad_norm": 7.332944393157959,
      "learning_rate": 4.661266568483064e-05,
      "loss": 1.2271,
      "step": 122
    },
    {
      "epoch": 0.8211920529801324,
      "grad_norm": 9.472811698913574,
      "learning_rate": 4.6465390279823274e-05,
      "loss": 0.9225,
      "step": 124
    },
    {
      "epoch": 0.8344370860927153,
      "grad_norm": 7.298526287078857,
      "learning_rate": 4.631811487481591e-05,
      "loss": 1.4189,
      "step": 126
    },
    {
      "epoch": 0.847682119205298,
      "grad_norm": 6.718891620635986,
      "learning_rate": 4.617083946980855e-05,
      "loss": 0.9269,
      "step": 128
    },
    {
      "epoch": 0.8609271523178808,
      "grad_norm": 5.722424507141113,
      "learning_rate": 4.602356406480118e-05,
      "loss": 1.5321,
      "step": 130
    },
    {
      "epoch": 0.8741721854304636,
      "grad_norm": 8.258366584777832,
      "learning_rate": 4.5876288659793814e-05,
      "loss": 0.9162,
      "step": 132
    },
    {
      "epoch": 0.8874172185430463,
      "grad_norm": 6.919400691986084,
      "learning_rate": 4.572901325478645e-05,
      "loss": 1.0685,
      "step": 134
    },
    {
      "epoch": 0.9006622516556292,
      "grad_norm": 6.770501613616943,
      "learning_rate": 4.558173784977909e-05,
      "loss": 0.7407,
      "step": 136
    },
    {
      "epoch": 0.9139072847682119,
      "grad_norm": 6.387173652648926,
      "learning_rate": 4.5434462444771725e-05,
      "loss": 0.6707,
      "step": 138
    },
    {
      "epoch": 0.9271523178807947,
      "grad_norm": 11.18732738494873,
      "learning_rate": 4.528718703976436e-05,
      "loss": 1.3069,
      "step": 140
    },
    {
      "epoch": 0.9403973509933775,
      "grad_norm": 4.17434549331665,
      "learning_rate": 4.5139911634757e-05,
      "loss": 0.7223,
      "step": 142
    },
    {
      "epoch": 0.9536423841059603,
      "grad_norm": 7.86276388168335,
      "learning_rate": 4.4992636229749635e-05,
      "loss": 0.7461,
      "step": 144
    },
    {
      "epoch": 0.9668874172185431,
      "grad_norm": 11.436422348022461,
      "learning_rate": 4.484536082474227e-05,
      "loss": 1.4973,
      "step": 146
    },
    {
      "epoch": 0.9801324503311258,
      "grad_norm": 11.492631912231445,
      "learning_rate": 4.469808541973491e-05,
      "loss": 1.1266,
      "step": 148
    },
    {
      "epoch": 0.9933774834437086,
      "grad_norm": 6.025942802429199,
      "learning_rate": 4.4550810014727546e-05,
      "loss": 0.5809,
      "step": 150
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.62,
      "eval_f1_macro": 0.2182729551150604,
      "eval_f1_micro": 0.62,
      "eval_f1_weighted": 0.5708288904078378,
      "eval_loss": 1.3821334838867188,
      "eval_precision_macro": 0.20881118881118882,
      "eval_precision_micro": 0.62,
      "eval_precision_weighted": 0.5551748251748252,
      "eval_recall_macro": 0.25717532467532467,
      "eval_recall_micro": 0.62,
      "eval_recall_weighted": 0.62,
      "eval_runtime": 0.1587,
      "eval_samples_per_second": 630.071,
      "eval_steps_per_second": 44.105,
      "step": 151
    },
    {
      "epoch": 1.0066225165562914,
      "grad_norm": 6.717867374420166,
      "learning_rate": 4.447717231222386e-05,
      "loss": 0.8661,
      "step": 152
    },
    {
      "epoch": 1.0198675496688743,
      "grad_norm": 5.969238758087158,
      "learning_rate": 4.4329896907216494e-05,
      "loss": 0.6295,
      "step": 154
    },
    {
      "epoch": 1.033112582781457,
      "grad_norm": 5.343296527862549,
      "learning_rate": 4.418262150220913e-05,
      "loss": 1.1331,
      "step": 156
    },
    {
      "epoch": 1.0463576158940397,
      "grad_norm": 5.666139602661133,
      "learning_rate": 4.403534609720177e-05,
      "loss": 0.8978,
      "step": 158
    },
    {
      "epoch": 1.0596026490066226,
      "grad_norm": 5.301098823547363,
      "learning_rate": 4.3888070692194405e-05,
      "loss": 0.6172,
      "step": 160
    },
    {
      "epoch": 1.0728476821192052,
      "grad_norm": 6.197515964508057,
      "learning_rate": 4.374079528718704e-05,
      "loss": 0.6356,
      "step": 162
    },
    {
      "epoch": 1.086092715231788,
      "grad_norm": 9.58243179321289,
      "learning_rate": 4.359351988217968e-05,
      "loss": 1.5982,
      "step": 164
    },
    {
      "epoch": 1.099337748344371,
      "grad_norm": 5.4112629890441895,
      "learning_rate": 4.3446244477172316e-05,
      "loss": 0.5373,
      "step": 166
    },
    {
      "epoch": 1.1125827814569536,
      "grad_norm": 6.890096664428711,
      "learning_rate": 4.329896907216495e-05,
      "loss": 1.594,
      "step": 168
    },
    {
      "epoch": 1.1258278145695364,
      "grad_norm": 13.663280487060547,
      "learning_rate": 4.315169366715759e-05,
      "loss": 0.6526,
      "step": 170
    },
    {
      "epoch": 1.1390728476821192,
      "grad_norm": 5.038116455078125,
      "learning_rate": 4.3004418262150226e-05,
      "loss": 0.8239,
      "step": 172
    },
    {
      "epoch": 1.152317880794702,
      "grad_norm": 10.813724517822266,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.9156,
      "step": 174
    },
    {
      "epoch": 1.1655629139072847,
      "grad_norm": 3.2282865047454834,
      "learning_rate": 4.270986745213549e-05,
      "loss": 0.6795,
      "step": 176
    },
    {
      "epoch": 1.1788079470198676,
      "grad_norm": 5.9910197257995605,
      "learning_rate": 4.256259204712813e-05,
      "loss": 1.0571,
      "step": 178
    },
    {
      "epoch": 1.1920529801324504,
      "grad_norm": 4.989276885986328,
      "learning_rate": 4.241531664212077e-05,
      "loss": 1.1591,
      "step": 180
    },
    {
      "epoch": 1.205298013245033,
      "grad_norm": 12.654097557067871,
      "learning_rate": 4.2268041237113404e-05,
      "loss": 1.3218,
      "step": 182
    },
    {
      "epoch": 1.218543046357616,
      "grad_norm": 4.979647636413574,
      "learning_rate": 4.212076583210604e-05,
      "loss": 0.6925,
      "step": 184
    },
    {
      "epoch": 1.2317880794701987,
      "grad_norm": 9.59062385559082,
      "learning_rate": 4.197349042709868e-05,
      "loss": 1.2088,
      "step": 186
    },
    {
      "epoch": 1.2450331125827814,
      "grad_norm": 8.358089447021484,
      "learning_rate": 4.1826215022091314e-05,
      "loss": 0.7255,
      "step": 188
    },
    {
      "epoch": 1.2582781456953642,
      "grad_norm": 1.6286242008209229,
      "learning_rate": 4.167893961708395e-05,
      "loss": 0.6235,
      "step": 190
    },
    {
      "epoch": 1.271523178807947,
      "grad_norm": 7.9347662925720215,
      "learning_rate": 4.153166421207659e-05,
      "loss": 0.964,
      "step": 192
    },
    {
      "epoch": 1.2847682119205297,
      "grad_norm": 6.71475887298584,
      "learning_rate": 4.138438880706922e-05,
      "loss": 0.9162,
      "step": 194
    },
    {
      "epoch": 1.2980132450331126,
      "grad_norm": 2.8124191761016846,
      "learning_rate": 4.1237113402061855e-05,
      "loss": 0.3924,
      "step": 196
    },
    {
      "epoch": 1.3112582781456954,
      "grad_norm": 8.39048957824707,
      "learning_rate": 4.108983799705449e-05,
      "loss": 0.68,
      "step": 198
    },
    {
      "epoch": 1.3245033112582782,
      "grad_norm": 10.033766746520996,
      "learning_rate": 4.094256259204713e-05,
      "loss": 0.6962,
      "step": 200
    },
    {
      "epoch": 1.3377483443708609,
      "grad_norm": 6.6539106369018555,
      "learning_rate": 4.0795287187039766e-05,
      "loss": 1.0892,
      "step": 202
    },
    {
      "epoch": 1.3509933774834437,
      "grad_norm": 5.977195739746094,
      "learning_rate": 4.06480117820324e-05,
      "loss": 0.5886,
      "step": 204
    },
    {
      "epoch": 1.3642384105960264,
      "grad_norm": 5.548197269439697,
      "learning_rate": 4.050073637702504e-05,
      "loss": 0.9609,
      "step": 206
    },
    {
      "epoch": 1.3774834437086092,
      "grad_norm": 6.958375453948975,
      "learning_rate": 4.0353460972017676e-05,
      "loss": 1.0345,
      "step": 208
    },
    {
      "epoch": 1.390728476821192,
      "grad_norm": 14.204687118530273,
      "learning_rate": 4.020618556701031e-05,
      "loss": 0.7327,
      "step": 210
    },
    {
      "epoch": 1.403973509933775,
      "grad_norm": 8.209890365600586,
      "learning_rate": 4.005891016200295e-05,
      "loss": 0.5259,
      "step": 212
    },
    {
      "epoch": 1.4172185430463577,
      "grad_norm": 6.424736976623535,
      "learning_rate": 3.991163475699559e-05,
      "loss": 0.9341,
      "step": 214
    },
    {
      "epoch": 1.4304635761589404,
      "grad_norm": 7.625380039215088,
      "learning_rate": 3.976435935198822e-05,
      "loss": 1.1303,
      "step": 216
    },
    {
      "epoch": 1.4437086092715232,
      "grad_norm": 7.861330509185791,
      "learning_rate": 3.9617083946980854e-05,
      "loss": 0.5599,
      "step": 218
    },
    {
      "epoch": 1.4569536423841059,
      "grad_norm": 11.318681716918945,
      "learning_rate": 3.946980854197349e-05,
      "loss": 0.665,
      "step": 220
    },
    {
      "epoch": 1.4701986754966887,
      "grad_norm": 8.797566413879395,
      "learning_rate": 3.932253313696613e-05,
      "loss": 0.9297,
      "step": 222
    },
    {
      "epoch": 1.4834437086092715,
      "grad_norm": 9.536187171936035,
      "learning_rate": 3.9175257731958764e-05,
      "loss": 0.5168,
      "step": 224
    },
    {
      "epoch": 1.4966887417218544,
      "grad_norm": 5.4594807624816895,
      "learning_rate": 3.90279823269514e-05,
      "loss": 0.6562,
      "step": 226
    },
    {
      "epoch": 1.5099337748344372,
      "grad_norm": 6.416939735412598,
      "learning_rate": 3.888070692194404e-05,
      "loss": 0.4248,
      "step": 228
    },
    {
      "epoch": 1.5231788079470199,
      "grad_norm": 8.049701690673828,
      "learning_rate": 3.8733431516936675e-05,
      "loss": 0.6311,
      "step": 230
    },
    {
      "epoch": 1.5364238410596025,
      "grad_norm": 6.978274345397949,
      "learning_rate": 3.858615611192931e-05,
      "loss": 0.5238,
      "step": 232
    },
    {
      "epoch": 1.5496688741721854,
      "grad_norm": 4.730326175689697,
      "learning_rate": 3.843888070692195e-05,
      "loss": 0.9127,
      "step": 234
    },
    {
      "epoch": 1.5629139072847682,
      "grad_norm": 6.017629623413086,
      "learning_rate": 3.836524300441827e-05,
      "loss": 0.6197,
      "step": 236
    },
    {
      "epoch": 1.576158940397351,
      "grad_norm": 1.4615082740783691,
      "learning_rate": 3.82179675994109e-05,
      "loss": 0.1855,
      "step": 238
    },
    {
      "epoch": 1.589403973509934,
      "grad_norm": 17.785310745239258,
      "learning_rate": 3.8070692194403534e-05,
      "loss": 1.4609,
      "step": 240
    },
    {
      "epoch": 1.6026490066225165,
      "grad_norm": 11.241979598999023,
      "learning_rate": 3.792341678939617e-05,
      "loss": 0.8828,
      "step": 242
    },
    {
      "epoch": 1.6158940397350994,
      "grad_norm": 13.630949974060059,
      "learning_rate": 3.777614138438881e-05,
      "loss": 1.374,
      "step": 244
    },
    {
      "epoch": 1.629139072847682,
      "grad_norm": 13.52291488647461,
      "learning_rate": 3.7628865979381445e-05,
      "loss": 0.7898,
      "step": 246
    },
    {
      "epoch": 1.6423841059602649,
      "grad_norm": 6.53799295425415,
      "learning_rate": 3.748159057437408e-05,
      "loss": 0.7232,
      "step": 248
    },
    {
      "epoch": 1.6556291390728477,
      "grad_norm": 7.03351354598999,
      "learning_rate": 3.733431516936672e-05,
      "loss": 1.5363,
      "step": 250
    },
    {
      "epoch": 1.6688741721854305,
      "grad_norm": 10.971170425415039,
      "learning_rate": 3.7187039764359355e-05,
      "loss": 0.3952,
      "step": 252
    },
    {
      "epoch": 1.6821192052980134,
      "grad_norm": 10.918181419372559,
      "learning_rate": 3.703976435935199e-05,
      "loss": 1.0573,
      "step": 254
    },
    {
      "epoch": 1.695364238410596,
      "grad_norm": 5.845329284667969,
      "learning_rate": 3.689248895434463e-05,
      "loss": 0.9152,
      "step": 256
    },
    {
      "epoch": 1.7086092715231787,
      "grad_norm": 10.043814659118652,
      "learning_rate": 3.6745213549337266e-05,
      "loss": 0.7483,
      "step": 258
    },
    {
      "epoch": 1.7218543046357615,
      "grad_norm": 9.504009246826172,
      "learning_rate": 3.6597938144329896e-05,
      "loss": 1.3825,
      "step": 260
    },
    {
      "epoch": 1.7350993377483444,
      "grad_norm": 7.6096415519714355,
      "learning_rate": 3.645066273932253e-05,
      "loss": 0.3722,
      "step": 262
    },
    {
      "epoch": 1.7483443708609272,
      "grad_norm": 15.977928161621094,
      "learning_rate": 3.630338733431517e-05,
      "loss": 0.7817,
      "step": 264
    },
    {
      "epoch": 1.76158940397351,
      "grad_norm": 12.720684051513672,
      "learning_rate": 3.6156111929307806e-05,
      "loss": 0.5112,
      "step": 266
    },
    {
      "epoch": 1.7748344370860927,
      "grad_norm": 8.58932113647461,
      "learning_rate": 3.600883652430044e-05,
      "loss": 0.7,
      "step": 268
    },
    {
      "epoch": 1.7880794701986755,
      "grad_norm": 13.423791885375977,
      "learning_rate": 3.586156111929308e-05,
      "loss": 1.6471,
      "step": 270
    },
    {
      "epoch": 1.8013245033112582,
      "grad_norm": 6.468234062194824,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.66,
      "step": 272
    },
    {
      "epoch": 1.814569536423841,
      "grad_norm": 9.493569374084473,
      "learning_rate": 3.5567010309278354e-05,
      "loss": 1.1906,
      "step": 274
    },
    {
      "epoch": 1.8278145695364238,
      "grad_norm": 4.703698635101318,
      "learning_rate": 3.541973490427099e-05,
      "loss": 0.4843,
      "step": 276
    },
    {
      "epoch": 1.8410596026490067,
      "grad_norm": 7.946201324462891,
      "learning_rate": 3.527245949926363e-05,
      "loss": 0.5529,
      "step": 278
    },
    {
      "epoch": 1.8543046357615895,
      "grad_norm": 6.270992279052734,
      "learning_rate": 3.512518409425626e-05,
      "loss": 0.9139,
      "step": 280
    },
    {
      "epoch": 1.8675496688741722,
      "grad_norm": 12.685065269470215,
      "learning_rate": 3.4977908689248894e-05,
      "loss": 0.4489,
      "step": 282
    },
    {
      "epoch": 1.8807947019867548,
      "grad_norm": 9.88779354095459,
      "learning_rate": 3.483063328424153e-05,
      "loss": 1.0594,
      "step": 284
    },
    {
      "epoch": 1.8940397350993377,
      "grad_norm": 15.420830726623535,
      "learning_rate": 3.468335787923417e-05,
      "loss": 1.5748,
      "step": 286
    },
    {
      "epoch": 1.9072847682119205,
      "grad_norm": 12.748838424682617,
      "learning_rate": 3.4536082474226805e-05,
      "loss": 0.3906,
      "step": 288
    },
    {
      "epoch": 1.9205298013245033,
      "grad_norm": 8.6655912399292,
      "learning_rate": 3.438880706921944e-05,
      "loss": 0.7372,
      "step": 290
    },
    {
      "epoch": 1.9337748344370862,
      "grad_norm": 2.1088364124298096,
      "learning_rate": 3.424153166421208e-05,
      "loss": 0.6368,
      "step": 292
    },
    {
      "epoch": 1.9470198675496688,
      "grad_norm": 5.693451404571533,
      "learning_rate": 3.4094256259204716e-05,
      "loss": 1.1065,
      "step": 294
    },
    {
      "epoch": 1.9602649006622517,
      "grad_norm": 5.4681315422058105,
      "learning_rate": 3.394698085419735e-05,
      "loss": 0.2881,
      "step": 296
    },
    {
      "epoch": 1.9735099337748343,
      "grad_norm": 3.1230578422546387,
      "learning_rate": 3.379970544918999e-05,
      "loss": 0.7056,
      "step": 298
    },
    {
      "epoch": 1.9867549668874172,
      "grad_norm": 8.198813438415527,
      "learning_rate": 3.3652430044182626e-05,
      "loss": 0.3768,
      "step": 300
    },
    {
      "epoch": 2.0,
      "grad_norm": 5.530179023742676,
      "learning_rate": 3.3505154639175256e-05,
      "loss": 0.3156,
      "step": 302
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.74,
      "eval_f1_macro": 0.45774191750278714,
      "eval_f1_micro": 0.74,
      "eval_f1_weighted": 0.7020952062430323,
      "eval_loss": 1.118362307548523,
      "eval_precision_macro": 0.5131296992481202,
      "eval_precision_micro": 0.74,
      "eval_precision_weighted": 0.7261616541353384,
      "eval_recall_macro": 0.48530303030303035,
      "eval_recall_micro": 0.74,
      "eval_recall_weighted": 0.74,
      "eval_runtime": 0.1632,
      "eval_samples_per_second": 612.634,
      "eval_steps_per_second": 42.884,
      "step": 302
    },
    {
      "epoch": 2.013245033112583,
      "grad_norm": 1.5750505924224854,
      "learning_rate": 3.335787923416789e-05,
      "loss": 0.1854,
      "step": 304
    },
    {
      "epoch": 2.0264900662251657,
      "grad_norm": 3.4210903644561768,
      "learning_rate": 3.321060382916053e-05,
      "loss": 0.3148,
      "step": 306
    },
    {
      "epoch": 2.0397350993377485,
      "grad_norm": 7.4305033683776855,
      "learning_rate": 3.306332842415317e-05,
      "loss": 0.7654,
      "step": 308
    },
    {
      "epoch": 2.052980132450331,
      "grad_norm": 2.1427576541900635,
      "learning_rate": 3.2916053019145804e-05,
      "loss": 0.2679,
      "step": 310
    },
    {
      "epoch": 2.066225165562914,
      "grad_norm": 4.614518165588379,
      "learning_rate": 3.276877761413844e-05,
      "loss": 0.722,
      "step": 312
    },
    {
      "epoch": 2.0794701986754967,
      "grad_norm": 5.343958377838135,
      "learning_rate": 3.262150220913108e-05,
      "loss": 0.658,
      "step": 314
    },
    {
      "epoch": 2.0927152317880795,
      "grad_norm": 7.354612827301025,
      "learning_rate": 3.2474226804123714e-05,
      "loss": 0.5283,
      "step": 316
    },
    {
      "epoch": 2.1059602649006623,
      "grad_norm": 3.1576592922210693,
      "learning_rate": 3.232695139911635e-05,
      "loss": 0.096,
      "step": 318
    },
    {
      "epoch": 2.119205298013245,
      "grad_norm": 3.2874159812927246,
      "learning_rate": 3.217967599410899e-05,
      "loss": 0.2096,
      "step": 320
    },
    {
      "epoch": 2.1324503311258276,
      "grad_norm": 0.5562194585800171,
      "learning_rate": 3.2032400589101625e-05,
      "loss": 0.1196,
      "step": 322
    },
    {
      "epoch": 2.1456953642384105,
      "grad_norm": 13.274640083312988,
      "learning_rate": 3.1885125184094255e-05,
      "loss": 1.067,
      "step": 324
    },
    {
      "epoch": 2.1589403973509933,
      "grad_norm": 8.369612693786621,
      "learning_rate": 3.173784977908689e-05,
      "loss": 0.6852,
      "step": 326
    },
    {
      "epoch": 2.172185430463576,
      "grad_norm": 10.826526641845703,
      "learning_rate": 3.159057437407953e-05,
      "loss": 0.5073,
      "step": 328
    },
    {
      "epoch": 2.185430463576159,
      "grad_norm": 6.016572952270508,
      "learning_rate": 3.1443298969072166e-05,
      "loss": 0.5949,
      "step": 330
    },
    {
      "epoch": 2.198675496688742,
      "grad_norm": 10.392781257629395,
      "learning_rate": 3.12960235640648e-05,
      "loss": 0.7787,
      "step": 332
    },
    {
      "epoch": 2.2119205298013247,
      "grad_norm": 4.342617511749268,
      "learning_rate": 3.114874815905744e-05,
      "loss": 0.4602,
      "step": 334
    },
    {
      "epoch": 2.225165562913907,
      "grad_norm": 7.711668014526367,
      "learning_rate": 3.1001472754050076e-05,
      "loss": 0.3792,
      "step": 336
    },
    {
      "epoch": 2.23841059602649,
      "grad_norm": 5.162533283233643,
      "learning_rate": 3.085419734904271e-05,
      "loss": 0.2087,
      "step": 338
    },
    {
      "epoch": 2.251655629139073,
      "grad_norm": 7.244391441345215,
      "learning_rate": 3.070692194403535e-05,
      "loss": 0.298,
      "step": 340
    },
    {
      "epoch": 2.2649006622516556,
      "grad_norm": 10.348755836486816,
      "learning_rate": 3.055964653902799e-05,
      "loss": 0.3321,
      "step": 342
    },
    {
      "epoch": 2.2781456953642385,
      "grad_norm": 1.1933870315551758,
      "learning_rate": 3.0412371134020617e-05,
      "loss": 0.0756,
      "step": 344
    },
    {
      "epoch": 2.2913907284768213,
      "grad_norm": 9.06655216217041,
      "learning_rate": 3.0265095729013254e-05,
      "loss": 0.7712,
      "step": 346
    },
    {
      "epoch": 2.304635761589404,
      "grad_norm": 8.230104446411133,
      "learning_rate": 3.011782032400589e-05,
      "loss": 0.9383,
      "step": 348
    },
    {
      "epoch": 2.3178807947019866,
      "grad_norm": 10.278660774230957,
      "learning_rate": 2.9970544918998527e-05,
      "loss": 0.6566,
      "step": 350
    },
    {
      "epoch": 2.3311258278145695,
      "grad_norm": 1.700579047203064,
      "learning_rate": 2.9823269513991164e-05,
      "loss": 0.1095,
      "step": 352
    },
    {
      "epoch": 2.3443708609271523,
      "grad_norm": 4.130746841430664,
      "learning_rate": 2.96759941089838e-05,
      "loss": 0.7986,
      "step": 354
    },
    {
      "epoch": 2.357615894039735,
      "grad_norm": 9.603137016296387,
      "learning_rate": 2.9528718703976438e-05,
      "loss": 0.6449,
      "step": 356
    },
    {
      "epoch": 2.370860927152318,
      "grad_norm": 13.046420097351074,
      "learning_rate": 2.9381443298969075e-05,
      "loss": 0.7776,
      "step": 358
    },
    {
      "epoch": 2.384105960264901,
      "grad_norm": 4.095331192016602,
      "learning_rate": 2.9234167893961712e-05,
      "loss": 0.2639,
      "step": 360
    },
    {
      "epoch": 2.3973509933774833,
      "grad_norm": 0.3949756324291229,
      "learning_rate": 2.908689248895435e-05,
      "loss": 0.1134,
      "step": 362
    },
    {
      "epoch": 2.410596026490066,
      "grad_norm": 3.804518699645996,
      "learning_rate": 2.8939617083946985e-05,
      "loss": 0.3421,
      "step": 364
    },
    {
      "epoch": 2.423841059602649,
      "grad_norm": 12.5763521194458,
      "learning_rate": 2.8792341678939616e-05,
      "loss": 0.7242,
      "step": 366
    },
    {
      "epoch": 2.437086092715232,
      "grad_norm": 5.6927032470703125,
      "learning_rate": 2.8645066273932252e-05,
      "loss": 0.9591,
      "step": 368
    },
    {
      "epoch": 2.4503311258278146,
      "grad_norm": 13.04416275024414,
      "learning_rate": 2.849779086892489e-05,
      "loss": 0.4636,
      "step": 370
    },
    {
      "epoch": 2.4635761589403975,
      "grad_norm": 4.799520015716553,
      "learning_rate": 2.8350515463917526e-05,
      "loss": 0.4037,
      "step": 372
    },
    {
      "epoch": 2.47682119205298,
      "grad_norm": 8.108109474182129,
      "learning_rate": 2.8203240058910163e-05,
      "loss": 1.2403,
      "step": 374
    },
    {
      "epoch": 2.4900662251655628,
      "grad_norm": 3.977107286453247,
      "learning_rate": 2.80559646539028e-05,
      "loss": 0.1793,
      "step": 376
    },
    {
      "epoch": 2.5033112582781456,
      "grad_norm": 8.900064468383789,
      "learning_rate": 2.7908689248895437e-05,
      "loss": 0.6712,
      "step": 378
    },
    {
      "epoch": 2.5165562913907285,
      "grad_norm": 5.152413368225098,
      "learning_rate": 2.7761413843888074e-05,
      "loss": 0.0786,
      "step": 380
    },
    {
      "epoch": 2.5298013245033113,
      "grad_norm": 1.7878741025924683,
      "learning_rate": 2.761413843888071e-05,
      "loss": 0.3663,
      "step": 382
    },
    {
      "epoch": 2.543046357615894,
      "grad_norm": 4.110722064971924,
      "learning_rate": 2.7466863033873347e-05,
      "loss": 0.3523,
      "step": 384
    },
    {
      "epoch": 2.556291390728477,
      "grad_norm": 15.2096586227417,
      "learning_rate": 2.7319587628865977e-05,
      "loss": 0.233,
      "step": 386
    },
    {
      "epoch": 2.5695364238410594,
      "grad_norm": 4.752151012420654,
      "learning_rate": 2.7172312223858614e-05,
      "loss": 0.5043,
      "step": 388
    },
    {
      "epoch": 2.5827814569536423,
      "grad_norm": 9.852655410766602,
      "learning_rate": 2.702503681885125e-05,
      "loss": 0.1858,
      "step": 390
    },
    {
      "epoch": 2.596026490066225,
      "grad_norm": 13.454380989074707,
      "learning_rate": 2.6877761413843888e-05,
      "loss": 0.597,
      "step": 392
    },
    {
      "epoch": 2.609271523178808,
      "grad_norm": 7.8776326179504395,
      "learning_rate": 2.6730486008836525e-05,
      "loss": 0.7841,
      "step": 394
    },
    {
      "epoch": 2.622516556291391,
      "grad_norm": 9.048988342285156,
      "learning_rate": 2.6583210603829162e-05,
      "loss": 0.8305,
      "step": 396
    },
    {
      "epoch": 2.6357615894039736,
      "grad_norm": 13.187280654907227,
      "learning_rate": 2.64359351988218e-05,
      "loss": 1.0179,
      "step": 398
    },
    {
      "epoch": 2.6490066225165565,
      "grad_norm": 13.392657279968262,
      "learning_rate": 2.6288659793814435e-05,
      "loss": 0.1317,
      "step": 400
    },
    {
      "epoch": 2.662251655629139,
      "grad_norm": 11.062936782836914,
      "learning_rate": 2.6141384388807072e-05,
      "loss": 0.6804,
      "step": 402
    },
    {
      "epoch": 2.6754966887417218,
      "grad_norm": 11.463652610778809,
      "learning_rate": 2.599410898379971e-05,
      "loss": 0.5558,
      "step": 404
    },
    {
      "epoch": 2.6887417218543046,
      "grad_norm": 5.696853160858154,
      "learning_rate": 2.5846833578792346e-05,
      "loss": 0.7812,
      "step": 406
    },
    {
      "epoch": 2.7019867549668874,
      "grad_norm": 15.077645301818848,
      "learning_rate": 2.5699558173784976e-05,
      "loss": 0.6744,
      "step": 408
    },
    {
      "epoch": 2.7152317880794703,
      "grad_norm": 4.043674468994141,
      "learning_rate": 2.5552282768777613e-05,
      "loss": 0.3354,
      "step": 410
    },
    {
      "epoch": 2.7284768211920527,
      "grad_norm": 1.150486946105957,
      "learning_rate": 2.540500736377025e-05,
      "loss": 0.3314,
      "step": 412
    },
    {
      "epoch": 2.741721854304636,
      "grad_norm": 12.133696556091309,
      "learning_rate": 2.5257731958762887e-05,
      "loss": 0.526,
      "step": 414
    },
    {
      "epoch": 2.7549668874172184,
      "grad_norm": 11.533574104309082,
      "learning_rate": 2.5110456553755524e-05,
      "loss": 0.8745,
      "step": 416
    },
    {
      "epoch": 2.7682119205298013,
      "grad_norm": 1.3846099376678467,
      "learning_rate": 2.496318114874816e-05,
      "loss": 0.204,
      "step": 418
    },
    {
      "epoch": 2.781456953642384,
      "grad_norm": 4.653772830963135,
      "learning_rate": 2.4815905743740797e-05,
      "loss": 0.3621,
      "step": 420
    },
    {
      "epoch": 2.794701986754967,
      "grad_norm": 8.693764686584473,
      "learning_rate": 2.4668630338733434e-05,
      "loss": 0.4153,
      "step": 422
    },
    {
      "epoch": 2.80794701986755,
      "grad_norm": 11.415797233581543,
      "learning_rate": 2.4521354933726068e-05,
      "loss": 0.6997,
      "step": 424
    },
    {
      "epoch": 2.821192052980132,
      "grad_norm": 3.682908773422241,
      "learning_rate": 2.4374079528718704e-05,
      "loss": 0.3039,
      "step": 426
    },
    {
      "epoch": 2.8344370860927155,
      "grad_norm": 9.39224624633789,
      "learning_rate": 2.422680412371134e-05,
      "loss": 0.6302,
      "step": 428
    },
    {
      "epoch": 2.847682119205298,
      "grad_norm": 3.271193742752075,
      "learning_rate": 2.4079528718703978e-05,
      "loss": 0.2706,
      "step": 430
    },
    {
      "epoch": 2.8609271523178808,
      "grad_norm": 7.453132629394531,
      "learning_rate": 2.3932253313696615e-05,
      "loss": 0.4594,
      "step": 432
    },
    {
      "epoch": 2.8741721854304636,
      "grad_norm": 16.519481658935547,
      "learning_rate": 2.378497790868925e-05,
      "loss": 0.2437,
      "step": 434
    },
    {
      "epoch": 2.8874172185430464,
      "grad_norm": 17.087709426879883,
      "learning_rate": 2.3637702503681885e-05,
      "loss": 0.9733,
      "step": 436
    },
    {
      "epoch": 2.9006622516556293,
      "grad_norm": 1.0074738264083862,
      "learning_rate": 2.3490427098674522e-05,
      "loss": 0.649,
      "step": 438
    },
    {
      "epoch": 2.9139072847682117,
      "grad_norm": 4.366696834564209,
      "learning_rate": 2.334315169366716e-05,
      "loss": 0.5205,
      "step": 440
    },
    {
      "epoch": 2.9271523178807946,
      "grad_norm": 20.609182357788086,
      "learning_rate": 2.3195876288659796e-05,
      "loss": 0.2493,
      "step": 442
    },
    {
      "epoch": 2.9403973509933774,
      "grad_norm": 9.770134925842285,
      "learning_rate": 2.3048600883652433e-05,
      "loss": 0.1564,
      "step": 444
    },
    {
      "epoch": 2.9536423841059603,
      "grad_norm": 8.714824676513672,
      "learning_rate": 2.2901325478645066e-05,
      "loss": 0.6449,
      "step": 446
    },
    {
      "epoch": 2.966887417218543,
      "grad_norm": 7.8644514083862305,
      "learning_rate": 2.2754050073637703e-05,
      "loss": 1.3438,
      "step": 448
    },
    {
      "epoch": 2.980132450331126,
      "grad_norm": 5.881997108459473,
      "learning_rate": 2.260677466863034e-05,
      "loss": 0.3383,
      "step": 450
    },
    {
      "epoch": 2.993377483443709,
      "grad_norm": 12.096423149108887,
      "learning_rate": 2.2459499263622977e-05,
      "loss": 0.7644,
      "step": 452
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.74,
      "eval_f1_macro": 0.4558641367469575,
      "eval_f1_micro": 0.74,
      "eval_f1_weighted": 0.7165269403625714,
      "eval_loss": 1.0647395849227905,
      "eval_precision_macro": 0.47941017316017315,
      "eval_precision_micro": 0.74,
      "eval_precision_weighted": 0.7261709956709957,
      "eval_recall_macro": 0.4931601731601732,
      "eval_recall_micro": 0.74,
      "eval_recall_weighted": 0.74,
      "eval_runtime": 0.1879,
      "eval_samples_per_second": 532.193,
      "eval_steps_per_second": 37.254,
      "step": 453
    }
  ],
  "logging_steps": 2,
  "max_steps": 755,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 119358311592960.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}