|
{ |
|
"best_metric": 0.762063227953411, |
|
"best_model_checkpoint": "finetuned_models/wisesight_sentiment/checkpoint-2400", |
|
"epoch": 5.991124260355029, |
|
"eval_steps": 100, |
|
"global_step": 8100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07396449704142012, |
|
"eval_class_f1": { |
|
"neg": 0.018604651162790697, |
|
"neu": 0.6997558991049634, |
|
"pos": 0.0, |
|
"q": 0.0 |
|
}, |
|
"eval_loss": 1.0810712575912476, |
|
"eval_macro_average_f1": 0.17959013756693853, |
|
"eval_micro_average_f1": 0.5391014975041597, |
|
"eval_runtime": 6.8397, |
|
"eval_samples_per_second": 351.476, |
|
"eval_steps_per_second": 22.077, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.14792899408284024, |
|
"eval_class_f1": { |
|
"neg": 0.6223055295220243, |
|
"neu": 0.7497702909647779, |
|
"pos": 0.0, |
|
"q": 0.0 |
|
}, |
|
"eval_loss": 0.8820463418960571, |
|
"eval_macro_average_f1": 0.34301895512170055, |
|
"eval_micro_average_f1": 0.6472545757071547, |
|
"eval_runtime": 6.8712, |
|
"eval_samples_per_second": 349.865, |
|
"eval_steps_per_second": 21.976, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22189349112426035, |
|
"eval_class_f1": { |
|
"neg": 0.7588757396449703, |
|
"neu": 0.7731384829505916, |
|
"pos": 0.27037037037037037, |
|
"q": 0.0 |
|
}, |
|
"eval_loss": 0.7263810038566589, |
|
"eval_macro_average_f1": 0.4505961482414831, |
|
"eval_micro_average_f1": 0.7059068219633944, |
|
"eval_runtime": 7.0136, |
|
"eval_samples_per_second": 342.761, |
|
"eval_steps_per_second": 21.529, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2958579881656805, |
|
"eval_class_f1": { |
|
"neg": 0.7806637806637807, |
|
"neu": 0.760541586073501, |
|
"pos": 0.501891551071879, |
|
"q": 0.0909090909090909 |
|
}, |
|
"eval_loss": 0.6896220445632935, |
|
"eval_macro_average_f1": 0.5335015021795629, |
|
"eval_micro_average_f1": 0.7175540765391015, |
|
"eval_runtime": 7.0909, |
|
"eval_samples_per_second": 339.026, |
|
"eval_steps_per_second": 21.295, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.3698224852071006, |
|
"grad_norm": 5.410265922546387, |
|
"learning_rate": 1.828817733990148e-05, |
|
"loss": 0.8994, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3698224852071006, |
|
"eval_class_f1": { |
|
"neg": 0.7767988252569751, |
|
"neu": 0.7759882869692534, |
|
"pos": 0.41987179487179493, |
|
"q": 0.3777777777777778 |
|
}, |
|
"eval_loss": 0.6673027276992798, |
|
"eval_macro_average_f1": 0.5876091712189503, |
|
"eval_micro_average_f1": 0.7225457570715474, |
|
"eval_runtime": 7.071, |
|
"eval_samples_per_second": 339.978, |
|
"eval_steps_per_second": 21.355, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4437869822485207, |
|
"eval_class_f1": { |
|
"neg": 0.7704042715484364, |
|
"neu": 0.7415287628053587, |
|
"pos": 0.5426356589147286, |
|
"q": 0.14285714285714285 |
|
}, |
|
"eval_loss": 0.6752218008041382, |
|
"eval_macro_average_f1": 0.5493564590314166, |
|
"eval_micro_average_f1": 0.7050748752079867, |
|
"eval_runtime": 7.2386, |
|
"eval_samples_per_second": 332.107, |
|
"eval_steps_per_second": 20.86, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.5177514792899408, |
|
"eval_class_f1": { |
|
"neg": 0.7780979827089337, |
|
"neu": 0.7535296490520371, |
|
"pos": 0.5532435740514076, |
|
"q": 0.3870967741935484 |
|
}, |
|
"eval_loss": 0.667128324508667, |
|
"eval_macro_average_f1": 0.6179919950014817, |
|
"eval_micro_average_f1": 0.7171381031613977, |
|
"eval_runtime": 7.3636, |
|
"eval_samples_per_second": 326.472, |
|
"eval_steps_per_second": 20.506, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.591715976331361, |
|
"eval_class_f1": { |
|
"neg": 0.7555923777961889, |
|
"neu": 0.7529501332318232, |
|
"pos": 0.5357575757575757, |
|
"q": 0.4161073825503356 |
|
}, |
|
"eval_loss": 0.662220299243927, |
|
"eval_macro_average_f1": 0.6151018673339809, |
|
"eval_micro_average_f1": 0.7059068219633944, |
|
"eval_runtime": 7.3848, |
|
"eval_samples_per_second": 325.535, |
|
"eval_steps_per_second": 20.448, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.665680473372781, |
|
"eval_class_f1": { |
|
"neg": 0.7756714060031595, |
|
"neu": 0.7722698471859858, |
|
"pos": 0.5253807106598984, |
|
"q": 0.2535211267605634 |
|
}, |
|
"eval_loss": 0.6270455121994019, |
|
"eval_macro_average_f1": 0.5817107726524018, |
|
"eval_micro_average_f1": 0.7250415973377704, |
|
"eval_runtime": 7.3625, |
|
"eval_samples_per_second": 326.52, |
|
"eval_steps_per_second": 20.509, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.7396449704142012, |
|
"grad_norm": 7.580224514007568, |
|
"learning_rate": 2.9252054794520548e-05, |
|
"loss": 0.6495, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.7396449704142012, |
|
"eval_class_f1": { |
|
"neg": 0.7862993298585256, |
|
"neu": 0.7885968159940763, |
|
"pos": 0.5219858156028369, |
|
"q": 0.30508474576271183 |
|
}, |
|
"eval_loss": 0.6415818929672241, |
|
"eval_macro_average_f1": 0.6004916768045376, |
|
"eval_micro_average_f1": 0.742928452579035, |
|
"eval_runtime": 7.3324, |
|
"eval_samples_per_second": 327.861, |
|
"eval_steps_per_second": 20.594, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8136094674556213, |
|
"eval_class_f1": { |
|
"neg": 0.7824267782426777, |
|
"neu": 0.7750972762645915, |
|
"pos": 0.550531914893617, |
|
"q": 0.1923076923076923 |
|
}, |
|
"eval_loss": 0.6599770188331604, |
|
"eval_macro_average_f1": 0.5750909154271446, |
|
"eval_micro_average_f1": 0.7358569051580699, |
|
"eval_runtime": 7.4168, |
|
"eval_samples_per_second": 324.129, |
|
"eval_steps_per_second": 20.359, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8875739644970414, |
|
"eval_class_f1": { |
|
"neg": 0.7531806615776081, |
|
"neu": 0.7700414000752727, |
|
"pos": 0.5685164212910533, |
|
"q": 0.35955056179775274 |
|
}, |
|
"eval_loss": 0.6348879933357239, |
|
"eval_macro_average_f1": 0.6128222611854217, |
|
"eval_micro_average_f1": 0.721297836938436, |
|
"eval_runtime": 7.4408, |
|
"eval_samples_per_second": 323.084, |
|
"eval_steps_per_second": 20.294, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"eval_class_f1": { |
|
"neg": 0.7920646583394563, |
|
"neu": 0.783076923076923, |
|
"pos": 0.535014005602241, |
|
"q": 0.46616541353383456 |
|
}, |
|
"eval_loss": 0.6110679507255554, |
|
"eval_macro_average_f1": 0.6440802501381137, |
|
"eval_micro_average_f1": 0.7400166389351082, |
|
"eval_runtime": 7.4775, |
|
"eval_samples_per_second": 321.498, |
|
"eval_steps_per_second": 20.194, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.0355029585798816, |
|
"eval_class_f1": { |
|
"neg": 0.8015094339622642, |
|
"neu": 0.8065934065934067, |
|
"pos": 0.5252225519287834, |
|
"q": 0.379746835443038 |
|
}, |
|
"eval_loss": 0.6416576504707336, |
|
"eval_macro_average_f1": 0.628268056981873, |
|
"eval_micro_average_f1": 0.7587354409317804, |
|
"eval_runtime": 7.358, |
|
"eval_samples_per_second": 326.721, |
|
"eval_steps_per_second": 20.522, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.1094674556213018, |
|
"grad_norm": 3.8226146697998047, |
|
"learning_rate": 2.72013698630137e-05, |
|
"loss": 0.6084, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.1094674556213018, |
|
"eval_class_f1": { |
|
"neg": 0.7846277021617293, |
|
"neu": 0.8024917552216929, |
|
"pos": 0.5813953488372093, |
|
"q": 0.2857142857142857 |
|
}, |
|
"eval_loss": 0.6498740315437317, |
|
"eval_macro_average_f1": 0.6135572729837293, |
|
"eval_micro_average_f1": 0.7562396006655574, |
|
"eval_runtime": 7.2975, |
|
"eval_samples_per_second": 329.428, |
|
"eval_steps_per_second": 20.692, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.183431952662722, |
|
"eval_class_f1": { |
|
"neg": 0.7917525773195877, |
|
"neu": 0.7863247863247863, |
|
"pos": 0.5444126074498568, |
|
"q": 0.345679012345679 |
|
}, |
|
"eval_loss": 0.685055673122406, |
|
"eval_macro_average_f1": 0.6170422458599774, |
|
"eval_micro_average_f1": 0.7454242928452579, |
|
"eval_runtime": 7.1996, |
|
"eval_samples_per_second": 333.908, |
|
"eval_steps_per_second": 20.973, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.2573964497041419, |
|
"eval_class_f1": { |
|
"neg": 0.7966231772831925, |
|
"neu": 0.7783018867924529, |
|
"pos": 0.5695216907675196, |
|
"q": 0.25806451612903225 |
|
}, |
|
"eval_loss": 0.6685267090797424, |
|
"eval_macro_average_f1": 0.6006278177430493, |
|
"eval_micro_average_f1": 0.7375207986688852, |
|
"eval_runtime": 7.2849, |
|
"eval_samples_per_second": 329.999, |
|
"eval_steps_per_second": 20.728, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.331360946745562, |
|
"eval_class_f1": { |
|
"neg": 0.8018362662586075, |
|
"neu": 0.7977570093457944, |
|
"pos": 0.5578947368421052, |
|
"q": 0.36363636363636365 |
|
}, |
|
"eval_loss": 0.6347253918647766, |
|
"eval_macro_average_f1": 0.6302810940207177, |
|
"eval_micro_average_f1": 0.7549916805324459, |
|
"eval_runtime": 7.1263, |
|
"eval_samples_per_second": 337.343, |
|
"eval_steps_per_second": 21.189, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.4053254437869822, |
|
"eval_class_f1": { |
|
"neg": 0.7917329093799682, |
|
"neu": 0.7925512104283055, |
|
"pos": 0.5742821473158551, |
|
"q": 0.28125 |
|
}, |
|
"eval_loss": 0.6284430027008057, |
|
"eval_macro_average_f1": 0.6099540667810323, |
|
"eval_micro_average_f1": 0.7491680532445923, |
|
"eval_runtime": 7.2447, |
|
"eval_samples_per_second": 331.831, |
|
"eval_steps_per_second": 20.843, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.4792899408284024, |
|
"grad_norm": 4.046507835388184, |
|
"learning_rate": 2.5146575342465757e-05, |
|
"loss": 0.5135, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.4792899408284024, |
|
"eval_class_f1": { |
|
"neg": 0.7920792079207921, |
|
"neu": 0.7867370007535796, |
|
"pos": 0.5517241379310345, |
|
"q": 0.27586206896551724 |
|
}, |
|
"eval_loss": 0.6431812644004822, |
|
"eval_macro_average_f1": 0.601600603892731, |
|
"eval_micro_average_f1": 0.7437603993344426, |
|
"eval_runtime": 7.2356, |
|
"eval_samples_per_second": 332.247, |
|
"eval_steps_per_second": 20.869, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.5532544378698225, |
|
"eval_class_f1": { |
|
"neg": 0.7887550200803214, |
|
"neu": 0.7950581395348836, |
|
"pos": 0.555407209612817, |
|
"q": 0.29032258064516125 |
|
}, |
|
"eval_loss": 0.6327183842658997, |
|
"eval_macro_average_f1": 0.6073857374682958, |
|
"eval_micro_average_f1": 0.7495840266222962, |
|
"eval_runtime": 7.3333, |
|
"eval_samples_per_second": 327.82, |
|
"eval_steps_per_second": 20.591, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6272189349112427, |
|
"eval_class_f1": { |
|
"neg": 0.7658119658119659, |
|
"neu": 0.8002847988608045, |
|
"pos": 0.56951871657754, |
|
"q": 0.4197530864197531 |
|
}, |
|
"eval_loss": 0.6534045338630676, |
|
"eval_macro_average_f1": 0.6388421419175159, |
|
"eval_micro_average_f1": 0.7495840266222962, |
|
"eval_runtime": 7.2578, |
|
"eval_samples_per_second": 331.231, |
|
"eval_steps_per_second": 20.805, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.7011834319526629, |
|
"eval_class_f1": { |
|
"neg": 0.7832369942196531, |
|
"neu": 0.7650099403578529, |
|
"pos": 0.5829268292682928, |
|
"q": 0.35955056179775274 |
|
}, |
|
"eval_loss": 0.6581071019172668, |
|
"eval_macro_average_f1": 0.6226810814108878, |
|
"eval_micro_average_f1": 0.7316971713810316, |
|
"eval_runtime": 7.1824, |
|
"eval_samples_per_second": 334.708, |
|
"eval_steps_per_second": 21.024, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.7751479289940828, |
|
"eval_class_f1": { |
|
"neg": 0.7951807228915662, |
|
"neu": 0.8109843081312411, |
|
"pos": 0.5441595441595442, |
|
"q": 0.3157894736842105 |
|
}, |
|
"eval_loss": 0.6206311583518982, |
|
"eval_macro_average_f1": 0.6165285122166405, |
|
"eval_micro_average_f1": 0.762063227953411, |
|
"eval_runtime": 7.2501, |
|
"eval_samples_per_second": 331.583, |
|
"eval_steps_per_second": 20.827, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.849112426035503, |
|
"grad_norm": 6.195135593414307, |
|
"learning_rate": 2.3091780821917807e-05, |
|
"loss": 0.4995, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.849112426035503, |
|
"eval_class_f1": { |
|
"neg": 0.7932148626817447, |
|
"neu": 0.8, |
|
"pos": 0.5830164765525983, |
|
"q": 0.196078431372549 |
|
}, |
|
"eval_loss": 0.6029447913169861, |
|
"eval_macro_average_f1": 0.5930774426517229, |
|
"eval_micro_average_f1": 0.7562396006655574, |
|
"eval_runtime": 7.1935, |
|
"eval_samples_per_second": 334.192, |
|
"eval_steps_per_second": 20.991, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"eval_class_f1": { |
|
"neg": 0.8059236165237724, |
|
"neu": 0.7899159663865546, |
|
"pos": 0.579415501905972, |
|
"q": 0.36666666666666664 |
|
}, |
|
"eval_loss": 0.6066814064979553, |
|
"eval_macro_average_f1": 0.6354804378707414, |
|
"eval_micro_average_f1": 0.7491680532445923, |
|
"eval_runtime": 7.2817, |
|
"eval_samples_per_second": 330.143, |
|
"eval_steps_per_second": 20.737, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.997041420118343, |
|
"eval_class_f1": { |
|
"neg": 0.8003157063930545, |
|
"neu": 0.797884397431054, |
|
"pos": 0.5773447015834348, |
|
"q": 0.3835616438356164 |
|
}, |
|
"eval_loss": 0.630171537399292, |
|
"eval_macro_average_f1": 0.63977661231079, |
|
"eval_micro_average_f1": 0.7545757071547421, |
|
"eval_runtime": 7.2376, |
|
"eval_samples_per_second": 332.156, |
|
"eval_steps_per_second": 20.863, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.0710059171597632, |
|
"eval_class_f1": { |
|
"neg": 0.7848509266720386, |
|
"neu": 0.7945103857566765, |
|
"pos": 0.5853051058530511, |
|
"q": 0.35294117647058826 |
|
}, |
|
"eval_loss": 0.7064331769943237, |
|
"eval_macro_average_f1": 0.6294018986880886, |
|
"eval_micro_average_f1": 0.7508319467554077, |
|
"eval_runtime": 7.2934, |
|
"eval_samples_per_second": 329.612, |
|
"eval_steps_per_second": 20.704, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.1449704142011834, |
|
"eval_class_f1": { |
|
"neg": 0.797752808988764, |
|
"neu": 0.8026412325752018, |
|
"pos": 0.5824742268041238, |
|
"q": 0.26666666666666666 |
|
}, |
|
"eval_loss": 0.7201129794120789, |
|
"eval_macro_average_f1": 0.612383733758689, |
|
"eval_micro_average_f1": 0.7591514143094842, |
|
"eval_runtime": 7.1883, |
|
"eval_samples_per_second": 334.433, |
|
"eval_steps_per_second": 21.006, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.2189349112426036, |
|
"grad_norm": 6.065237045288086, |
|
"learning_rate": 2.1036986301369864e-05, |
|
"loss": 0.4003, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2189349112426036, |
|
"eval_class_f1": { |
|
"neg": 0.7861271676300579, |
|
"neu": 0.800578034682081, |
|
"pos": 0.5670391061452514, |
|
"q": 0.3185840707964602 |
|
}, |
|
"eval_loss": 0.7178497910499573, |
|
"eval_macro_average_f1": 0.6180820948134627, |
|
"eval_micro_average_f1": 0.7508319467554077, |
|
"eval_runtime": 7.2106, |
|
"eval_samples_per_second": 333.398, |
|
"eval_steps_per_second": 20.941, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.2928994082840237, |
|
"eval_class_f1": { |
|
"neg": 0.7955801104972374, |
|
"neu": 0.781854043392505, |
|
"pos": 0.5852585258525853, |
|
"q": 0.28865979381443296 |
|
}, |
|
"eval_loss": 0.7727176547050476, |
|
"eval_macro_average_f1": 0.6128381183891901, |
|
"eval_micro_average_f1": 0.7383527454242929, |
|
"eval_runtime": 7.2299, |
|
"eval_samples_per_second": 332.51, |
|
"eval_steps_per_second": 20.886, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.366863905325444, |
|
"eval_class_f1": { |
|
"neg": 0.7893462469733656, |
|
"neu": 0.788983997022702, |
|
"pos": 0.5606060606060607, |
|
"q": 0.2888888888888889 |
|
}, |
|
"eval_loss": 0.7219040393829346, |
|
"eval_macro_average_f1": 0.6069562983727543, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.2669, |
|
"eval_samples_per_second": 330.815, |
|
"eval_steps_per_second": 20.779, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.440828402366864, |
|
"eval_class_f1": { |
|
"neg": 0.8073115003808072, |
|
"neu": 0.7814829344841114, |
|
"pos": 0.5855338691159586, |
|
"q": 0.26666666666666666 |
|
}, |
|
"eval_loss": 0.7229210734367371, |
|
"eval_macro_average_f1": 0.610248742661886, |
|
"eval_micro_average_f1": 0.7450083194675541, |
|
"eval_runtime": 7.1283, |
|
"eval_samples_per_second": 337.248, |
|
"eval_steps_per_second": 21.183, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.5147928994082838, |
|
"eval_class_f1": { |
|
"neg": 0.7984790874524715, |
|
"neu": 0.7856049004594182, |
|
"pos": 0.5773447015834348, |
|
"q": 0.3 |
|
}, |
|
"eval_loss": 0.7037935853004456, |
|
"eval_macro_average_f1": 0.615357172373831, |
|
"eval_micro_average_f1": 0.747504159733777, |
|
"eval_runtime": 7.2219, |
|
"eval_samples_per_second": 332.876, |
|
"eval_steps_per_second": 20.909, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.5887573964497044, |
|
"grad_norm": 3.8475677967071533, |
|
"learning_rate": 1.8982191780821918e-05, |
|
"loss": 0.3579, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.5887573964497044, |
|
"eval_class_f1": { |
|
"neg": 0.7871815940838127, |
|
"neu": 0.7871305649083427, |
|
"pos": 0.5738916256157636, |
|
"q": 0.37735849056603776 |
|
}, |
|
"eval_loss": 0.7569752931594849, |
|
"eval_macro_average_f1": 0.6313905687934891, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.3391, |
|
"eval_samples_per_second": 327.56, |
|
"eval_steps_per_second": 20.575, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.662721893491124, |
|
"eval_class_f1": { |
|
"neg": 0.8064269319051262, |
|
"neu": 0.7905718701700155, |
|
"pos": 0.5779927448609432, |
|
"q": 0.3255813953488372 |
|
}, |
|
"eval_loss": 0.7201011180877686, |
|
"eval_macro_average_f1": 0.6251432355712305, |
|
"eval_micro_average_f1": 0.75, |
|
"eval_runtime": 7.2188, |
|
"eval_samples_per_second": 333.02, |
|
"eval_steps_per_second": 20.918, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7366863905325443, |
|
"eval_class_f1": { |
|
"neg": 0.7847896440129449, |
|
"neu": 0.7701911822083495, |
|
"pos": 0.5797413793103448, |
|
"q": 0.3703703703703704 |
|
}, |
|
"eval_loss": 0.7302864789962769, |
|
"eval_macro_average_f1": 0.6262731439755023, |
|
"eval_micro_average_f1": 0.7304492512479202, |
|
"eval_runtime": 7.2541, |
|
"eval_samples_per_second": 331.401, |
|
"eval_steps_per_second": 20.816, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.8106508875739644, |
|
"eval_class_f1": { |
|
"neg": 0.7971698113207547, |
|
"neu": 0.8014842300556586, |
|
"pos": 0.5839793281653747, |
|
"q": 0.3283582089552239 |
|
}, |
|
"eval_loss": 0.7112248539924622, |
|
"eval_macro_average_f1": 0.627747894624253, |
|
"eval_micro_average_f1": 0.7587354409317804, |
|
"eval_runtime": 7.2905, |
|
"eval_samples_per_second": 329.743, |
|
"eval_steps_per_second": 20.712, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.8846153846153846, |
|
"eval_class_f1": { |
|
"neg": 0.7999999999999999, |
|
"neu": 0.7944066515495087, |
|
"pos": 0.5961995249406176, |
|
"q": 0.2545454545454545 |
|
}, |
|
"eval_loss": 0.7105884552001953, |
|
"eval_macro_average_f1": 0.6112879077588952, |
|
"eval_micro_average_f1": 0.7549916805324459, |
|
"eval_runtime": 7.4167, |
|
"eval_samples_per_second": 324.132, |
|
"eval_steps_per_second": 20.359, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.9585798816568047, |
|
"grad_norm": 8.97050666809082, |
|
"learning_rate": 1.6927397260273975e-05, |
|
"loss": 0.3409, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.9585798816568047, |
|
"eval_class_f1": { |
|
"neg": 0.803088803088803, |
|
"neu": 0.7901328273244782, |
|
"pos": 0.5671641791044775, |
|
"q": 0.3513513513513513 |
|
}, |
|
"eval_loss": 0.7364293932914734, |
|
"eval_macro_average_f1": 0.6279342902172774, |
|
"eval_micro_average_f1": 0.7495840266222962, |
|
"eval_runtime": 7.2488, |
|
"eval_samples_per_second": 331.641, |
|
"eval_steps_per_second": 20.831, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.032544378698225, |
|
"eval_class_f1": { |
|
"neg": 0.7924836601307189, |
|
"neu": 0.7892777364110202, |
|
"pos": 0.5696969696969698, |
|
"q": 0.3287671232876712 |
|
}, |
|
"eval_loss": 0.8425710201263428, |
|
"eval_macro_average_f1": 0.6200563723815951, |
|
"eval_micro_average_f1": 0.7454242928452579, |
|
"eval_runtime": 7.1671, |
|
"eval_samples_per_second": 335.422, |
|
"eval_steps_per_second": 21.068, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.106508875739645, |
|
"eval_class_f1": { |
|
"neg": 0.7883817427385893, |
|
"neu": 0.7684537684537684, |
|
"pos": 0.5720338983050848, |
|
"q": 0.35294117647058826 |
|
}, |
|
"eval_loss": 0.9264113306999207, |
|
"eval_macro_average_f1": 0.6204526464920077, |
|
"eval_micro_average_f1": 0.7275374376039934, |
|
"eval_runtime": 7.2924, |
|
"eval_samples_per_second": 329.656, |
|
"eval_steps_per_second": 20.706, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 3.1804733727810652, |
|
"eval_class_f1": { |
|
"neg": 0.8064269319051262, |
|
"neu": 0.7787333854573885, |
|
"pos": 0.5774647887323944, |
|
"q": 0.32967032967032966 |
|
}, |
|
"eval_loss": 0.9222328662872314, |
|
"eval_macro_average_f1": 0.6230738589413097, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.2012, |
|
"eval_samples_per_second": 333.833, |
|
"eval_steps_per_second": 20.969, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 3.2544378698224854, |
|
"eval_class_f1": { |
|
"neg": 0.7999999999999999, |
|
"neu": 0.7803557617942769, |
|
"pos": 0.5765124555160142, |
|
"q": 0.35955056179775274 |
|
}, |
|
"eval_loss": 0.9496058821678162, |
|
"eval_macro_average_f1": 0.6291046947770109, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.3334, |
|
"eval_samples_per_second": 327.814, |
|
"eval_steps_per_second": 20.591, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 3.328402366863905, |
|
"grad_norm": 12.435276985168457, |
|
"learning_rate": 1.4872602739726027e-05, |
|
"loss": 0.2249, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.328402366863905, |
|
"eval_class_f1": { |
|
"neg": 0.8012718600953895, |
|
"neu": 0.784238714613619, |
|
"pos": 0.5663082437275986, |
|
"q": 0.32323232323232326 |
|
}, |
|
"eval_loss": 0.9026820063591003, |
|
"eval_macro_average_f1": 0.6187627854172325, |
|
"eval_micro_average_f1": 0.7412645590682196, |
|
"eval_runtime": 7.1404, |
|
"eval_samples_per_second": 336.677, |
|
"eval_steps_per_second": 21.147, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 3.4023668639053253, |
|
"eval_class_f1": { |
|
"neg": 0.8043647700701482, |
|
"neu": 0.7884322678843227, |
|
"pos": 0.5676328502415459, |
|
"q": 0.2898550724637681 |
|
}, |
|
"eval_loss": 0.943065345287323, |
|
"eval_macro_average_f1": 0.6125712401649462, |
|
"eval_micro_average_f1": 0.747504159733777, |
|
"eval_runtime": 7.2681, |
|
"eval_samples_per_second": 330.759, |
|
"eval_steps_per_second": 20.776, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 3.4763313609467454, |
|
"eval_class_f1": { |
|
"neg": 0.8018504240555128, |
|
"neu": 0.7930382141505864, |
|
"pos": 0.5692503176620076, |
|
"q": 0.345679012345679 |
|
}, |
|
"eval_loss": 0.9825762510299683, |
|
"eval_macro_average_f1": 0.6274544920534464, |
|
"eval_micro_average_f1": 0.7512479201331115, |
|
"eval_runtime": 7.2921, |
|
"eval_samples_per_second": 329.672, |
|
"eval_steps_per_second": 20.707, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 3.5502958579881656, |
|
"eval_class_f1": { |
|
"neg": 0.7946498819826908, |
|
"neu": 0.7813455657492355, |
|
"pos": 0.5795053003533569, |
|
"q": 0.3055555555555555 |
|
}, |
|
"eval_loss": 0.9374552965164185, |
|
"eval_macro_average_f1": 0.6152640759102097, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.387, |
|
"eval_samples_per_second": 325.436, |
|
"eval_steps_per_second": 20.441, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 3.6242603550295858, |
|
"eval_class_f1": { |
|
"neg": 0.7984790874524715, |
|
"neu": 0.7785547785547785, |
|
"pos": 0.5748218527315915, |
|
"q": 0.3116883116883117 |
|
}, |
|
"eval_loss": 0.9656402468681335, |
|
"eval_macro_average_f1": 0.6158860076067884, |
|
"eval_micro_average_f1": 0.7408485856905158, |
|
"eval_runtime": 7.4821, |
|
"eval_samples_per_second": 321.299, |
|
"eval_steps_per_second": 20.181, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 3.698224852071006, |
|
"grad_norm": 0.6623280644416809, |
|
"learning_rate": 1.2817808219178083e-05, |
|
"loss": 0.2207, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.698224852071006, |
|
"eval_class_f1": { |
|
"neg": 0.7987616099071206, |
|
"neu": 0.7862857142857143, |
|
"pos": 0.5647348951911221, |
|
"q": 0.3 |
|
}, |
|
"eval_loss": 0.9422620534896851, |
|
"eval_macro_average_f1": 0.6124455548459892, |
|
"eval_micro_average_f1": 0.7441763727121464, |
|
"eval_runtime": 7.2765, |
|
"eval_samples_per_second": 330.379, |
|
"eval_steps_per_second": 20.752, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 3.772189349112426, |
|
"eval_class_f1": { |
|
"neg": 0.8, |
|
"neu": 0.7671342685370742, |
|
"pos": 0.5714285714285714, |
|
"q": 0.3414634146341463 |
|
}, |
|
"eval_loss": 0.9625053405761719, |
|
"eval_macro_average_f1": 0.620006563649948, |
|
"eval_micro_average_f1": 0.7296173044925125, |
|
"eval_runtime": 7.3445, |
|
"eval_samples_per_second": 327.318, |
|
"eval_steps_per_second": 20.559, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"eval_class_f1": { |
|
"neg": 0.8018942383583267, |
|
"neu": 0.7968691762951919, |
|
"pos": 0.5706874189364461, |
|
"q": 0.25287356321839083 |
|
}, |
|
"eval_loss": 0.9822611212730408, |
|
"eval_macro_average_f1": 0.6055810992020889, |
|
"eval_micro_average_f1": 0.7520798668885191, |
|
"eval_runtime": 7.4499, |
|
"eval_samples_per_second": 322.688, |
|
"eval_steps_per_second": 20.269, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 3.9201183431952664, |
|
"eval_class_f1": { |
|
"neg": 0.7891268533772653, |
|
"neu": 0.7875375375375375, |
|
"pos": 0.5810968494749125, |
|
"q": 0.273972602739726 |
|
}, |
|
"eval_loss": 0.9442653656005859, |
|
"eval_macro_average_f1": 0.6079334607823603, |
|
"eval_micro_average_f1": 0.7433444259567388, |
|
"eval_runtime": 7.3731, |
|
"eval_samples_per_second": 326.052, |
|
"eval_steps_per_second": 20.48, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 3.994082840236686, |
|
"eval_class_f1": { |
|
"neg": 0.8046511627906977, |
|
"neu": 0.7945516458569808, |
|
"pos": 0.5775, |
|
"q": 0.32 |
|
}, |
|
"eval_loss": 0.9429491758346558, |
|
"eval_macro_average_f1": 0.6241757021619195, |
|
"eval_micro_average_f1": 0.7537437603993344, |
|
"eval_runtime": 7.3966, |
|
"eval_samples_per_second": 325.013, |
|
"eval_steps_per_second": 20.415, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 4.068047337278107, |
|
"grad_norm": 2.4124114513397217, |
|
"learning_rate": 1.0763013698630138e-05, |
|
"loss": 0.2077, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.068047337278107, |
|
"eval_class_f1": { |
|
"neg": 0.8063781321184511, |
|
"neu": 0.7866927592954991, |
|
"pos": 0.5862884160756501, |
|
"q": 0.3333333333333333 |
|
}, |
|
"eval_loss": 1.1077452898025513, |
|
"eval_macro_average_f1": 0.6281731602057334, |
|
"eval_micro_average_f1": 0.7483361064891847, |
|
"eval_runtime": 7.369, |
|
"eval_samples_per_second": 326.23, |
|
"eval_steps_per_second": 20.491, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 4.1420118343195265, |
|
"eval_class_f1": { |
|
"neg": 0.7993920972644377, |
|
"neu": 0.7660256410256411, |
|
"pos": 0.5726775956284154, |
|
"q": 0.29629629629629634 |
|
}, |
|
"eval_loss": 1.1472598314285278, |
|
"eval_macro_average_f1": 0.6085979075536977, |
|
"eval_micro_average_f1": 0.7304492512479202, |
|
"eval_runtime": 7.5033, |
|
"eval_samples_per_second": 320.394, |
|
"eval_steps_per_second": 20.125, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 4.215976331360947, |
|
"eval_class_f1": { |
|
"neg": 0.8024502297090352, |
|
"neu": 0.7868978805394989, |
|
"pos": 0.5731559854897219, |
|
"q": 0.3 |
|
}, |
|
"eval_loss": 1.169406533241272, |
|
"eval_macro_average_f1": 0.615626023934564, |
|
"eval_micro_average_f1": 0.7462562396006656, |
|
"eval_runtime": 7.4833, |
|
"eval_samples_per_second": 321.249, |
|
"eval_steps_per_second": 20.178, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 4.289940828402367, |
|
"eval_class_f1": { |
|
"neg": 0.803951367781155, |
|
"neu": 0.7902550437761706, |
|
"pos": 0.5685019206145967, |
|
"q": 0.30952380952380953 |
|
}, |
|
"eval_loss": 1.1968339681625366, |
|
"eval_macro_average_f1": 0.6180580354239329, |
|
"eval_micro_average_f1": 0.7495840266222962, |
|
"eval_runtime": 7.4382, |
|
"eval_samples_per_second": 323.197, |
|
"eval_steps_per_second": 20.301, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 4.363905325443787, |
|
"eval_class_f1": { |
|
"neg": 0.8024786986831913, |
|
"neu": 0.7916030534351146, |
|
"pos": 0.5773955773955775, |
|
"q": 0.3373493975903615 |
|
}, |
|
"eval_loss": 1.1896393299102783, |
|
"eval_macro_average_f1": 0.6272066817760612, |
|
"eval_micro_average_f1": 0.7504159733777038, |
|
"eval_runtime": 7.423, |
|
"eval_samples_per_second": 323.857, |
|
"eval_steps_per_second": 20.342, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 4.437869822485207, |
|
"grad_norm": 0.9506312608718872, |
|
"learning_rate": 8.708219178082192e-06, |
|
"loss": 0.1324, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.437869822485207, |
|
"eval_class_f1": { |
|
"neg": 0.8024316109422492, |
|
"neu": 0.7925840092699884, |
|
"pos": 0.5878048780487805, |
|
"q": 0.28915662650602414 |
|
}, |
|
"eval_loss": 1.2535008192062378, |
|
"eval_macro_average_f1": 0.6179942811917606, |
|
"eval_micro_average_f1": 0.7516638935108153, |
|
"eval_runtime": 7.3808, |
|
"eval_samples_per_second": 325.711, |
|
"eval_steps_per_second": 20.459, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 4.511834319526627, |
|
"eval_class_f1": { |
|
"neg": 0.7901821060965954, |
|
"neu": 0.7819374758780393, |
|
"pos": 0.5821064552661382, |
|
"q": 0.2535211267605634 |
|
}, |
|
"eval_loss": 1.2182434797286987, |
|
"eval_macro_average_f1": 0.601936791000334, |
|
"eval_micro_average_f1": 0.7396006655574043, |
|
"eval_runtime": 7.3498, |
|
"eval_samples_per_second": 327.085, |
|
"eval_steps_per_second": 20.545, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 4.585798816568047, |
|
"eval_class_f1": { |
|
"neg": 0.7945425361155697, |
|
"neu": 0.7956989247311828, |
|
"pos": 0.5761006289308177, |
|
"q": 0.22857142857142854 |
|
}, |
|
"eval_loss": 1.2836171388626099, |
|
"eval_macro_average_f1": 0.5987283795872497, |
|
"eval_micro_average_f1": 0.7508319467554077, |
|
"eval_runtime": 7.4332, |
|
"eval_samples_per_second": 323.412, |
|
"eval_steps_per_second": 20.314, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 4.659763313609467, |
|
"eval_class_f1": { |
|
"neg": 0.8003025718608169, |
|
"neu": 0.7733970529669454, |
|
"pos": 0.5691609977324262, |
|
"q": 0.3225806451612903 |
|
}, |
|
"eval_loss": 1.2842472791671753, |
|
"eval_macro_average_f1": 0.6163603169303697, |
|
"eval_micro_average_f1": 0.7346089850249584, |
|
"eval_runtime": 7.4941, |
|
"eval_samples_per_second": 320.786, |
|
"eval_steps_per_second": 20.149, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 4.733727810650888, |
|
"eval_class_f1": { |
|
"neg": 0.799375487900078, |
|
"neu": 0.7936865839909809, |
|
"pos": 0.5614489003880984, |
|
"q": 0.3225806451612903 |
|
}, |
|
"eval_loss": 1.3067219257354736, |
|
"eval_macro_average_f1": 0.619272904360112, |
|
"eval_micro_average_f1": 0.7487520798668885, |
|
"eval_runtime": 7.3805, |
|
"eval_samples_per_second": 325.723, |
|
"eval_steps_per_second": 20.459, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 4.8076923076923075, |
|
"grad_norm": 0.2732117772102356, |
|
"learning_rate": 6.653424657534246e-06, |
|
"loss": 0.1441, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.8076923076923075, |
|
"eval_class_f1": { |
|
"neg": 0.8027628549501151, |
|
"neu": 0.7751572327044026, |
|
"pos": 0.5694760820045559, |
|
"q": 0.3132530120481927 |
|
}, |
|
"eval_loss": 1.2718240022659302, |
|
"eval_macro_average_f1": 0.6151622954268166, |
|
"eval_micro_average_f1": 0.7371048252911814, |
|
"eval_runtime": 7.3822, |
|
"eval_samples_per_second": 325.649, |
|
"eval_steps_per_second": 20.455, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 4.881656804733728, |
|
"eval_class_f1": { |
|
"neg": 0.796875, |
|
"neu": 0.7868601986249045, |
|
"pos": 0.5735115431348725, |
|
"q": 0.29885057471264365 |
|
}, |
|
"eval_loss": 1.261472225189209, |
|
"eval_macro_average_f1": 0.6140243291181051, |
|
"eval_micro_average_f1": 0.7441763727121464, |
|
"eval_runtime": 7.5114, |
|
"eval_samples_per_second": 320.048, |
|
"eval_steps_per_second": 20.103, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 4.955621301775148, |
|
"eval_class_f1": { |
|
"neg": 0.7930763178599529, |
|
"neu": 0.7766536964980545, |
|
"pos": 0.5714285714285715, |
|
"q": 0.35294117647058826 |
|
}, |
|
"eval_loss": 1.2753080129623413, |
|
"eval_macro_average_f1": 0.6235249405642919, |
|
"eval_micro_average_f1": 0.7358569051580699, |
|
"eval_runtime": 7.3656, |
|
"eval_samples_per_second": 326.381, |
|
"eval_steps_per_second": 20.501, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 5.029585798816568, |
|
"eval_class_f1": { |
|
"neg": 0.7962962962962963, |
|
"neu": 0.7754943776657619, |
|
"pos": 0.5657276995305165, |
|
"q": 0.345679012345679 |
|
}, |
|
"eval_loss": 1.3079357147216797, |
|
"eval_macro_average_f1": 0.6207993464595634, |
|
"eval_micro_average_f1": 0.7366888519134775, |
|
"eval_runtime": 7.4026, |
|
"eval_samples_per_second": 324.753, |
|
"eval_steps_per_second": 20.398, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 5.103550295857988, |
|
"eval_class_f1": { |
|
"neg": 0.7972136222910216, |
|
"neu": 0.7786790266512167, |
|
"pos": 0.5721040189125295, |
|
"q": 0.3703703703703704 |
|
}, |
|
"eval_loss": 1.3499900102615356, |
|
"eval_macro_average_f1": 0.6295917595562845, |
|
"eval_micro_average_f1": 0.740432612312812, |
|
"eval_runtime": 7.3777, |
|
"eval_samples_per_second": 325.846, |
|
"eval_steps_per_second": 20.467, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 5.177514792899408, |
|
"grad_norm": 11.024497985839844, |
|
"learning_rate": 4.598630136986302e-06, |
|
"loss": 0.1111, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.177514792899408, |
|
"eval_class_f1": { |
|
"neg": 0.7956147220046985, |
|
"neu": 0.7807853602744949, |
|
"pos": 0.5693606755126658, |
|
"q": 0.3544303797468354 |
|
}, |
|
"eval_loss": 1.4051584005355835, |
|
"eval_macro_average_f1": 0.6250477843846737, |
|
"eval_micro_average_f1": 0.7412645590682196, |
|
"eval_runtime": 7.2531, |
|
"eval_samples_per_second": 331.444, |
|
"eval_steps_per_second": 20.819, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 5.2514792899408285, |
|
"eval_class_f1": { |
|
"neg": 0.7925407925407926, |
|
"neu": 0.7769230769230769, |
|
"pos": 0.5737898465171192, |
|
"q": 0.27027027027027023 |
|
}, |
|
"eval_loss": 1.4020917415618896, |
|
"eval_macro_average_f1": 0.6033809965628147, |
|
"eval_micro_average_f1": 0.7375207986688852, |
|
"eval_runtime": 7.404, |
|
"eval_samples_per_second": 324.69, |
|
"eval_steps_per_second": 20.394, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 5.325443786982248, |
|
"eval_class_f1": { |
|
"neg": 0.7949326999208235, |
|
"neu": 0.7753846153846154, |
|
"pos": 0.5727482678983833, |
|
"q": 0.27848101265822783 |
|
}, |
|
"eval_loss": 1.4238033294677734, |
|
"eval_macro_average_f1": 0.6053866489655125, |
|
"eval_micro_average_f1": 0.7358569051580699, |
|
"eval_runtime": 7.4328, |
|
"eval_samples_per_second": 323.433, |
|
"eval_steps_per_second": 20.315, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 5.399408284023669, |
|
"eval_class_f1": { |
|
"neg": 0.7969348659003831, |
|
"neu": 0.7798306389530408, |
|
"pos": 0.5721212121212121, |
|
"q": 0.3 |
|
}, |
|
"eval_loss": 1.4431192874908447, |
|
"eval_macro_average_f1": 0.612221679243659, |
|
"eval_micro_average_f1": 0.7408485856905158, |
|
"eval_runtime": 7.3682, |
|
"eval_samples_per_second": 326.266, |
|
"eval_steps_per_second": 20.493, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 5.4733727810650885, |
|
"eval_class_f1": { |
|
"neg": 0.7940717628705148, |
|
"neu": 0.783072817384674, |
|
"pos": 0.5703883495145632, |
|
"q": 0.3037974683544304 |
|
}, |
|
"eval_loss": 1.4316595792770386, |
|
"eval_macro_average_f1": 0.6128325995310456, |
|
"eval_micro_average_f1": 0.7416805324459235, |
|
"eval_runtime": 7.3736, |
|
"eval_samples_per_second": 326.03, |
|
"eval_steps_per_second": 20.479, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 5.547337278106509, |
|
"grad_norm": 0.4265735149383545, |
|
"learning_rate": 2.543835616438356e-06, |
|
"loss": 0.0933, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.547337278106509, |
|
"eval_class_f1": { |
|
"neg": 0.7978311386522074, |
|
"neu": 0.7788089713843775, |
|
"pos": 0.567409144196952, |
|
"q": 0.30769230769230765 |
|
}, |
|
"eval_loss": 1.4399964809417725, |
|
"eval_macro_average_f1": 0.6129353904814612, |
|
"eval_micro_average_f1": 0.7387687188019967, |
|
"eval_runtime": 7.2697, |
|
"eval_samples_per_second": 330.689, |
|
"eval_steps_per_second": 20.771, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 5.621301775147929, |
|
"eval_class_f1": { |
|
"neg": 0.7984375, |
|
"neu": 0.7812379853902346, |
|
"pos": 0.5714285714285714, |
|
"q": 0.32500000000000007 |
|
}, |
|
"eval_loss": 1.4240373373031616, |
|
"eval_macro_average_f1": 0.6190260142047015, |
|
"eval_micro_average_f1": 0.7412645590682196, |
|
"eval_runtime": 7.4341, |
|
"eval_samples_per_second": 323.375, |
|
"eval_steps_per_second": 20.312, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 5.695266272189349, |
|
"eval_class_f1": { |
|
"neg": 0.7987470634299139, |
|
"neu": 0.7843286420692278, |
|
"pos": 0.5703883495145632, |
|
"q": 0.30769230769230765 |
|
}, |
|
"eval_loss": 1.4332064390182495, |
|
"eval_macro_average_f1": 0.6152890906765031, |
|
"eval_micro_average_f1": 0.7437603993344426, |
|
"eval_runtime": 7.4434, |
|
"eval_samples_per_second": 322.969, |
|
"eval_steps_per_second": 20.286, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 5.769230769230769, |
|
"eval_class_f1": { |
|
"neg": 0.7981220657276996, |
|
"neu": 0.781874039938556, |
|
"pos": 0.5731132075471698, |
|
"q": 0.30769230769230765 |
|
}, |
|
"eval_loss": 1.4344979524612427, |
|
"eval_macro_average_f1": 0.6152004052264332, |
|
"eval_micro_average_f1": 0.7416805324459235, |
|
"eval_runtime": 7.3808, |
|
"eval_samples_per_second": 325.708, |
|
"eval_steps_per_second": 20.458, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 5.84319526627219, |
|
"eval_class_f1": { |
|
"neg": 0.7990654205607477, |
|
"neu": 0.7815384615384616, |
|
"pos": 0.5724465558194775, |
|
"q": 0.3414634146341463 |
|
}, |
|
"eval_loss": 1.4412455558776855, |
|
"eval_macro_average_f1": 0.6236284631382082, |
|
"eval_micro_average_f1": 0.7420965058236273, |
|
"eval_runtime": 7.3915, |
|
"eval_samples_per_second": 325.237, |
|
"eval_steps_per_second": 20.429, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 5.9171597633136095, |
|
"grad_norm": 16.41318702697754, |
|
"learning_rate": 4.89041095890411e-07, |
|
"loss": 0.1006, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.9171597633136095, |
|
"eval_class_f1": { |
|
"neg": 0.7987519500780033, |
|
"neu": 0.7813098429720413, |
|
"pos": 0.5700598802395208, |
|
"q": 0.32500000000000007 |
|
}, |
|
"eval_loss": 1.4469937086105347, |
|
"eval_macro_average_f1": 0.6187804183223914, |
|
"eval_micro_average_f1": 0.7416805324459235, |
|
"eval_runtime": 7.3689, |
|
"eval_samples_per_second": 326.236, |
|
"eval_steps_per_second": 20.492, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 5.991124260355029, |
|
"eval_class_f1": { |
|
"neg": 0.7990654205607477, |
|
"neu": 0.781441717791411, |
|
"pos": 0.569377990430622, |
|
"q": 0.32500000000000007 |
|
}, |
|
"eval_loss": 1.4454258680343628, |
|
"eval_macro_average_f1": 0.6187212821956952, |
|
"eval_micro_average_f1": 0.7416805324459235, |
|
"eval_runtime": 7.448, |
|
"eval_samples_per_second": 322.77, |
|
"eval_steps_per_second": 20.274, |
|
"step": 8100 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 8112, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 100, |
|
"total_flos": 1.0485727069042368e+16, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|