|
{ |
|
"best_metric": 0.6784613322610911, |
|
"best_model_checkpoint": "/tmp/test-ner1_base/checkpoint-26840", |
|
"epoch": 120.0, |
|
"global_step": 29280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7635791130936762, |
|
"eval_f1": 0.4809298946603705, |
|
"eval_loss": 0.8214002847671509, |
|
"eval_precision": 0.4246311738293778, |
|
"eval_recall": 0.5544388609715243, |
|
"eval_runtime": 2.9656, |
|
"eval_samples_per_second": 374.972, |
|
"eval_steps_per_second": 11.802, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8022709381932683, |
|
"eval_f1": 0.5700258397932816, |
|
"eval_loss": 0.6734200119972229, |
|
"eval_precision": 0.5305435305435305, |
|
"eval_recall": 0.615857063093244, |
|
"eval_runtime": 2.95, |
|
"eval_samples_per_second": 376.943, |
|
"eval_steps_per_second": 11.864, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.9487704918032787e-05, |
|
"loss": 0.9764, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8072087975000596, |
|
"eval_f1": 0.6019592678525393, |
|
"eval_loss": 0.6425491571426392, |
|
"eval_precision": 0.5591475095785441, |
|
"eval_recall": 0.6518704634282524, |
|
"eval_runtime": 2.9986, |
|
"eval_samples_per_second": 370.843, |
|
"eval_steps_per_second": 11.672, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8214737243863457, |
|
"eval_f1": 0.6151294808011226, |
|
"eval_loss": 0.6202793121337891, |
|
"eval_precision": 0.5663612872915198, |
|
"eval_recall": 0.6730876605248465, |
|
"eval_runtime": 2.9638, |
|
"eval_samples_per_second": 375.198, |
|
"eval_steps_per_second": 11.809, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 2.8975409836065577e-05, |
|
"loss": 0.4504, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8170606617208559, |
|
"eval_f1": 0.6304066304066305, |
|
"eval_loss": 0.6483346223831177, |
|
"eval_precision": 0.5879227053140097, |
|
"eval_recall": 0.6795086543830262, |
|
"eval_runtime": 2.9472, |
|
"eval_samples_per_second": 377.302, |
|
"eval_steps_per_second": 11.876, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8137687555163283, |
|
"eval_f1": 0.607830950901181, |
|
"eval_loss": 0.6827735900878906, |
|
"eval_precision": 0.5478377772798566, |
|
"eval_recall": 0.6825795644891123, |
|
"eval_runtime": 2.9631, |
|
"eval_samples_per_second": 375.283, |
|
"eval_steps_per_second": 11.812, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 6.15, |
|
"learning_rate": 2.846311475409836e-05, |
|
"loss": 0.2877, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8115025882016174, |
|
"eval_f1": 0.6301228183581126, |
|
"eval_loss": 0.709669828414917, |
|
"eval_precision": 0.5868047194798941, |
|
"eval_recall": 0.6803461753210497, |
|
"eval_runtime": 2.9712, |
|
"eval_samples_per_second": 374.266, |
|
"eval_steps_per_second": 11.78, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8133393764461726, |
|
"eval_f1": 0.6333632978040324, |
|
"eval_loss": 0.7538309097290039, |
|
"eval_precision": 0.5864447086801426, |
|
"eval_recall": 0.6884422110552764, |
|
"eval_runtime": 2.99, |
|
"eval_samples_per_second": 371.902, |
|
"eval_steps_per_second": 11.706, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 8.2, |
|
"learning_rate": 2.795081967213115e-05, |
|
"loss": 0.1968, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.812862288590444, |
|
"eval_f1": 0.6361865177295752, |
|
"eval_loss": 0.7852667570114136, |
|
"eval_precision": 0.594850619383046, |
|
"eval_recall": 0.6836962590731435, |
|
"eval_runtime": 2.9647, |
|
"eval_samples_per_second": 375.08, |
|
"eval_steps_per_second": 11.806, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8093556928508385, |
|
"eval_f1": 0.6372536222425271, |
|
"eval_loss": 0.8311049938201904, |
|
"eval_precision": 0.5984309879872518, |
|
"eval_recall": 0.681462869905081, |
|
"eval_runtime": 3.0899, |
|
"eval_samples_per_second": 359.881, |
|
"eval_steps_per_second": 11.327, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 10.25, |
|
"learning_rate": 2.7438524590163935e-05, |
|
"loss": 0.1443, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8189928675365569, |
|
"eval_f1": 0.6475366876310273, |
|
"eval_loss": 0.790969967842102, |
|
"eval_precision": 0.6101234567901235, |
|
"eval_recall": 0.6898380792853155, |
|
"eval_runtime": 2.9631, |
|
"eval_samples_per_second": 375.284, |
|
"eval_steps_per_second": 11.812, |
|
"step": 2684 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8147229312277856, |
|
"eval_f1": 0.6334806342604627, |
|
"eval_loss": 0.8414269089698792, |
|
"eval_precision": 0.5926556420233463, |
|
"eval_recall": 0.6803461753210497, |
|
"eval_runtime": 2.9571, |
|
"eval_samples_per_second": 376.042, |
|
"eval_steps_per_second": 11.836, |
|
"step": 2928 |
|
}, |
|
{ |
|
"epoch": 12.3, |
|
"learning_rate": 2.6926229508196725e-05, |
|
"loss": 0.1118, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8069225447866225, |
|
"eval_f1": 0.6412532637075719, |
|
"eval_loss": 0.8946433067321777, |
|
"eval_precision": 0.6022560078469839, |
|
"eval_recall": 0.6856504745951982, |
|
"eval_runtime": 2.9859, |
|
"eval_samples_per_second": 372.42, |
|
"eval_steps_per_second": 11.722, |
|
"step": 3172 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8129815605543761, |
|
"eval_f1": 0.6424639580602883, |
|
"eval_loss": 0.9194995760917664, |
|
"eval_precision": 0.6054841897233202, |
|
"eval_recall": 0.6842546063651591, |
|
"eval_runtime": 2.9708, |
|
"eval_samples_per_second": 374.309, |
|
"eval_steps_per_second": 11.781, |
|
"step": 3416 |
|
}, |
|
{ |
|
"epoch": 14.34, |
|
"learning_rate": 2.6413934426229508e-05, |
|
"loss": 0.0838, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8192314114644211, |
|
"eval_f1": 0.6421779340888367, |
|
"eval_loss": 0.9149069786071777, |
|
"eval_precision": 0.6019536019536019, |
|
"eval_recall": 0.6881630374092685, |
|
"eval_runtime": 2.9929, |
|
"eval_samples_per_second": 371.552, |
|
"eval_steps_per_second": 11.695, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8201140239975191, |
|
"eval_f1": 0.6456589958158997, |
|
"eval_loss": 0.9356908798217773, |
|
"eval_precision": 0.6072306935563208, |
|
"eval_recall": 0.6892797319932998, |
|
"eval_runtime": 2.9882, |
|
"eval_samples_per_second": 372.132, |
|
"eval_steps_per_second": 11.713, |
|
"step": 3904 |
|
}, |
|
{ |
|
"epoch": 16.39, |
|
"learning_rate": 2.5901639344262294e-05, |
|
"loss": 0.0661, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8172514968631474, |
|
"eval_f1": 0.6432016686220832, |
|
"eval_loss": 0.9784498810768127, |
|
"eval_precision": 0.60332599657618, |
|
"eval_recall": 0.6887213847012842, |
|
"eval_runtime": 2.968, |
|
"eval_samples_per_second": 374.663, |
|
"eval_steps_per_second": 11.792, |
|
"step": 4148 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8184442165024689, |
|
"eval_f1": 0.6472832522036575, |
|
"eval_loss": 0.9842237234115601, |
|
"eval_precision": 0.6120925603383927, |
|
"eval_recall": 0.6867671691792295, |
|
"eval_runtime": 2.981, |
|
"eval_samples_per_second": 373.035, |
|
"eval_steps_per_second": 11.741, |
|
"step": 4392 |
|
}, |
|
{ |
|
"epoch": 18.44, |
|
"learning_rate": 2.5389344262295083e-05, |
|
"loss": 0.0514, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8163688843300494, |
|
"eval_f1": 0.6476140534871526, |
|
"eval_loss": 1.0097302198410034, |
|
"eval_precision": 0.6104794859120118, |
|
"eval_recall": 0.6895589056393077, |
|
"eval_runtime": 2.9667, |
|
"eval_samples_per_second": 374.823, |
|
"eval_steps_per_second": 11.797, |
|
"step": 4636 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8167028458290594, |
|
"eval_f1": 0.648761408083442, |
|
"eval_loss": 1.0300242900848389, |
|
"eval_precision": 0.6086105675146771, |
|
"eval_recall": 0.6945840312674484, |
|
"eval_runtime": 3.032, |
|
"eval_samples_per_second": 366.754, |
|
"eval_steps_per_second": 11.544, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 20.49, |
|
"learning_rate": 2.487704918032787e-05, |
|
"loss": 0.0416, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.8204718398893156, |
|
"eval_f1": 0.652764946548766, |
|
"eval_loss": 1.0250210762023926, |
|
"eval_precision": 0.6190237797246558, |
|
"eval_recall": 0.6903964265773311, |
|
"eval_runtime": 2.9697, |
|
"eval_samples_per_second": 374.455, |
|
"eval_steps_per_second": 11.786, |
|
"step": 5124 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.8167267002218459, |
|
"eval_f1": 0.6531738730450781, |
|
"eval_loss": 1.087920069694519, |
|
"eval_precision": 0.6170846784206605, |
|
"eval_recall": 0.6937465103294249, |
|
"eval_runtime": 2.983, |
|
"eval_samples_per_second": 372.773, |
|
"eval_steps_per_second": 11.733, |
|
"step": 5368 |
|
}, |
|
{ |
|
"epoch": 22.54, |
|
"learning_rate": 2.436475409836066e-05, |
|
"loss": 0.0324, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.8127191622337253, |
|
"eval_f1": 0.6434782608695652, |
|
"eval_loss": 1.134914755821228, |
|
"eval_precision": 0.6092814371257484, |
|
"eval_recall": 0.6817420435510888, |
|
"eval_runtime": 2.9583, |
|
"eval_samples_per_second": 375.891, |
|
"eval_steps_per_second": 11.831, |
|
"step": 5612 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8181102550034589, |
|
"eval_f1": 0.6542006847511194, |
|
"eval_loss": 1.0993841886520386, |
|
"eval_precision": 0.6191425722831505, |
|
"eval_recall": 0.6934673366834171, |
|
"eval_runtime": 2.985, |
|
"eval_samples_per_second": 372.527, |
|
"eval_steps_per_second": 11.725, |
|
"step": 5856 |
|
}, |
|
{ |
|
"epoch": 24.59, |
|
"learning_rate": 2.3852459016393442e-05, |
|
"loss": 0.0277, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.8152715822618736, |
|
"eval_f1": 0.6543161214032323, |
|
"eval_loss": 1.1400500535964966, |
|
"eval_precision": 0.6180193596425912, |
|
"eval_recall": 0.695142378559464, |
|
"eval_runtime": 3.0938, |
|
"eval_samples_per_second": 359.431, |
|
"eval_steps_per_second": 11.313, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.808449225924954, |
|
"eval_f1": 0.6410887880751782, |
|
"eval_loss": 1.1867998838424683, |
|
"eval_precision": 0.5983547060246794, |
|
"eval_recall": 0.6903964265773311, |
|
"eval_runtime": 2.9903, |
|
"eval_samples_per_second": 371.869, |
|
"eval_steps_per_second": 11.704, |
|
"step": 6344 |
|
}, |
|
{ |
|
"epoch": 26.64, |
|
"learning_rate": 2.3340163934426228e-05, |
|
"loss": 0.0223, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.8138880274802605, |
|
"eval_f1": 0.6558120912851995, |
|
"eval_loss": 1.205185890197754, |
|
"eval_precision": 0.628228074661212, |
|
"eval_recall": 0.6859296482412061, |
|
"eval_runtime": 2.9746, |
|
"eval_samples_per_second": 373.828, |
|
"eval_steps_per_second": 11.766, |
|
"step": 6588 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.81529543665466, |
|
"eval_f1": 0.6529110264160862, |
|
"eval_loss": 1.1963977813720703, |
|
"eval_precision": 0.6168363546064067, |
|
"eval_recall": 0.6934673366834171, |
|
"eval_runtime": 2.9694, |
|
"eval_samples_per_second": 374.481, |
|
"eval_steps_per_second": 11.787, |
|
"step": 6832 |
|
}, |
|
{ |
|
"epoch": 28.69, |
|
"learning_rate": 2.2827868852459018e-05, |
|
"loss": 0.019, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.820161732783092, |
|
"eval_f1": 0.6533368644067797, |
|
"eval_loss": 1.1897813081741333, |
|
"eval_precision": 0.6214105793450881, |
|
"eval_recall": 0.6887213847012842, |
|
"eval_runtime": 2.9761, |
|
"eval_samples_per_second": 373.647, |
|
"eval_steps_per_second": 11.76, |
|
"step": 7076 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.8134586484101047, |
|
"eval_f1": 0.6515647505565013, |
|
"eval_loss": 1.2819464206695557, |
|
"eval_precision": 0.6135635018495684, |
|
"eval_recall": 0.6945840312674484, |
|
"eval_runtime": 2.9771, |
|
"eval_samples_per_second": 373.523, |
|
"eval_steps_per_second": 11.757, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 30.74, |
|
"learning_rate": 2.2315573770491804e-05, |
|
"loss": 0.0159, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.8127907254120846, |
|
"eval_f1": 0.6476589066178393, |
|
"eval_loss": 1.2686526775360107, |
|
"eval_precision": 0.609251968503937, |
|
"eval_recall": 0.6912339475153545, |
|
"eval_runtime": 3.0513, |
|
"eval_samples_per_second": 364.436, |
|
"eval_steps_per_second": 11.471, |
|
"step": 7564 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.8158917964743208, |
|
"eval_f1": 0.6522136607026251, |
|
"eval_loss": 1.2997089624404907, |
|
"eval_precision": 0.612760736196319, |
|
"eval_recall": 0.6970965940815187, |
|
"eval_runtime": 3.0022, |
|
"eval_samples_per_second": 370.396, |
|
"eval_steps_per_second": 11.658, |
|
"step": 7808 |
|
}, |
|
{ |
|
"epoch": 32.79, |
|
"learning_rate": 2.180327868852459e-05, |
|
"loss": 0.0141, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.8157248157248157, |
|
"eval_f1": 0.6490136369654441, |
|
"eval_loss": 1.2799758911132812, |
|
"eval_precision": 0.6172248803827751, |
|
"eval_recall": 0.6842546063651591, |
|
"eval_runtime": 2.9595, |
|
"eval_samples_per_second": 375.745, |
|
"eval_steps_per_second": 11.827, |
|
"step": 8052 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.8141265714081248, |
|
"eval_f1": 0.6543144520910535, |
|
"eval_loss": 1.3110435009002686, |
|
"eval_precision": 0.6220432813286362, |
|
"eval_recall": 0.6901172529313233, |
|
"eval_runtime": 2.9482, |
|
"eval_samples_per_second": 377.185, |
|
"eval_steps_per_second": 11.872, |
|
"step": 8296 |
|
}, |
|
{ |
|
"epoch": 34.84, |
|
"learning_rate": 2.1290983606557376e-05, |
|
"loss": 0.0107, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.8159633596526801, |
|
"eval_f1": 0.6468897020386828, |
|
"eval_loss": 1.3342597484588623, |
|
"eval_precision": 0.6081081081081081, |
|
"eval_recall": 0.6909547738693468, |
|
"eval_runtime": 2.9541, |
|
"eval_samples_per_second": 376.43, |
|
"eval_steps_per_second": 11.848, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8130292693399489, |
|
"eval_f1": 0.6493064642763675, |
|
"eval_loss": 1.3406134843826294, |
|
"eval_precision": 0.6110837438423645, |
|
"eval_recall": 0.6926298157453936, |
|
"eval_runtime": 2.9685, |
|
"eval_samples_per_second": 374.606, |
|
"eval_steps_per_second": 11.791, |
|
"step": 8784 |
|
}, |
|
{ |
|
"epoch": 36.89, |
|
"learning_rate": 2.0778688524590166e-05, |
|
"loss": 0.0106, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.8127430166265118, |
|
"eval_f1": 0.6453556923883139, |
|
"eval_loss": 1.3921236991882324, |
|
"eval_precision": 0.6079980251789682, |
|
"eval_recall": 0.6876046901172529, |
|
"eval_runtime": 2.9551, |
|
"eval_samples_per_second": 376.302, |
|
"eval_steps_per_second": 11.844, |
|
"step": 9028 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.8099997614560721, |
|
"eval_f1": 0.6453305351521511, |
|
"eval_loss": 1.4060559272766113, |
|
"eval_precision": 0.6086095992083127, |
|
"eval_recall": 0.6867671691792295, |
|
"eval_runtime": 2.9714, |
|
"eval_samples_per_second": 374.231, |
|
"eval_steps_per_second": 11.779, |
|
"step": 9272 |
|
}, |
|
{ |
|
"epoch": 38.93, |
|
"learning_rate": 2.0266393442622952e-05, |
|
"loss": 0.0088, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.8165835738651273, |
|
"eval_f1": 0.6592208482914507, |
|
"eval_loss": 1.382816195487976, |
|
"eval_precision": 0.6293475501396294, |
|
"eval_recall": 0.692071468453378, |
|
"eval_runtime": 2.9744, |
|
"eval_samples_per_second": 373.857, |
|
"eval_steps_per_second": 11.767, |
|
"step": 9516 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.8129577061615897, |
|
"eval_f1": 0.6572372769332453, |
|
"eval_loss": 1.42629873752594, |
|
"eval_precision": 0.6241526487572182, |
|
"eval_recall": 0.6940256839754327, |
|
"eval_runtime": 3.0851, |
|
"eval_samples_per_second": 360.448, |
|
"eval_steps_per_second": 11.345, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 40.98, |
|
"learning_rate": 1.975409836065574e-05, |
|
"loss": 0.0086, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.8185157796808282, |
|
"eval_f1": 0.6573940427765386, |
|
"eval_loss": 1.3521106243133545, |
|
"eval_precision": 0.620203020549641, |
|
"eval_recall": 0.6993299832495813, |
|
"eval_runtime": 2.9673, |
|
"eval_samples_per_second": 374.747, |
|
"eval_steps_per_second": 11.795, |
|
"step": 10004 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.8196369361417906, |
|
"eval_f1": 0.6713979646491698, |
|
"eval_loss": 1.372209072113037, |
|
"eval_precision": 0.6451363870303655, |
|
"eval_recall": 0.6998883305415968, |
|
"eval_runtime": 2.9625, |
|
"eval_samples_per_second": 375.361, |
|
"eval_steps_per_second": 11.814, |
|
"step": 10248 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.822594880847308, |
|
"eval_f1": 0.6647074539139727, |
|
"eval_loss": 1.3783916234970093, |
|
"eval_precision": 0.6372950819672131, |
|
"eval_recall": 0.6945840312674484, |
|
"eval_runtime": 2.9632, |
|
"eval_samples_per_second": 375.273, |
|
"eval_steps_per_second": 11.812, |
|
"step": 10492 |
|
}, |
|
{ |
|
"epoch": 43.03, |
|
"learning_rate": 1.9241803278688525e-05, |
|
"loss": 0.0075, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8140072994441927, |
|
"eval_f1": 0.6623151725056613, |
|
"eval_loss": 1.433977484703064, |
|
"eval_precision": 0.6333757961783439, |
|
"eval_recall": 0.6940256839754327, |
|
"eval_runtime": 2.9882, |
|
"eval_samples_per_second": 372.133, |
|
"eval_steps_per_second": 11.713, |
|
"step": 10736 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.816130340402185, |
|
"eval_f1": 0.6643708609271523, |
|
"eval_loss": 1.390194058418274, |
|
"eval_precision": 0.6320564516129032, |
|
"eval_recall": 0.7001675041876047, |
|
"eval_runtime": 2.9941, |
|
"eval_samples_per_second": 371.401, |
|
"eval_steps_per_second": 11.69, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 45.08, |
|
"learning_rate": 1.872950819672131e-05, |
|
"loss": 0.0066, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.8162496123661173, |
|
"eval_f1": 0.6585943669386681, |
|
"eval_loss": 1.401918888092041, |
|
"eval_precision": 0.62300796812749, |
|
"eval_recall": 0.6984924623115578, |
|
"eval_runtime": 2.9812, |
|
"eval_samples_per_second": 373.006, |
|
"eval_steps_per_second": 11.74, |
|
"step": 11224 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.816130340402185, |
|
"eval_f1": 0.6548463356973996, |
|
"eval_loss": 1.431990623474121, |
|
"eval_precision": 0.6183035714285714, |
|
"eval_recall": 0.6959798994974874, |
|
"eval_runtime": 2.9274, |
|
"eval_samples_per_second": 379.864, |
|
"eval_steps_per_second": 11.956, |
|
"step": 11468 |
|
}, |
|
{ |
|
"epoch": 47.13, |
|
"learning_rate": 1.82172131147541e-05, |
|
"loss": 0.0067, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8200424608191599, |
|
"eval_f1": 0.6645460569913849, |
|
"eval_loss": 1.4461051225662231, |
|
"eval_precision": 0.6326015644713601, |
|
"eval_recall": 0.6998883305415968, |
|
"eval_runtime": 3.0764, |
|
"eval_samples_per_second": 361.458, |
|
"eval_steps_per_second": 11.377, |
|
"step": 11712 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.8202332959614513, |
|
"eval_f1": 0.6595464135021096, |
|
"eval_loss": 1.432677984237671, |
|
"eval_precision": 0.6249375312343828, |
|
"eval_recall": 0.69821328866555, |
|
"eval_runtime": 3.0963, |
|
"eval_samples_per_second": 359.141, |
|
"eval_steps_per_second": 11.304, |
|
"step": 11956 |
|
}, |
|
{ |
|
"epoch": 49.18, |
|
"learning_rate": 1.7704918032786887e-05, |
|
"loss": 0.0054, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.8176331671477303, |
|
"eval_f1": 0.6632, |
|
"eval_loss": 1.4615715742111206, |
|
"eval_precision": 0.6347626339969372, |
|
"eval_recall": 0.6943048576214406, |
|
"eval_runtime": 3.0477, |
|
"eval_samples_per_second": 364.866, |
|
"eval_steps_per_second": 11.484, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.8177047303260896, |
|
"eval_f1": 0.6543322475570033, |
|
"eval_loss": 1.4536991119384766, |
|
"eval_precision": 0.6134864402638651, |
|
"eval_recall": 0.7010050251256281, |
|
"eval_runtime": 2.9579, |
|
"eval_samples_per_second": 375.937, |
|
"eval_steps_per_second": 11.833, |
|
"step": 12444 |
|
}, |
|
{ |
|
"epoch": 51.23, |
|
"learning_rate": 1.7192622950819673e-05, |
|
"loss": 0.0052, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8095703823859164, |
|
"eval_f1": 0.6560255387071029, |
|
"eval_loss": 1.5621511936187744, |
|
"eval_precision": 0.6265243902439024, |
|
"eval_recall": 0.6884422110552764, |
|
"eval_runtime": 3.0041, |
|
"eval_samples_per_second": 370.165, |
|
"eval_steps_per_second": 11.651, |
|
"step": 12688 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.8235967653443381, |
|
"eval_f1": 0.6670200079501789, |
|
"eval_loss": 1.4217201471328735, |
|
"eval_precision": 0.6348045397225726, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 3.0478, |
|
"eval_samples_per_second": 364.848, |
|
"eval_steps_per_second": 11.484, |
|
"step": 12932 |
|
}, |
|
{ |
|
"epoch": 53.28, |
|
"learning_rate": 1.668032786885246e-05, |
|
"loss": 0.0051, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.8217361227069965, |
|
"eval_f1": 0.6597038603913273, |
|
"eval_loss": 1.4624608755111694, |
|
"eval_precision": 0.6265695630336514, |
|
"eval_recall": 0.696538246789503, |
|
"eval_runtime": 3.0888, |
|
"eval_samples_per_second": 360.016, |
|
"eval_steps_per_second": 11.331, |
|
"step": 13176 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.8257198063023306, |
|
"eval_f1": 0.6645213193885761, |
|
"eval_loss": 1.4358925819396973, |
|
"eval_precision": 0.6393188854489165, |
|
"eval_recall": 0.6917922948073701, |
|
"eval_runtime": 3.0221, |
|
"eval_samples_per_second": 367.953, |
|
"eval_steps_per_second": 11.581, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 55.33, |
|
"learning_rate": 1.6168032786885245e-05, |
|
"loss": 0.0049, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8230719687030367, |
|
"eval_f1": 0.6701528559935639, |
|
"eval_loss": 1.4616944789886475, |
|
"eval_precision": 0.6447368421052632, |
|
"eval_recall": 0.6976549413735343, |
|
"eval_runtime": 3.0092, |
|
"eval_samples_per_second": 369.528, |
|
"eval_steps_per_second": 11.631, |
|
"step": 13664 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.8181102550034589, |
|
"eval_f1": 0.6630275595792837, |
|
"eval_loss": 1.5170940160751343, |
|
"eval_precision": 0.6337490455586663, |
|
"eval_recall": 0.695142378559464, |
|
"eval_runtime": 2.9576, |
|
"eval_samples_per_second": 375.979, |
|
"eval_steps_per_second": 11.834, |
|
"step": 13908 |
|
}, |
|
{ |
|
"epoch": 57.38, |
|
"learning_rate": 1.5655737704918035e-05, |
|
"loss": 0.0037, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.8205672574604613, |
|
"eval_f1": 0.6667548967707781, |
|
"eval_loss": 1.4998589754104614, |
|
"eval_precision": 0.6338701560140916, |
|
"eval_recall": 0.7032384142936907, |
|
"eval_runtime": 2.9519, |
|
"eval_samples_per_second": 376.709, |
|
"eval_steps_per_second": 11.857, |
|
"step": 14152 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.8208296557811121, |
|
"eval_f1": 0.6617453203269179, |
|
"eval_loss": 1.484113335609436, |
|
"eval_precision": 0.6268731268731269, |
|
"eval_recall": 0.7007258514796203, |
|
"eval_runtime": 2.9535, |
|
"eval_samples_per_second": 376.498, |
|
"eval_steps_per_second": 11.85, |
|
"step": 14396 |
|
}, |
|
{ |
|
"epoch": 59.43, |
|
"learning_rate": 1.514344262295082e-05, |
|
"loss": 0.004, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8244316690918633, |
|
"eval_f1": 0.6695859872611465, |
|
"eval_loss": 1.436055302619934, |
|
"eval_precision": 0.6380880121396054, |
|
"eval_recall": 0.7043551088777219, |
|
"eval_runtime": 2.9713, |
|
"eval_samples_per_second": 374.253, |
|
"eval_steps_per_second": 11.78, |
|
"step": 14640 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.8235252021659789, |
|
"eval_f1": 0.6716417910447762, |
|
"eval_loss": 1.4800474643707275, |
|
"eval_precision": 0.6425293217746048, |
|
"eval_recall": 0.7035175879396985, |
|
"eval_runtime": 2.9663, |
|
"eval_samples_per_second": 374.881, |
|
"eval_steps_per_second": 11.799, |
|
"step": 14884 |
|
}, |
|
{ |
|
"epoch": 61.48, |
|
"learning_rate": 1.4631147540983607e-05, |
|
"loss": 0.004, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.8240977075928533, |
|
"eval_f1": 0.664367206155479, |
|
"eval_loss": 1.4699968099594116, |
|
"eval_precision": 0.6329625884732053, |
|
"eval_recall": 0.6990508096035735, |
|
"eval_runtime": 2.9625, |
|
"eval_samples_per_second": 375.361, |
|
"eval_steps_per_second": 11.814, |
|
"step": 15128 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.821211326065695, |
|
"eval_f1": 0.6643754130865829, |
|
"eval_loss": 1.5107179880142212, |
|
"eval_precision": 0.6309314586994728, |
|
"eval_recall": 0.7015633724176438, |
|
"eval_runtime": 2.9695, |
|
"eval_samples_per_second": 374.47, |
|
"eval_steps_per_second": 11.786, |
|
"step": 15372 |
|
}, |
|
{ |
|
"epoch": 63.52, |
|
"learning_rate": 1.4118852459016394e-05, |
|
"loss": 0.0037, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.8227141528112402, |
|
"eval_f1": 0.6691489361702128, |
|
"eval_loss": 1.5131914615631104, |
|
"eval_precision": 0.6389029964448959, |
|
"eval_recall": 0.7024008933556672, |
|
"eval_runtime": 2.9514, |
|
"eval_samples_per_second": 376.769, |
|
"eval_steps_per_second": 11.859, |
|
"step": 15616 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.8239307268433482, |
|
"eval_f1": 0.6631481725821349, |
|
"eval_loss": 1.5229130983352661, |
|
"eval_precision": 0.6287215411558669, |
|
"eval_recall": 0.7015633724176438, |
|
"eval_runtime": 3.0184, |
|
"eval_samples_per_second": 368.409, |
|
"eval_steps_per_second": 11.596, |
|
"step": 15860 |
|
}, |
|
{ |
|
"epoch": 65.57, |
|
"learning_rate": 1.3606557377049181e-05, |
|
"loss": 0.0033, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.8242408339495718, |
|
"eval_f1": 0.6695929768555469, |
|
"eval_loss": 1.5573978424072266, |
|
"eval_precision": 0.6394817073170732, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 2.9796, |
|
"eval_samples_per_second": 373.204, |
|
"eval_steps_per_second": 11.747, |
|
"step": 16104 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.8195892273562176, |
|
"eval_f1": 0.6637761135199055, |
|
"eval_loss": 1.5216217041015625, |
|
"eval_precision": 0.6269545793000745, |
|
"eval_recall": 0.7051926298157454, |
|
"eval_runtime": 2.9602, |
|
"eval_samples_per_second": 375.655, |
|
"eval_steps_per_second": 11.824, |
|
"step": 16348 |
|
}, |
|
{ |
|
"epoch": 67.62, |
|
"learning_rate": 1.3094262295081968e-05, |
|
"loss": 0.0033, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.8242169795567854, |
|
"eval_f1": 0.6635576282478347, |
|
"eval_loss": 1.4876649379730225, |
|
"eval_precision": 0.6347183278103492, |
|
"eval_recall": 0.695142378559464, |
|
"eval_runtime": 2.9571, |
|
"eval_samples_per_second": 376.04, |
|
"eval_steps_per_second": 11.836, |
|
"step": 16592 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.8194699553922855, |
|
"eval_f1": 0.6630635380964935, |
|
"eval_loss": 1.5372997522354126, |
|
"eval_precision": 0.6281218781218781, |
|
"eval_recall": 0.7021217197096594, |
|
"eval_runtime": 3.0929, |
|
"eval_samples_per_second": 359.536, |
|
"eval_steps_per_second": 11.316, |
|
"step": 16836 |
|
}, |
|
{ |
|
"epoch": 69.67, |
|
"learning_rate": 1.2581967213114756e-05, |
|
"loss": 0.0026, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.8200663152119463, |
|
"eval_f1": 0.6651637713831056, |
|
"eval_loss": 1.5521858930587769, |
|
"eval_precision": 0.6334933063905026, |
|
"eval_recall": 0.7001675041876047, |
|
"eval_runtime": 3.0082, |
|
"eval_samples_per_second": 369.652, |
|
"eval_steps_per_second": 11.635, |
|
"step": 17080 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.8226902984184538, |
|
"eval_f1": 0.6691449814126393, |
|
"eval_loss": 1.5180128812789917, |
|
"eval_precision": 0.6379746835443038, |
|
"eval_recall": 0.7035175879396985, |
|
"eval_runtime": 2.9978, |
|
"eval_samples_per_second": 370.939, |
|
"eval_steps_per_second": 11.675, |
|
"step": 17324 |
|
}, |
|
{ |
|
"epoch": 71.72, |
|
"learning_rate": 1.206967213114754e-05, |
|
"loss": 0.0024, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8218076858853558, |
|
"eval_f1": 0.67304324397144, |
|
"eval_loss": 1.5517120361328125, |
|
"eval_precision": 0.6503514709711012, |
|
"eval_recall": 0.6973757677275265, |
|
"eval_runtime": 2.9567, |
|
"eval_samples_per_second": 376.089, |
|
"eval_steps_per_second": 11.837, |
|
"step": 17568 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.8206388206388207, |
|
"eval_f1": 0.6658723854911305, |
|
"eval_loss": 1.539225697517395, |
|
"eval_precision": 0.6331822759315207, |
|
"eval_recall": 0.7021217197096594, |
|
"eval_runtime": 2.9669, |
|
"eval_samples_per_second": 374.796, |
|
"eval_steps_per_second": 11.797, |
|
"step": 17812 |
|
}, |
|
{ |
|
"epoch": 73.77, |
|
"learning_rate": 1.1557377049180328e-05, |
|
"loss": 0.0026, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.8245986498413683, |
|
"eval_f1": 0.669957310565635, |
|
"eval_loss": 1.5395687818527222, |
|
"eval_precision": 0.641543178334185, |
|
"eval_recall": 0.7010050251256281, |
|
"eval_runtime": 2.9773, |
|
"eval_samples_per_second": 373.49, |
|
"eval_steps_per_second": 11.756, |
|
"step": 18056 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.8233343670236875, |
|
"eval_f1": 0.6740153246404087, |
|
"eval_loss": 1.5637731552124023, |
|
"eval_precision": 0.6499870365569095, |
|
"eval_recall": 0.6998883305415968, |
|
"eval_runtime": 2.9662, |
|
"eval_samples_per_second": 374.894, |
|
"eval_steps_per_second": 11.8, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 75.82, |
|
"learning_rate": 1.1045081967213114e-05, |
|
"loss": 0.0019, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.8201855871758784, |
|
"eval_f1": 0.6666666666666666, |
|
"eval_loss": 1.5789735317230225, |
|
"eval_precision": 0.6437857514300572, |
|
"eval_recall": 0.6912339475153545, |
|
"eval_runtime": 2.9784, |
|
"eval_samples_per_second": 373.357, |
|
"eval_steps_per_second": 11.751, |
|
"step": 18544 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.8216168507430643, |
|
"eval_f1": 0.676486341724692, |
|
"eval_loss": 1.5545753240585327, |
|
"eval_precision": 0.6500257334019557, |
|
"eval_recall": 0.7051926298157454, |
|
"eval_runtime": 2.9887, |
|
"eval_samples_per_second": 372.065, |
|
"eval_steps_per_second": 11.711, |
|
"step": 18788 |
|
}, |
|
{ |
|
"epoch": 77.87, |
|
"learning_rate": 1.0532786885245902e-05, |
|
"loss": 0.0029, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.8236206197371246, |
|
"eval_f1": 0.6684357171288311, |
|
"eval_loss": 1.5374187231063843, |
|
"eval_precision": 0.6369152970922882, |
|
"eval_recall": 0.7032384142936907, |
|
"eval_runtime": 2.9659, |
|
"eval_samples_per_second": 374.933, |
|
"eval_steps_per_second": 11.801, |
|
"step": 19032 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.8180148374323132, |
|
"eval_f1": 0.6651595744680852, |
|
"eval_loss": 1.5923025608062744, |
|
"eval_precision": 0.6350939563230066, |
|
"eval_recall": 0.69821328866555, |
|
"eval_runtime": 2.9684, |
|
"eval_samples_per_second": 374.615, |
|
"eval_steps_per_second": 11.791, |
|
"step": 19276 |
|
}, |
|
{ |
|
"epoch": 79.92, |
|
"learning_rate": 1.0020491803278688e-05, |
|
"loss": 0.0015, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8245986498413683, |
|
"eval_f1": 0.6673737239825004, |
|
"eval_loss": 1.5727756023406982, |
|
"eval_precision": 0.6354455945468316, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 2.9791, |
|
"eval_samples_per_second": 373.261, |
|
"eval_steps_per_second": 11.748, |
|
"step": 19520 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.8228811335607452, |
|
"eval_f1": 0.6686279753944906, |
|
"eval_loss": 1.564627766609192, |
|
"eval_precision": 0.6416837782340863, |
|
"eval_recall": 0.6979341150195422, |
|
"eval_runtime": 2.9678, |
|
"eval_samples_per_second": 374.693, |
|
"eval_steps_per_second": 11.793, |
|
"step": 19764 |
|
}, |
|
{ |
|
"epoch": 81.97, |
|
"learning_rate": 9.508196721311476e-06, |
|
"loss": 0.0019, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.8210920541017629, |
|
"eval_f1": 0.6615000656771313, |
|
"eval_loss": 1.5844708681106567, |
|
"eval_precision": 0.6246588935747953, |
|
"eval_recall": 0.7029592406476829, |
|
"eval_runtime": 2.9641, |
|
"eval_samples_per_second": 375.15, |
|
"eval_steps_per_second": 11.808, |
|
"step": 20008 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.819279120249994, |
|
"eval_f1": 0.666935159081756, |
|
"eval_loss": 1.589419960975647, |
|
"eval_precision": 0.6423584173778123, |
|
"eval_recall": 0.6934673366834171, |
|
"eval_runtime": 2.9637, |
|
"eval_samples_per_second": 375.21, |
|
"eval_steps_per_second": 11.81, |
|
"step": 20252 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.8169175353641374, |
|
"eval_f1": 0.665859238325932, |
|
"eval_loss": 1.6702436208724976, |
|
"eval_precision": 0.6427643543777605, |
|
"eval_recall": 0.690675600223339, |
|
"eval_runtime": 2.99, |
|
"eval_samples_per_second": 371.906, |
|
"eval_steps_per_second": 11.706, |
|
"step": 20496 |
|
}, |
|
{ |
|
"epoch": 84.02, |
|
"learning_rate": 8.995901639344264e-06, |
|
"loss": 0.0012, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.8188974499654111, |
|
"eval_f1": 0.6666666666666667, |
|
"eval_loss": 1.6313210725784302, |
|
"eval_precision": 0.6341647770219199, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 3.0786, |
|
"eval_samples_per_second": 361.198, |
|
"eval_steps_per_second": 11.369, |
|
"step": 20740 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.8231912406669688, |
|
"eval_f1": 0.6679915209326974, |
|
"eval_loss": 1.5829322338104248, |
|
"eval_precision": 0.6356530509329299, |
|
"eval_recall": 0.7037967615857063, |
|
"eval_runtime": 2.9543, |
|
"eval_samples_per_second": 376.397, |
|
"eval_steps_per_second": 11.847, |
|
"step": 20984 |
|
}, |
|
{ |
|
"epoch": 86.07, |
|
"learning_rate": 8.483606557377049e-06, |
|
"loss": 0.0015, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.8209966365306172, |
|
"eval_f1": 0.672317880794702, |
|
"eval_loss": 1.605576753616333, |
|
"eval_precision": 0.639616935483871, |
|
"eval_recall": 0.7085427135678392, |
|
"eval_runtime": 2.9894, |
|
"eval_samples_per_second": 371.983, |
|
"eval_steps_per_second": 11.708, |
|
"step": 21228 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.8224994632761623, |
|
"eval_f1": 0.6773120425815037, |
|
"eval_loss": 1.5823140144348145, |
|
"eval_precision": 0.6470887363335875, |
|
"eval_recall": 0.7104969290898939, |
|
"eval_runtime": 2.973, |
|
"eval_samples_per_second": 374.028, |
|
"eval_steps_per_second": 11.772, |
|
"step": 21472 |
|
}, |
|
{ |
|
"epoch": 88.11, |
|
"learning_rate": 7.971311475409837e-06, |
|
"loss": 0.0015, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.8249326113403783, |
|
"eval_f1": 0.6675521317572054, |
|
"eval_loss": 1.573556661605835, |
|
"eval_precision": 0.6366860907017988, |
|
"eval_recall": 0.7015633724176438, |
|
"eval_runtime": 2.9619, |
|
"eval_samples_per_second": 375.438, |
|
"eval_steps_per_second": 11.817, |
|
"step": 21716 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.823644474129911, |
|
"eval_f1": 0.6724645437516724, |
|
"eval_loss": 1.5920721292495728, |
|
"eval_precision": 0.64568345323741, |
|
"eval_recall": 0.7015633724176438, |
|
"eval_runtime": 2.9692, |
|
"eval_samples_per_second": 374.506, |
|
"eval_steps_per_second": 11.788, |
|
"step": 21960 |
|
}, |
|
{ |
|
"epoch": 90.16, |
|
"learning_rate": 7.459016393442623e-06, |
|
"loss": 0.0012, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.8230958230958231, |
|
"eval_f1": 0.6684364215556146, |
|
"eval_loss": 1.6113594770431519, |
|
"eval_precision": 0.6371457489878543, |
|
"eval_recall": 0.7029592406476829, |
|
"eval_runtime": 2.9681, |
|
"eval_samples_per_second": 374.651, |
|
"eval_steps_per_second": 11.792, |
|
"step": 22204 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.824527086663009, |
|
"eval_f1": 0.6708355508249069, |
|
"eval_loss": 1.5752336978912354, |
|
"eval_precision": 0.6408235892221658, |
|
"eval_recall": 0.7037967615857063, |
|
"eval_runtime": 2.9615, |
|
"eval_samples_per_second": 375.482, |
|
"eval_steps_per_second": 11.818, |
|
"step": 22448 |
|
}, |
|
{ |
|
"epoch": 92.21, |
|
"learning_rate": 6.946721311475411e-06, |
|
"loss": 0.0014, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.8216884139214237, |
|
"eval_f1": 0.6672859986728599, |
|
"eval_loss": 1.6123404502868652, |
|
"eval_precision": 0.6359726789779914, |
|
"eval_recall": 0.7018425460636516, |
|
"eval_runtime": 3.0214, |
|
"eval_samples_per_second": 368.046, |
|
"eval_steps_per_second": 11.584, |
|
"step": 22692 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.8221177929915794, |
|
"eval_f1": 0.6681721572794899, |
|
"eval_loss": 1.618289589881897, |
|
"eval_precision": 0.6373542828180436, |
|
"eval_recall": 0.7021217197096594, |
|
"eval_runtime": 2.9574, |
|
"eval_samples_per_second": 376.01, |
|
"eval_steps_per_second": 11.835, |
|
"step": 22936 |
|
}, |
|
{ |
|
"epoch": 94.26, |
|
"learning_rate": 6.434426229508197e-06, |
|
"loss": 0.0009, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.8274611769757401, |
|
"eval_f1": 0.6721267454350162, |
|
"eval_loss": 1.6077880859375, |
|
"eval_precision": 0.6474392136575272, |
|
"eval_recall": 0.6987716359575656, |
|
"eval_runtime": 2.9598, |
|
"eval_samples_per_second": 375.703, |
|
"eval_steps_per_second": 11.825, |
|
"step": 23180 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.8245747954485818, |
|
"eval_f1": 0.6682679476914866, |
|
"eval_loss": 1.6201205253601074, |
|
"eval_precision": 0.6400817995910021, |
|
"eval_recall": 0.6990508096035735, |
|
"eval_runtime": 2.9623, |
|
"eval_samples_per_second": 375.385, |
|
"eval_steps_per_second": 11.815, |
|
"step": 23424 |
|
}, |
|
{ |
|
"epoch": 96.31, |
|
"learning_rate": 5.922131147540984e-06, |
|
"loss": 0.0008, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.8238114548794161, |
|
"eval_f1": 0.6687067589143161, |
|
"eval_loss": 1.6216107606887817, |
|
"eval_precision": 0.6387900355871886, |
|
"eval_recall": 0.7015633724176438, |
|
"eval_runtime": 2.9859, |
|
"eval_samples_per_second": 372.411, |
|
"eval_steps_per_second": 11.722, |
|
"step": 23668 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.8243839603062904, |
|
"eval_f1": 0.6703077128013853, |
|
"eval_loss": 1.6113009452819824, |
|
"eval_precision": 0.6410191082802548, |
|
"eval_recall": 0.7024008933556672, |
|
"eval_runtime": 2.9619, |
|
"eval_samples_per_second": 375.438, |
|
"eval_steps_per_second": 11.817, |
|
"step": 23912 |
|
}, |
|
{ |
|
"epoch": 98.36, |
|
"learning_rate": 5.409836065573771e-06, |
|
"loss": 0.0011, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.824527086663009, |
|
"eval_f1": 0.6751609442060086, |
|
"eval_loss": 1.5995452404022217, |
|
"eval_precision": 0.6497160557563242, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 3.0743, |
|
"eval_samples_per_second": 361.705, |
|
"eval_steps_per_second": 11.385, |
|
"step": 24156 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.8258629326590492, |
|
"eval_f1": 0.6711105185975204, |
|
"eval_loss": 1.5953351259231567, |
|
"eval_precision": 0.642255677468742, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 2.974, |
|
"eval_samples_per_second": 373.912, |
|
"eval_steps_per_second": 11.769, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 100.41, |
|
"learning_rate": 4.897540983606557e-06, |
|
"loss": 0.0009, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.8247894849836598, |
|
"eval_f1": 0.6724552497996259, |
|
"eval_loss": 1.6178245544433594, |
|
"eval_precision": 0.6447233606557377, |
|
"eval_recall": 0.7026800670016751, |
|
"eval_runtime": 3.0886, |
|
"eval_samples_per_second": 360.031, |
|
"eval_steps_per_second": 11.332, |
|
"step": 24644 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.8256720975167577, |
|
"eval_f1": 0.67206585236325, |
|
"eval_loss": 1.6170806884765625, |
|
"eval_precision": 0.640759493670886, |
|
"eval_recall": 0.7065884980457845, |
|
"eval_runtime": 2.967, |
|
"eval_samples_per_second": 374.794, |
|
"eval_steps_per_second": 11.797, |
|
"step": 24888 |
|
}, |
|
{ |
|
"epoch": 102.46, |
|
"learning_rate": 4.385245901639344e-06, |
|
"loss": 0.0006, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.8270795066911572, |
|
"eval_f1": 0.6780794436271232, |
|
"eval_loss": 1.6054375171661377, |
|
"eval_precision": 0.6508344030808729, |
|
"eval_recall": 0.7077051926298158, |
|
"eval_runtime": 2.9599, |
|
"eval_samples_per_second": 375.683, |
|
"eval_steps_per_second": 11.825, |
|
"step": 25132 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.8251234464826698, |
|
"eval_f1": 0.6701319472211115, |
|
"eval_loss": 1.621781826019287, |
|
"eval_precision": 0.6411629686304514, |
|
"eval_recall": 0.7018425460636516, |
|
"eval_runtime": 3.0743, |
|
"eval_samples_per_second": 361.711, |
|
"eval_steps_per_second": 11.385, |
|
"step": 25376 |
|
}, |
|
{ |
|
"epoch": 104.51, |
|
"learning_rate": 3.872950819672131e-06, |
|
"loss": 0.0008, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.8244555234846497, |
|
"eval_f1": 0.6738082485270488, |
|
"eval_loss": 1.6307542324066162, |
|
"eval_precision": 0.6474523932063819, |
|
"eval_recall": 0.7024008933556672, |
|
"eval_runtime": 2.9786, |
|
"eval_samples_per_second": 373.327, |
|
"eval_steps_per_second": 11.75, |
|
"step": 25620 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.8267216907993608, |
|
"eval_f1": 0.6755638596022955, |
|
"eval_loss": 1.6341726779937744, |
|
"eval_precision": 0.6471490667348504, |
|
"eval_recall": 0.7065884980457845, |
|
"eval_runtime": 2.9836, |
|
"eval_samples_per_second": 372.709, |
|
"eval_steps_per_second": 11.731, |
|
"step": 25864 |
|
}, |
|
{ |
|
"epoch": 106.56, |
|
"learning_rate": 3.3606557377049183e-06, |
|
"loss": 0.0004, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.8253619904105342, |
|
"eval_f1": 0.673863787818206, |
|
"eval_loss": 1.634595274925232, |
|
"eval_precision": 0.6447334863555215, |
|
"eval_recall": 0.705750977107761, |
|
"eval_runtime": 2.9821, |
|
"eval_samples_per_second": 372.893, |
|
"eval_steps_per_second": 11.737, |
|
"step": 26108 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.825743660695117, |
|
"eval_f1": 0.6736758051636944, |
|
"eval_loss": 1.6328423023223877, |
|
"eval_precision": 0.6436927772126144, |
|
"eval_recall": 0.7065884980457845, |
|
"eval_runtime": 2.998, |
|
"eval_samples_per_second": 370.912, |
|
"eval_steps_per_second": 11.674, |
|
"step": 26352 |
|
}, |
|
{ |
|
"epoch": 108.61, |
|
"learning_rate": 2.848360655737705e-06, |
|
"loss": 0.0008, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.8256720975167577, |
|
"eval_f1": 0.674515050167224, |
|
"eval_loss": 1.6220307350158691, |
|
"eval_precision": 0.6475725661443616, |
|
"eval_recall": 0.7037967615857063, |
|
"eval_runtime": 2.9865, |
|
"eval_samples_per_second": 372.341, |
|
"eval_steps_per_second": 11.719, |
|
"step": 26596 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.8276281577252451, |
|
"eval_f1": 0.6784613322610911, |
|
"eval_loss": 1.6160385608673096, |
|
"eval_precision": 0.6524877545759217, |
|
"eval_recall": 0.7065884980457845, |
|
"eval_runtime": 2.9726, |
|
"eval_samples_per_second": 374.081, |
|
"eval_steps_per_second": 11.774, |
|
"step": 26840 |
|
}, |
|
{ |
|
"epoch": 110.66, |
|
"learning_rate": 2.336065573770492e-06, |
|
"loss": 0.0006, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.8270079435127978, |
|
"eval_f1": 0.6741363211951447, |
|
"eval_loss": 1.609981656074524, |
|
"eval_precision": 0.6454661558109834, |
|
"eval_recall": 0.7054718034617532, |
|
"eval_runtime": 2.9645, |
|
"eval_samples_per_second": 375.104, |
|
"eval_steps_per_second": 11.806, |
|
"step": 27084 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.8246702130197275, |
|
"eval_f1": 0.6708255906556942, |
|
"eval_loss": 1.6269794702529907, |
|
"eval_precision": 0.6394230769230769, |
|
"eval_recall": 0.7054718034617532, |
|
"eval_runtime": 2.9668, |
|
"eval_samples_per_second": 374.813, |
|
"eval_steps_per_second": 11.797, |
|
"step": 27328 |
|
}, |
|
{ |
|
"epoch": 112.7, |
|
"learning_rate": 1.8237704918032786e-06, |
|
"loss": 0.0005, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.8273180506190215, |
|
"eval_f1": 0.6754362416107383, |
|
"eval_loss": 1.6233818531036377, |
|
"eval_precision": 0.6504653567735263, |
|
"eval_recall": 0.7024008933556672, |
|
"eval_runtime": 2.9684, |
|
"eval_samples_per_second": 374.612, |
|
"eval_steps_per_second": 11.791, |
|
"step": 27572 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.8252188640538155, |
|
"eval_f1": 0.6711945665201757, |
|
"eval_loss": 1.632752537727356, |
|
"eval_precision": 0.6417112299465241, |
|
"eval_recall": 0.7035175879396985, |
|
"eval_runtime": 2.9514, |
|
"eval_samples_per_second": 376.771, |
|
"eval_steps_per_second": 11.859, |
|
"step": 27816 |
|
}, |
|
{ |
|
"epoch": 114.75, |
|
"learning_rate": 1.3114754098360657e-06, |
|
"loss": 0.0004, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.8251473008754562, |
|
"eval_f1": 0.6710262912051248, |
|
"eval_loss": 1.635224461555481, |
|
"eval_precision": 0.6428023523395551, |
|
"eval_recall": 0.7018425460636516, |
|
"eval_runtime": 2.9586, |
|
"eval_samples_per_second": 375.859, |
|
"eval_steps_per_second": 11.83, |
|
"step": 28060 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.8265308556570693, |
|
"eval_f1": 0.6743162108072048, |
|
"eval_loss": 1.6268539428710938, |
|
"eval_precision": 0.6457960644007156, |
|
"eval_recall": 0.7054718034617532, |
|
"eval_runtime": 3.1118, |
|
"eval_samples_per_second": 357.346, |
|
"eval_steps_per_second": 11.247, |
|
"step": 28304 |
|
}, |
|
{ |
|
"epoch": 116.8, |
|
"learning_rate": 7.991803278688524e-07, |
|
"loss": 0.0005, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.8253381360177476, |
|
"eval_f1": 0.6728024543150594, |
|
"eval_loss": 1.6376687288284302, |
|
"eval_precision": 0.6441890166028097, |
|
"eval_recall": 0.7040759352317141, |
|
"eval_runtime": 2.9567, |
|
"eval_samples_per_second": 376.091, |
|
"eval_steps_per_second": 11.837, |
|
"step": 28548 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.8256959519095441, |
|
"eval_f1": 0.6736027744431106, |
|
"eval_loss": 1.6352702379226685, |
|
"eval_precision": 0.644955300127714, |
|
"eval_recall": 0.7049134561697376, |
|
"eval_runtime": 2.977, |
|
"eval_samples_per_second": 373.528, |
|
"eval_steps_per_second": 11.757, |
|
"step": 28792 |
|
}, |
|
{ |
|
"epoch": 118.85, |
|
"learning_rate": 2.8688524590163937e-07, |
|
"loss": 0.0004, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.825743660695117, |
|
"eval_f1": 0.6747793527681197, |
|
"eval_loss": 1.6394999027252197, |
|
"eval_precision": 0.6475872689938398, |
|
"eval_recall": 0.7043551088777219, |
|
"eval_runtime": 2.9575, |
|
"eval_samples_per_second": 375.995, |
|
"eval_steps_per_second": 11.834, |
|
"step": 29036 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.8256005343383984, |
|
"eval_f1": 0.6740641711229947, |
|
"eval_loss": 1.6384611129760742, |
|
"eval_precision": 0.6467419189327861, |
|
"eval_recall": 0.7037967615857063, |
|
"eval_runtime": 2.9942, |
|
"eval_samples_per_second": 371.386, |
|
"eval_steps_per_second": 11.689, |
|
"step": 29280 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"step": 29280, |
|
"total_flos": 1.220726808511488e+17, |
|
"train_loss": 0.045685164459416124, |
|
"train_runtime": 6878.0479, |
|
"train_samples_per_second": 135.823, |
|
"train_steps_per_second": 4.257 |
|
} |
|
], |
|
"max_steps": 29280, |
|
"num_train_epochs": 120, |
|
"total_flos": 1.220726808511488e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|