wav2vec2-tiny-demo / trainer_state.json
yenpolin's picture
End of training
c8e662a
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 100.0,
"global_step": 25000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_loss": 1.2708368301391602,
"eval_mean_acc": 0.0,
"eval_median_acc": 0.0,
"eval_runtime": 29.5811,
"eval_samples_per_second": 624.621,
"eval_steps_per_second": 0.811,
"step": 250
},
{
"epoch": 2.0,
"learning_rate": 0.000294,
"loss": 1.2586,
"step": 500
},
{
"epoch": 2.0,
"eval_loss": 1.240064024925232,
"eval_mean_acc": 0.0,
"eval_median_acc": 0.0,
"eval_runtime": 28.81,
"eval_samples_per_second": 641.34,
"eval_steps_per_second": 0.833,
"step": 500
},
{
"epoch": 3.0,
"eval_loss": 1.221199631690979,
"eval_mean_acc": 0.0,
"eval_median_acc": 0.0,
"eval_runtime": 28.8341,
"eval_samples_per_second": 640.805,
"eval_steps_per_second": 0.832,
"step": 750
},
{
"epoch": 4.0,
"learning_rate": 0.00028799999999999995,
"loss": 1.1999,
"step": 1000
},
{
"epoch": 4.0,
"eval_loss": 1.1985267400741577,
"eval_mean_acc": 0.002639524371016869,
"eval_median_acc": 0.0,
"eval_runtime": 29.2636,
"eval_samples_per_second": 631.399,
"eval_steps_per_second": 0.82,
"step": 1000
},
{
"epoch": 5.0,
"eval_loss": 1.1824160814285278,
"eval_mean_acc": 0.012239608588524707,
"eval_median_acc": 0.0,
"eval_runtime": 29.1488,
"eval_samples_per_second": 633.885,
"eval_steps_per_second": 0.823,
"step": 1250
},
{
"epoch": 6.0,
"learning_rate": 0.00028199999999999997,
"loss": 1.1635,
"step": 1500
},
{
"epoch": 6.0,
"eval_loss": 1.1715551614761353,
"eval_mean_acc": 0.030072790065877857,
"eval_median_acc": 0.0,
"eval_runtime": 29.3036,
"eval_samples_per_second": 630.536,
"eval_steps_per_second": 0.819,
"step": 1500
},
{
"epoch": 7.0,
"eval_loss": 1.156156301498413,
"eval_mean_acc": 0.029585336742049613,
"eval_median_acc": 0.0,
"eval_runtime": 29.2009,
"eval_samples_per_second": 632.755,
"eval_steps_per_second": 0.822,
"step": 1750
},
{
"epoch": 8.0,
"learning_rate": 0.000276,
"loss": 1.1361,
"step": 2000
},
{
"epoch": 8.0,
"eval_loss": 1.1506972312927246,
"eval_mean_acc": 0.008759450723621017,
"eval_median_acc": 0.0,
"eval_runtime": 29.1539,
"eval_samples_per_second": 633.775,
"eval_steps_per_second": 0.823,
"step": 2000
},
{
"epoch": 9.0,
"eval_loss": 1.1393311023712158,
"eval_mean_acc": 0.06489605427376179,
"eval_median_acc": 0.0,
"eval_runtime": 29.65,
"eval_samples_per_second": 623.17,
"eval_steps_per_second": 0.809,
"step": 2250
},
{
"epoch": 10.0,
"learning_rate": 0.00027,
"loss": 1.1142,
"step": 2500
},
{
"epoch": 10.0,
"eval_loss": 1.1311384439468384,
"eval_mean_acc": 0.015000378543482608,
"eval_median_acc": 0.0,
"eval_runtime": 29.295,
"eval_samples_per_second": 630.723,
"eval_steps_per_second": 0.819,
"step": 2500
},
{
"epoch": 11.0,
"eval_loss": 1.0849117040634155,
"eval_mean_acc": 0.053357515682597535,
"eval_median_acc": 0.0,
"eval_runtime": 29.2649,
"eval_samples_per_second": 631.37,
"eval_steps_per_second": 0.82,
"step": 2750
},
{
"epoch": 12.0,
"learning_rate": 0.00026399999999999997,
"loss": 1.0648,
"step": 3000
},
{
"epoch": 12.0,
"eval_loss": 1.0643727779388428,
"eval_mean_acc": 0.09116804447578762,
"eval_median_acc": 0.0,
"eval_runtime": 29.1753,
"eval_samples_per_second": 633.309,
"eval_steps_per_second": 0.823,
"step": 3000
},
{
"epoch": 13.0,
"eval_loss": 1.0415236949920654,
"eval_mean_acc": 0.1348069252298496,
"eval_median_acc": 0.0,
"eval_runtime": 29.2028,
"eval_samples_per_second": 632.713,
"eval_steps_per_second": 0.822,
"step": 3250
},
{
"epoch": 14.0,
"learning_rate": 0.000258,
"loss": 1.0185,
"step": 3500
},
{
"epoch": 14.0,
"eval_loss": 1.0389618873596191,
"eval_mean_acc": 0.0442671721053236,
"eval_median_acc": 0.0,
"eval_runtime": 29.3877,
"eval_samples_per_second": 628.732,
"eval_steps_per_second": 0.817,
"step": 3500
},
{
"epoch": 15.0,
"eval_loss": 1.0213755369186401,
"eval_mean_acc": 0.11202564075823995,
"eval_median_acc": 0.0,
"eval_runtime": 29.1898,
"eval_samples_per_second": 632.996,
"eval_steps_per_second": 0.822,
"step": 3750
},
{
"epoch": 16.0,
"learning_rate": 0.00025199999999999995,
"loss": 0.9951,
"step": 4000
},
{
"epoch": 16.0,
"eval_loss": 1.015223741531372,
"eval_mean_acc": 0.16473584913990302,
"eval_median_acc": 0.0,
"eval_runtime": 29.2399,
"eval_samples_per_second": 631.91,
"eval_steps_per_second": 0.821,
"step": 4000
},
{
"epoch": 17.0,
"eval_loss": 1.019250750541687,
"eval_mean_acc": 0.11940677048185683,
"eval_median_acc": 0.0,
"eval_runtime": 29.2372,
"eval_samples_per_second": 631.97,
"eval_steps_per_second": 0.821,
"step": 4250
},
{
"epoch": 18.0,
"learning_rate": 0.00024599999999999996,
"loss": 0.9813,
"step": 4500
},
{
"epoch": 18.0,
"eval_loss": 1.00924813747406,
"eval_mean_acc": 0.11822115362550029,
"eval_median_acc": 0.0,
"eval_runtime": 29.1553,
"eval_samples_per_second": 633.744,
"eval_steps_per_second": 0.823,
"step": 4500
},
{
"epoch": 19.0,
"eval_loss": 1.0164929628372192,
"eval_mean_acc": 0.06716894444980748,
"eval_median_acc": 0.0,
"eval_runtime": 29.1521,
"eval_samples_per_second": 633.813,
"eval_steps_per_second": 0.823,
"step": 4750
},
{
"epoch": 20.0,
"learning_rate": 0.00023999999999999998,
"loss": 0.9625,
"step": 5000
},
{
"epoch": 20.0,
"eval_loss": 0.9429653286933899,
"eval_mean_acc": 9.008243218749312,
"eval_median_acc": 0.0,
"eval_runtime": 30.0347,
"eval_samples_per_second": 615.187,
"eval_steps_per_second": 0.799,
"step": 5000
},
{
"epoch": 21.0,
"eval_loss": 0.9300616979598999,
"eval_mean_acc": 13.245211581468084,
"eval_median_acc": 0.0,
"eval_runtime": 30.3256,
"eval_samples_per_second": 609.287,
"eval_steps_per_second": 0.791,
"step": 5250
},
{
"epoch": 22.0,
"learning_rate": 0.000234,
"loss": 0.8958,
"step": 5500
},
{
"epoch": 22.0,
"eval_loss": 0.9260903596878052,
"eval_mean_acc": 8.987847368165264,
"eval_median_acc": 0.0,
"eval_runtime": 30.0974,
"eval_samples_per_second": 613.906,
"eval_steps_per_second": 0.797,
"step": 5500
},
{
"epoch": 23.0,
"eval_loss": 0.9173910617828369,
"eval_mean_acc": 15.36412671561701,
"eval_median_acc": 0.0,
"eval_runtime": 30.1139,
"eval_samples_per_second": 613.57,
"eval_steps_per_second": 0.797,
"step": 5750
},
{
"epoch": 24.0,
"learning_rate": 0.00022799999999999999,
"loss": 0.8756,
"step": 6000
},
{
"epoch": 24.0,
"eval_loss": 0.9115529656410217,
"eval_mean_acc": 14.901058980647452,
"eval_median_acc": 0.0,
"eval_runtime": 30.2491,
"eval_samples_per_second": 610.828,
"eval_steps_per_second": 0.793,
"step": 6000
},
{
"epoch": 25.0,
"eval_loss": 0.9130357503890991,
"eval_mean_acc": 12.268143458883413,
"eval_median_acc": 0.0,
"eval_runtime": 30.1967,
"eval_samples_per_second": 611.888,
"eval_steps_per_second": 0.795,
"step": 6250
},
{
"epoch": 26.0,
"learning_rate": 0.00022199999999999998,
"loss": 0.8607,
"step": 6500
},
{
"epoch": 26.0,
"eval_loss": 0.9113653898239136,
"eval_mean_acc": 15.28737721209223,
"eval_median_acc": 0.0,
"eval_runtime": 30.4015,
"eval_samples_per_second": 607.766,
"eval_steps_per_second": 0.789,
"step": 6500
},
{
"epoch": 27.0,
"eval_loss": 0.9104825854301453,
"eval_mean_acc": 24.120487175005213,
"eval_median_acc": 0.0,
"eval_runtime": 30.7064,
"eval_samples_per_second": 601.731,
"eval_steps_per_second": 0.782,
"step": 6750
},
{
"epoch": 28.0,
"learning_rate": 0.00021599999999999996,
"loss": 0.8482,
"step": 7000
},
{
"epoch": 28.0,
"eval_loss": 0.9082564115524292,
"eval_mean_acc": 18.66072430390773,
"eval_median_acc": 0.0,
"eval_runtime": 30.4946,
"eval_samples_per_second": 605.911,
"eval_steps_per_second": 0.787,
"step": 7000
},
{
"epoch": 29.0,
"eval_loss": 0.9196337461471558,
"eval_mean_acc": 17.72464537190866,
"eval_median_acc": 0.0,
"eval_runtime": 30.2701,
"eval_samples_per_second": 610.404,
"eval_steps_per_second": 0.793,
"step": 7250
},
{
"epoch": 30.0,
"learning_rate": 0.00020999999999999998,
"loss": 0.8359,
"step": 7500
},
{
"epoch": 30.0,
"eval_loss": 0.9148876667022705,
"eval_mean_acc": 19.7733289435757,
"eval_median_acc": 0.0,
"eval_runtime": 30.705,
"eval_samples_per_second": 601.759,
"eval_steps_per_second": 0.782,
"step": 7500
},
{
"epoch": 31.0,
"eval_loss": 0.9133378863334656,
"eval_mean_acc": 18.62968067275681,
"eval_median_acc": 0.0,
"eval_runtime": 30.3878,
"eval_samples_per_second": 608.039,
"eval_steps_per_second": 0.79,
"step": 7750
},
{
"epoch": 32.0,
"learning_rate": 0.000204,
"loss": 0.8232,
"step": 8000
},
{
"epoch": 32.0,
"eval_loss": 0.9479327201843262,
"eval_mean_acc": 12.27033306041223,
"eval_median_acc": 0.0,
"eval_runtime": 30.187,
"eval_samples_per_second": 612.085,
"eval_steps_per_second": 0.795,
"step": 8000
},
{
"epoch": 33.0,
"eval_loss": 0.930338978767395,
"eval_mean_acc": 19.904256184480708,
"eval_median_acc": 0.0,
"eval_runtime": 30.5106,
"eval_samples_per_second": 605.593,
"eval_steps_per_second": 0.787,
"step": 8250
},
{
"epoch": 34.0,
"learning_rate": 0.000198,
"loss": 0.8092,
"step": 8500
},
{
"epoch": 34.0,
"eval_loss": 0.9299731254577637,
"eval_mean_acc": 22.351005658701947,
"eval_median_acc": 0.0,
"eval_runtime": 30.5242,
"eval_samples_per_second": 605.324,
"eval_steps_per_second": 0.786,
"step": 8500
},
{
"epoch": 35.0,
"eval_loss": 0.9295333027839661,
"eval_mean_acc": 27.811848359305156,
"eval_median_acc": 0.0,
"eval_runtime": 30.6542,
"eval_samples_per_second": 602.756,
"eval_steps_per_second": 0.783,
"step": 8750
},
{
"epoch": 36.0,
"learning_rate": 0.00019199999999999998,
"loss": 0.7951,
"step": 9000
},
{
"epoch": 36.0,
"eval_loss": 0.9439055323600769,
"eval_mean_acc": 23.296268042588853,
"eval_median_acc": 0.0,
"eval_runtime": 30.5922,
"eval_samples_per_second": 603.977,
"eval_steps_per_second": 0.785,
"step": 9000
},
{
"epoch": 37.0,
"eval_loss": 0.962045431137085,
"eval_mean_acc": 20.907431263561396,
"eval_median_acc": 0.0,
"eval_runtime": 30.3915,
"eval_samples_per_second": 607.966,
"eval_steps_per_second": 0.79,
"step": 9250
},
{
"epoch": 38.0,
"learning_rate": 0.000186,
"loss": 0.7803,
"step": 9500
},
{
"epoch": 38.0,
"eval_loss": 0.9570873975753784,
"eval_mean_acc": 28.782833553923417,
"eval_median_acc": 52.30263157894737,
"eval_runtime": 30.7856,
"eval_samples_per_second": 600.183,
"eval_steps_per_second": 0.78,
"step": 9500
},
{
"epoch": 39.0,
"eval_loss": 0.9814175367355347,
"eval_mean_acc": 25.267935353876744,
"eval_median_acc": 0.0,
"eval_runtime": 30.4662,
"eval_samples_per_second": 606.475,
"eval_steps_per_second": 0.788,
"step": 9750
},
{
"epoch": 40.0,
"learning_rate": 0.00017999999999999998,
"loss": 0.7669,
"step": 10000
},
{
"epoch": 40.0,
"eval_loss": 0.9786842465400696,
"eval_mean_acc": 31.28196134808705,
"eval_median_acc": 53.38645418326693,
"eval_runtime": 30.754,
"eval_samples_per_second": 600.8,
"eval_steps_per_second": 0.78,
"step": 10000
},
{
"epoch": 41.0,
"eval_loss": 0.9765278100967407,
"eval_mean_acc": 28.633316896351385,
"eval_median_acc": 52.20338983050847,
"eval_runtime": 30.6096,
"eval_samples_per_second": 603.634,
"eval_steps_per_second": 0.784,
"step": 10250
},
{
"epoch": 42.0,
"learning_rate": 0.00017399999999999997,
"loss": 0.7529,
"step": 10500
},
{
"epoch": 42.0,
"eval_loss": 1.0037761926651,
"eval_mean_acc": 27.51536558563458,
"eval_median_acc": 0.0,
"eval_runtime": 30.6294,
"eval_samples_per_second": 603.244,
"eval_steps_per_second": 0.784,
"step": 10500
},
{
"epoch": 43.0,
"eval_loss": 1.033768892288208,
"eval_mean_acc": 28.494027960898663,
"eval_median_acc": 52.13675213675214,
"eval_runtime": 30.6823,
"eval_samples_per_second": 602.204,
"eval_steps_per_second": 0.782,
"step": 10750
},
{
"epoch": 44.0,
"learning_rate": 0.000168,
"loss": 0.7411,
"step": 11000
},
{
"epoch": 44.0,
"eval_loss": 1.0279306173324585,
"eval_mean_acc": 28.72059143849434,
"eval_median_acc": 52.27817745803357,
"eval_runtime": 30.711,
"eval_samples_per_second": 601.642,
"eval_steps_per_second": 0.781,
"step": 11000
},
{
"epoch": 45.0,
"eval_loss": 1.0176538228988647,
"eval_mean_acc": 29.058336469348834,
"eval_median_acc": 52.41157556270096,
"eval_runtime": 30.72,
"eval_samples_per_second": 601.464,
"eval_steps_per_second": 0.781,
"step": 11250
},
{
"epoch": 46.0,
"learning_rate": 0.000162,
"loss": 0.7299,
"step": 11500
},
{
"epoch": 46.0,
"eval_loss": 1.0147888660430908,
"eval_mean_acc": 32.92298407705084,
"eval_median_acc": 53.6,
"eval_runtime": 30.8969,
"eval_samples_per_second": 598.021,
"eval_steps_per_second": 0.777,
"step": 11500
},
{
"epoch": 47.0,
"eval_loss": 1.0400363206863403,
"eval_mean_acc": 33.51743615357999,
"eval_median_acc": 53.813559322033896,
"eval_runtime": 31.0611,
"eval_samples_per_second": 594.86,
"eval_steps_per_second": 0.773,
"step": 11750
},
{
"epoch": 48.0,
"learning_rate": 0.000156,
"loss": 0.7198,
"step": 12000
},
{
"epoch": 48.0,
"eval_loss": 1.0477961301803589,
"eval_mean_acc": 30.930946796462933,
"eval_median_acc": 53.25443786982249,
"eval_runtime": 31.0217,
"eval_samples_per_second": 595.615,
"eval_steps_per_second": 0.774,
"step": 12000
},
{
"epoch": 49.0,
"eval_loss": 1.0538278818130493,
"eval_mean_acc": 29.24450853094501,
"eval_median_acc": 52.569169960474305,
"eval_runtime": 30.7608,
"eval_samples_per_second": 600.668,
"eval_steps_per_second": 0.78,
"step": 12250
},
{
"epoch": 50.0,
"learning_rate": 0.00015,
"loss": 0.7109,
"step": 12500
},
{
"epoch": 50.0,
"eval_loss": 1.0524476766586304,
"eval_mean_acc": 27.231249267136203,
"eval_median_acc": 0.0,
"eval_runtime": 30.6942,
"eval_samples_per_second": 601.971,
"eval_steps_per_second": 0.782,
"step": 12500
},
{
"epoch": 51.0,
"eval_loss": 1.0861831903457642,
"eval_mean_acc": 33.350417690919826,
"eval_median_acc": 53.72340425531915,
"eval_runtime": 31.0377,
"eval_samples_per_second": 595.308,
"eval_steps_per_second": 0.773,
"step": 12750
},
{
"epoch": 52.0,
"learning_rate": 0.00014399999999999998,
"loss": 0.7036,
"step": 13000
},
{
"epoch": 52.0,
"eval_loss": 1.074357271194458,
"eval_mean_acc": 31.877880928875545,
"eval_median_acc": 53.36787564766839,
"eval_runtime": 30.8278,
"eval_samples_per_second": 599.361,
"eval_steps_per_second": 0.779,
"step": 13000
},
{
"epoch": 53.0,
"eval_loss": 1.062804937362671,
"eval_mean_acc": 28.372776202894872,
"eval_median_acc": 51.71232876712328,
"eval_runtime": 30.6841,
"eval_samples_per_second": 602.169,
"eval_steps_per_second": 0.782,
"step": 13250
},
{
"epoch": 54.0,
"learning_rate": 0.000138,
"loss": 0.6963,
"step": 13500
},
{
"epoch": 54.0,
"eval_loss": 1.0586843490600586,
"eval_mean_acc": 30.98220246074368,
"eval_median_acc": 53.125,
"eval_runtime": 31.0347,
"eval_samples_per_second": 595.366,
"eval_steps_per_second": 0.773,
"step": 13500
},
{
"epoch": 55.0,
"eval_loss": 1.083398699760437,
"eval_mean_acc": 33.21303099917168,
"eval_median_acc": 53.57142857142857,
"eval_runtime": 30.8467,
"eval_samples_per_second": 598.994,
"eval_steps_per_second": 0.778,
"step": 13750
},
{
"epoch": 56.0,
"learning_rate": 0.00013199999999999998,
"loss": 0.69,
"step": 14000
},
{
"epoch": 56.0,
"eval_loss": 1.1077489852905273,
"eval_mean_acc": 36.38462165724688,
"eval_median_acc": 54.285714285714285,
"eval_runtime": 31.0446,
"eval_samples_per_second": 595.175,
"eval_steps_per_second": 0.773,
"step": 14000
},
{
"epoch": 57.0,
"eval_loss": 1.114971399307251,
"eval_mean_acc": 32.35859059650532,
"eval_median_acc": 53.49544072948328,
"eval_runtime": 30.8614,
"eval_samples_per_second": 598.708,
"eval_steps_per_second": 0.778,
"step": 14250
},
{
"epoch": 58.0,
"learning_rate": 0.00012599999999999997,
"loss": 0.6855,
"step": 14500
},
{
"epoch": 58.0,
"eval_loss": 1.1352181434631348,
"eval_mean_acc": 36.9014347424795,
"eval_median_acc": 54.406130268199234,
"eval_runtime": 31.0913,
"eval_samples_per_second": 594.282,
"eval_steps_per_second": 0.772,
"step": 14500
},
{
"epoch": 59.0,
"eval_loss": 1.1556544303894043,
"eval_mean_acc": 33.88705951883505,
"eval_median_acc": 53.84615384615385,
"eval_runtime": 30.8633,
"eval_samples_per_second": 598.672,
"eval_steps_per_second": 0.778,
"step": 14750
},
{
"epoch": 60.0,
"learning_rate": 0.00011999999999999999,
"loss": 0.6811,
"step": 15000
},
{
"epoch": 60.0,
"eval_loss": 1.1314884424209595,
"eval_mean_acc": 33.595877918258616,
"eval_median_acc": 53.77358490566038,
"eval_runtime": 31.0682,
"eval_samples_per_second": 594.724,
"eval_steps_per_second": 0.772,
"step": 15000
},
{
"epoch": 61.0,
"eval_loss": 1.0957316160202026,
"eval_mean_acc": 32.99076589659652,
"eval_median_acc": 53.57142857142857,
"eval_runtime": 30.909,
"eval_samples_per_second": 597.788,
"eval_steps_per_second": 0.776,
"step": 15250
},
{
"epoch": 62.0,
"learning_rate": 0.00011399999999999999,
"loss": 0.6768,
"step": 15500
},
{
"epoch": 62.0,
"eval_loss": 1.1236425638198853,
"eval_mean_acc": 32.76527042786048,
"eval_median_acc": 53.61216730038023,
"eval_runtime": 31.1533,
"eval_samples_per_second": 593.099,
"eval_steps_per_second": 0.77,
"step": 15500
},
{
"epoch": 63.0,
"eval_loss": 1.1152857542037964,
"eval_mean_acc": 34.82832308606056,
"eval_median_acc": 53.94321766561514,
"eval_runtime": 31.0538,
"eval_samples_per_second": 595.001,
"eval_steps_per_second": 0.773,
"step": 15750
},
{
"epoch": 64.0,
"learning_rate": 0.00010799999999999998,
"loss": 0.6722,
"step": 16000
},
{
"epoch": 64.0,
"eval_loss": 1.1300369501113892,
"eval_mean_acc": 35.01100739222209,
"eval_median_acc": 53.96825396825397,
"eval_runtime": 30.8553,
"eval_samples_per_second": 598.827,
"eval_steps_per_second": 0.778,
"step": 16000
},
{
"epoch": 65.0,
"eval_loss": 1.1825590133666992,
"eval_mean_acc": 35.92286498667092,
"eval_median_acc": 54.146341463414636,
"eval_runtime": 30.8364,
"eval_samples_per_second": 599.194,
"eval_steps_per_second": 0.778,
"step": 16250
},
{
"epoch": 66.0,
"learning_rate": 0.000102,
"loss": 0.6682,
"step": 16500
},
{
"epoch": 66.0,
"eval_loss": 1.153441309928894,
"eval_mean_acc": 38.55105344645134,
"eval_median_acc": 54.48504983388705,
"eval_runtime": 31.0294,
"eval_samples_per_second": 595.468,
"eval_steps_per_second": 0.773,
"step": 16500
},
{
"epoch": 67.0,
"eval_loss": 1.1635504961013794,
"eval_mean_acc": 35.828909444304365,
"eval_median_acc": 54.09836065573771,
"eval_runtime": 30.7671,
"eval_samples_per_second": 600.544,
"eval_steps_per_second": 0.78,
"step": 16750
},
{
"epoch": 68.0,
"learning_rate": 9.599999999999999e-05,
"loss": 0.6653,
"step": 17000
},
{
"epoch": 68.0,
"eval_loss": 1.1404204368591309,
"eval_mean_acc": 34.65428003659497,
"eval_median_acc": 53.84615384615385,
"eval_runtime": 30.8901,
"eval_samples_per_second": 598.152,
"eval_steps_per_second": 0.777,
"step": 17000
},
{
"epoch": 69.0,
"eval_loss": 1.1473366022109985,
"eval_mean_acc": 36.24934966791388,
"eval_median_acc": 54.12087912087912,
"eval_runtime": 31.0971,
"eval_samples_per_second": 594.172,
"eval_steps_per_second": 0.772,
"step": 17250
},
{
"epoch": 70.0,
"learning_rate": 8.999999999999999e-05,
"loss": 0.6624,
"step": 17500
},
{
"epoch": 70.0,
"eval_loss": 1.1532074213027954,
"eval_mean_acc": 39.51401788696222,
"eval_median_acc": 54.5774647887324,
"eval_runtime": 31.0363,
"eval_samples_per_second": 595.334,
"eval_steps_per_second": 0.773,
"step": 17500
},
{
"epoch": 71.0,
"eval_loss": 1.1715244054794312,
"eval_mean_acc": 36.23543546260951,
"eval_median_acc": 54.12541254125413,
"eval_runtime": 30.897,
"eval_samples_per_second": 598.02,
"eval_steps_per_second": 0.777,
"step": 17750
},
{
"epoch": 72.0,
"learning_rate": 8.4e-05,
"loss": 0.6597,
"step": 18000
},
{
"epoch": 72.0,
"eval_loss": 1.187477469444275,
"eval_mean_acc": 35.50233390601532,
"eval_median_acc": 54.08560311284047,
"eval_runtime": 30.998,
"eval_samples_per_second": 596.07,
"eval_steps_per_second": 0.774,
"step": 18000
},
{
"epoch": 73.0,
"eval_loss": 1.164323329925537,
"eval_mean_acc": 34.50470237269365,
"eval_median_acc": 53.90243902439025,
"eval_runtime": 30.8465,
"eval_samples_per_second": 598.997,
"eval_steps_per_second": 0.778,
"step": 18250
},
{
"epoch": 74.0,
"learning_rate": 7.8e-05,
"loss": 0.657,
"step": 18500
},
{
"epoch": 74.0,
"eval_loss": 1.1893519163131714,
"eval_mean_acc": 38.75609974678352,
"eval_median_acc": 54.492753623188406,
"eval_runtime": 31.2805,
"eval_samples_per_second": 590.688,
"eval_steps_per_second": 0.767,
"step": 18500
},
{
"epoch": 75.0,
"eval_loss": 1.208187222480774,
"eval_mean_acc": 38.215720244506755,
"eval_median_acc": 54.460093896713616,
"eval_runtime": 31.3374,
"eval_samples_per_second": 589.615,
"eval_steps_per_second": 0.766,
"step": 18750
},
{
"epoch": 76.0,
"learning_rate": 7.199999999999999e-05,
"loss": 0.6543,
"step": 19000
},
{
"epoch": 76.0,
"eval_loss": 1.1842811107635498,
"eval_mean_acc": 34.28566002554328,
"eval_median_acc": 53.883495145631066,
"eval_runtime": 30.9371,
"eval_samples_per_second": 597.244,
"eval_steps_per_second": 0.776,
"step": 19000
},
{
"epoch": 77.0,
"eval_loss": 1.1689387559890747,
"eval_mean_acc": 38.42640276827011,
"eval_median_acc": 54.43548387096774,
"eval_runtime": 31.0373,
"eval_samples_per_second": 595.316,
"eval_steps_per_second": 0.773,
"step": 19250
},
{
"epoch": 78.0,
"learning_rate": 6.599999999999999e-05,
"loss": 0.652,
"step": 19500
},
{
"epoch": 78.0,
"eval_loss": 1.2084593772888184,
"eval_mean_acc": 37.749512857893784,
"eval_median_acc": 54.37499999999999,
"eval_runtime": 31.1448,
"eval_samples_per_second": 593.261,
"eval_steps_per_second": 0.771,
"step": 19500
},
{
"epoch": 79.0,
"eval_loss": 1.1984684467315674,
"eval_mean_acc": 39.14441481832044,
"eval_median_acc": 54.518950437317784,
"eval_runtime": 30.9307,
"eval_samples_per_second": 597.368,
"eval_steps_per_second": 0.776,
"step": 19750
},
{
"epoch": 80.0,
"learning_rate": 5.9999999999999995e-05,
"loss": 0.6497,
"step": 20000
},
{
"epoch": 80.0,
"eval_loss": 1.2331078052520752,
"eval_mean_acc": 40.01896688672192,
"eval_median_acc": 54.61254612546126,
"eval_runtime": 31.1205,
"eval_samples_per_second": 593.724,
"eval_steps_per_second": 0.771,
"step": 20000
},
{
"epoch": 81.0,
"eval_loss": 1.2402710914611816,
"eval_mean_acc": 39.05107714371045,
"eval_median_acc": 54.52054794520548,
"eval_runtime": 30.8964,
"eval_samples_per_second": 598.031,
"eval_steps_per_second": 0.777,
"step": 20250
},
{
"epoch": 82.0,
"learning_rate": 5.399999999999999e-05,
"loss": 0.6476,
"step": 20500
},
{
"epoch": 82.0,
"eval_loss": 1.1909747123718262,
"eval_mean_acc": 37.29363801013909,
"eval_median_acc": 54.24528301886793,
"eval_runtime": 31.0069,
"eval_samples_per_second": 595.9,
"eval_steps_per_second": 0.774,
"step": 20500
},
{
"epoch": 83.0,
"eval_loss": 1.2035155296325684,
"eval_mean_acc": 41.25721916721249,
"eval_median_acc": 54.7244094488189,
"eval_runtime": 31.1934,
"eval_samples_per_second": 592.336,
"eval_steps_per_second": 0.769,
"step": 20750
},
{
"epoch": 84.0,
"learning_rate": 4.7999999999999994e-05,
"loss": 0.6457,
"step": 21000
},
{
"epoch": 84.0,
"eval_loss": 1.2123523950576782,
"eval_mean_acc": 38.7564566539536,
"eval_median_acc": 54.4891640866873,
"eval_runtime": 31.1487,
"eval_samples_per_second": 593.187,
"eval_steps_per_second": 0.77,
"step": 21000
},
{
"epoch": 85.0,
"eval_loss": 1.232680320739746,
"eval_mean_acc": 39.12864363006366,
"eval_median_acc": 54.495912806539515,
"eval_runtime": 30.8988,
"eval_samples_per_second": 597.985,
"eval_steps_per_second": 0.777,
"step": 21250
},
{
"epoch": 86.0,
"learning_rate": 4.2e-05,
"loss": 0.6437,
"step": 21500
},
{
"epoch": 86.0,
"eval_loss": 1.22517991065979,
"eval_mean_acc": 39.8797101803931,
"eval_median_acc": 54.5774647887324,
"eval_runtime": 31.1149,
"eval_samples_per_second": 593.83,
"eval_steps_per_second": 0.771,
"step": 21500
},
{
"epoch": 87.0,
"eval_loss": 1.2346075773239136,
"eval_mean_acc": 38.11276454520886,
"eval_median_acc": 54.385964912280706,
"eval_runtime": 31.1511,
"eval_samples_per_second": 593.142,
"eval_steps_per_second": 0.77,
"step": 21750
},
{
"epoch": 88.0,
"learning_rate": 3.5999999999999994e-05,
"loss": 0.642,
"step": 22000
},
{
"epoch": 88.0,
"eval_loss": 1.244125247001648,
"eval_mean_acc": 40.43582155286865,
"eval_median_acc": 54.65116279069767,
"eval_runtime": 31.1673,
"eval_samples_per_second": 592.833,
"eval_steps_per_second": 0.77,
"step": 22000
},
{
"epoch": 89.0,
"eval_loss": 1.24880850315094,
"eval_mean_acc": 39.99124281027319,
"eval_median_acc": 54.59770114942529,
"eval_runtime": 31.0818,
"eval_samples_per_second": 594.463,
"eval_steps_per_second": 0.772,
"step": 22250
},
{
"epoch": 90.0,
"learning_rate": 2.9999999999999997e-05,
"loss": 0.6403,
"step": 22500
},
{
"epoch": 90.0,
"eval_loss": 1.2482763528823853,
"eval_mean_acc": 39.91157811070793,
"eval_median_acc": 54.60526315789473,
"eval_runtime": 31.194,
"eval_samples_per_second": 592.325,
"eval_steps_per_second": 0.769,
"step": 22500
},
{
"epoch": 91.0,
"eval_loss": 1.2674145698547363,
"eval_mean_acc": 40.94577170886235,
"eval_median_acc": 54.700854700854705,
"eval_runtime": 31.202,
"eval_samples_per_second": 592.174,
"eval_steps_per_second": 0.769,
"step": 22750
},
{
"epoch": 92.0,
"learning_rate": 2.3999999999999997e-05,
"loss": 0.6387,
"step": 23000
},
{
"epoch": 92.0,
"eval_loss": 1.269442081451416,
"eval_mean_acc": 40.442322594799656,
"eval_median_acc": 54.63576158940398,
"eval_runtime": 31.0407,
"eval_samples_per_second": 595.25,
"eval_steps_per_second": 0.773,
"step": 23000
},
{
"epoch": 93.0,
"eval_loss": 1.2716701030731201,
"eval_mean_acc": 40.739734228756824,
"eval_median_acc": 54.67625899280576,
"eval_runtime": 31.2132,
"eval_samples_per_second": 591.961,
"eval_steps_per_second": 0.769,
"step": 23250
},
{
"epoch": 94.0,
"learning_rate": 1.7999999999999997e-05,
"loss": 0.6371,
"step": 23500
},
{
"epoch": 94.0,
"eval_loss": 1.2819631099700928,
"eval_mean_acc": 40.571867011274925,
"eval_median_acc": 54.63917525773196,
"eval_runtime": 31.0385,
"eval_samples_per_second": 595.292,
"eval_steps_per_second": 0.773,
"step": 23500
},
{
"epoch": 95.0,
"eval_loss": 1.28830885887146,
"eval_mean_acc": 40.353439886436945,
"eval_median_acc": 54.666666666666664,
"eval_runtime": 31.183,
"eval_samples_per_second": 592.534,
"eval_steps_per_second": 0.77,
"step": 23750
},
{
"epoch": 96.0,
"learning_rate": 1.1999999999999999e-05,
"loss": 0.6358,
"step": 24000
},
{
"epoch": 96.0,
"eval_loss": 1.30391263961792,
"eval_mean_acc": 40.60459621916925,
"eval_median_acc": 54.666666666666664,
"eval_runtime": 31.0782,
"eval_samples_per_second": 594.532,
"eval_steps_per_second": 0.772,
"step": 24000
},
{
"epoch": 97.0,
"eval_loss": 1.3067735433578491,
"eval_mean_acc": 41.452516923874725,
"eval_median_acc": 54.773869346733676,
"eval_runtime": 30.922,
"eval_samples_per_second": 597.536,
"eval_steps_per_second": 0.776,
"step": 24250
},
{
"epoch": 98.0,
"learning_rate": 5.999999999999999e-06,
"loss": 0.6347,
"step": 24500
},
{
"epoch": 98.0,
"eval_loss": 1.313217282295227,
"eval_mean_acc": 41.42466457051602,
"eval_median_acc": 54.75409836065573,
"eval_runtime": 31.2774,
"eval_samples_per_second": 590.746,
"eval_steps_per_second": 0.767,
"step": 24500
},
{
"epoch": 99.0,
"eval_loss": 1.3124245405197144,
"eval_mean_acc": 41.25630205018876,
"eval_median_acc": 54.74452554744526,
"eval_runtime": 30.8857,
"eval_samples_per_second": 598.238,
"eval_steps_per_second": 0.777,
"step": 24750
},
{
"epoch": 100.0,
"learning_rate": 0.0,
"loss": 0.6339,
"step": 25000
},
{
"epoch": 100.0,
"eval_loss": 1.3174444437026978,
"eval_mean_acc": 41.405191345713106,
"eval_median_acc": 54.75578406169666,
"eval_runtime": 31.1131,
"eval_samples_per_second": 593.865,
"eval_steps_per_second": 0.771,
"step": 25000
},
{
"epoch": 100.0,
"step": 25000,
"total_flos": 1.660761144e+18,
"train_loss": 0.7862733935546875,
"train_runtime": 36507.5594,
"train_samples_per_second": 273.916,
"train_steps_per_second": 0.685
}
],
"max_steps": 25000,
"num_train_epochs": 100,
"total_flos": 1.660761144e+18,
"trial_name": null,
"trial_params": null
}