{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 1.2708368301391602, "eval_mean_acc": 0.0, "eval_median_acc": 0.0, "eval_runtime": 29.5811, "eval_samples_per_second": 624.621, "eval_steps_per_second": 0.811, "step": 250 }, { "epoch": 2.0, "learning_rate": 0.000294, "loss": 1.2586, "step": 500 }, { "epoch": 2.0, "eval_loss": 1.240064024925232, "eval_mean_acc": 0.0, "eval_median_acc": 0.0, "eval_runtime": 28.81, "eval_samples_per_second": 641.34, "eval_steps_per_second": 0.833, "step": 500 }, { "epoch": 3.0, "eval_loss": 1.221199631690979, "eval_mean_acc": 0.0, "eval_median_acc": 0.0, "eval_runtime": 28.8341, "eval_samples_per_second": 640.805, "eval_steps_per_second": 0.832, "step": 750 }, { "epoch": 4.0, "learning_rate": 0.00028799999999999995, "loss": 1.1999, "step": 1000 }, { "epoch": 4.0, "eval_loss": 1.1985267400741577, "eval_mean_acc": 0.002639524371016869, "eval_median_acc": 0.0, "eval_runtime": 29.2636, "eval_samples_per_second": 631.399, "eval_steps_per_second": 0.82, "step": 1000 }, { "epoch": 5.0, "eval_loss": 1.1824160814285278, "eval_mean_acc": 0.012239608588524707, "eval_median_acc": 0.0, "eval_runtime": 29.1488, "eval_samples_per_second": 633.885, "eval_steps_per_second": 0.823, "step": 1250 }, { "epoch": 6.0, "learning_rate": 0.00028199999999999997, "loss": 1.1635, "step": 1500 }, { "epoch": 6.0, "eval_loss": 1.1715551614761353, "eval_mean_acc": 0.030072790065877857, "eval_median_acc": 0.0, "eval_runtime": 29.3036, "eval_samples_per_second": 630.536, "eval_steps_per_second": 0.819, "step": 1500 }, { "epoch": 7.0, "eval_loss": 1.156156301498413, "eval_mean_acc": 0.029585336742049613, "eval_median_acc": 0.0, "eval_runtime": 29.2009, "eval_samples_per_second": 632.755, "eval_steps_per_second": 0.822, "step": 1750 }, { "epoch": 8.0, "learning_rate": 0.000276, "loss": 1.1361, "step": 2000 }, { "epoch": 8.0, "eval_loss": 1.1506972312927246, "eval_mean_acc": 0.008759450723621017, "eval_median_acc": 0.0, "eval_runtime": 29.1539, "eval_samples_per_second": 633.775, "eval_steps_per_second": 0.823, "step": 2000 }, { "epoch": 9.0, "eval_loss": 1.1393311023712158, "eval_mean_acc": 0.06489605427376179, "eval_median_acc": 0.0, "eval_runtime": 29.65, "eval_samples_per_second": 623.17, "eval_steps_per_second": 0.809, "step": 2250 }, { "epoch": 10.0, "learning_rate": 0.00027, "loss": 1.1142, "step": 2500 }, { "epoch": 10.0, "eval_loss": 1.1311384439468384, "eval_mean_acc": 0.015000378543482608, "eval_median_acc": 0.0, "eval_runtime": 29.295, "eval_samples_per_second": 630.723, "eval_steps_per_second": 0.819, "step": 2500 }, { "epoch": 11.0, "eval_loss": 1.0849117040634155, "eval_mean_acc": 0.053357515682597535, "eval_median_acc": 0.0, "eval_runtime": 29.2649, "eval_samples_per_second": 631.37, "eval_steps_per_second": 0.82, "step": 2750 }, { "epoch": 12.0, "learning_rate": 0.00026399999999999997, "loss": 1.0648, "step": 3000 }, { "epoch": 12.0, "eval_loss": 1.0643727779388428, "eval_mean_acc": 0.09116804447578762, "eval_median_acc": 0.0, "eval_runtime": 29.1753, "eval_samples_per_second": 633.309, "eval_steps_per_second": 0.823, "step": 3000 }, { "epoch": 13.0, "eval_loss": 1.0415236949920654, "eval_mean_acc": 0.1348069252298496, "eval_median_acc": 0.0, "eval_runtime": 29.2028, "eval_samples_per_second": 632.713, "eval_steps_per_second": 0.822, "step": 3250 }, { "epoch": 14.0, "learning_rate": 0.000258, "loss": 1.0185, "step": 3500 }, { "epoch": 14.0, "eval_loss": 1.0389618873596191, "eval_mean_acc": 0.0442671721053236, "eval_median_acc": 0.0, "eval_runtime": 29.3877, "eval_samples_per_second": 628.732, "eval_steps_per_second": 0.817, "step": 3500 }, { "epoch": 15.0, "eval_loss": 1.0213755369186401, "eval_mean_acc": 0.11202564075823995, "eval_median_acc": 0.0, "eval_runtime": 29.1898, "eval_samples_per_second": 632.996, "eval_steps_per_second": 0.822, "step": 3750 }, { "epoch": 16.0, "learning_rate": 0.00025199999999999995, "loss": 0.9951, "step": 4000 }, { "epoch": 16.0, "eval_loss": 1.015223741531372, "eval_mean_acc": 0.16473584913990302, "eval_median_acc": 0.0, "eval_runtime": 29.2399, "eval_samples_per_second": 631.91, "eval_steps_per_second": 0.821, "step": 4000 }, { "epoch": 17.0, "eval_loss": 1.019250750541687, "eval_mean_acc": 0.11940677048185683, "eval_median_acc": 0.0, "eval_runtime": 29.2372, "eval_samples_per_second": 631.97, "eval_steps_per_second": 0.821, "step": 4250 }, { "epoch": 18.0, "learning_rate": 0.00024599999999999996, "loss": 0.9813, "step": 4500 }, { "epoch": 18.0, "eval_loss": 1.00924813747406, "eval_mean_acc": 0.11822115362550029, "eval_median_acc": 0.0, "eval_runtime": 29.1553, "eval_samples_per_second": 633.744, "eval_steps_per_second": 0.823, "step": 4500 }, { "epoch": 19.0, "eval_loss": 1.0164929628372192, "eval_mean_acc": 0.06716894444980748, "eval_median_acc": 0.0, "eval_runtime": 29.1521, "eval_samples_per_second": 633.813, "eval_steps_per_second": 0.823, "step": 4750 }, { "epoch": 20.0, "learning_rate": 0.00023999999999999998, "loss": 0.9625, "step": 5000 }, { "epoch": 20.0, "eval_loss": 0.9429653286933899, "eval_mean_acc": 9.008243218749312, "eval_median_acc": 0.0, "eval_runtime": 30.0347, "eval_samples_per_second": 615.187, "eval_steps_per_second": 0.799, "step": 5000 }, { "epoch": 21.0, "eval_loss": 0.9300616979598999, "eval_mean_acc": 13.245211581468084, "eval_median_acc": 0.0, "eval_runtime": 30.3256, "eval_samples_per_second": 609.287, "eval_steps_per_second": 0.791, "step": 5250 }, { "epoch": 22.0, "learning_rate": 0.000234, "loss": 0.8958, "step": 5500 }, { "epoch": 22.0, "eval_loss": 0.9260903596878052, "eval_mean_acc": 8.987847368165264, "eval_median_acc": 0.0, "eval_runtime": 30.0974, "eval_samples_per_second": 613.906, "eval_steps_per_second": 0.797, "step": 5500 }, { "epoch": 23.0, "eval_loss": 0.9173910617828369, "eval_mean_acc": 15.36412671561701, "eval_median_acc": 0.0, "eval_runtime": 30.1139, "eval_samples_per_second": 613.57, "eval_steps_per_second": 0.797, "step": 5750 }, { "epoch": 24.0, "learning_rate": 0.00022799999999999999, "loss": 0.8756, "step": 6000 }, { "epoch": 24.0, "eval_loss": 0.9115529656410217, "eval_mean_acc": 14.901058980647452, "eval_median_acc": 0.0, "eval_runtime": 30.2491, "eval_samples_per_second": 610.828, "eval_steps_per_second": 0.793, "step": 6000 }, { "epoch": 25.0, "eval_loss": 0.9130357503890991, "eval_mean_acc": 12.268143458883413, "eval_median_acc": 0.0, "eval_runtime": 30.1967, "eval_samples_per_second": 611.888, "eval_steps_per_second": 0.795, "step": 6250 }, { "epoch": 26.0, "learning_rate": 0.00022199999999999998, "loss": 0.8607, "step": 6500 }, { "epoch": 26.0, "eval_loss": 0.9113653898239136, "eval_mean_acc": 15.28737721209223, "eval_median_acc": 0.0, "eval_runtime": 30.4015, "eval_samples_per_second": 607.766, "eval_steps_per_second": 0.789, "step": 6500 }, { "epoch": 27.0, "eval_loss": 0.9104825854301453, "eval_mean_acc": 24.120487175005213, "eval_median_acc": 0.0, "eval_runtime": 30.7064, "eval_samples_per_second": 601.731, "eval_steps_per_second": 0.782, "step": 6750 }, { "epoch": 28.0, "learning_rate": 0.00021599999999999996, "loss": 0.8482, "step": 7000 }, { "epoch": 28.0, "eval_loss": 0.9082564115524292, "eval_mean_acc": 18.66072430390773, "eval_median_acc": 0.0, "eval_runtime": 30.4946, "eval_samples_per_second": 605.911, "eval_steps_per_second": 0.787, "step": 7000 }, { "epoch": 29.0, "eval_loss": 0.9196337461471558, "eval_mean_acc": 17.72464537190866, "eval_median_acc": 0.0, "eval_runtime": 30.2701, "eval_samples_per_second": 610.404, "eval_steps_per_second": 0.793, "step": 7250 }, { "epoch": 30.0, "learning_rate": 0.00020999999999999998, "loss": 0.8359, "step": 7500 }, { "epoch": 30.0, "eval_loss": 0.9148876667022705, "eval_mean_acc": 19.7733289435757, "eval_median_acc": 0.0, "eval_runtime": 30.705, "eval_samples_per_second": 601.759, "eval_steps_per_second": 0.782, "step": 7500 }, { "epoch": 31.0, "eval_loss": 0.9133378863334656, "eval_mean_acc": 18.62968067275681, "eval_median_acc": 0.0, "eval_runtime": 30.3878, "eval_samples_per_second": 608.039, "eval_steps_per_second": 0.79, "step": 7750 }, { "epoch": 32.0, "learning_rate": 0.000204, "loss": 0.8232, "step": 8000 }, { "epoch": 32.0, "eval_loss": 0.9479327201843262, "eval_mean_acc": 12.27033306041223, "eval_median_acc": 0.0, "eval_runtime": 30.187, "eval_samples_per_second": 612.085, "eval_steps_per_second": 0.795, "step": 8000 }, { "epoch": 33.0, "eval_loss": 0.930338978767395, "eval_mean_acc": 19.904256184480708, "eval_median_acc": 0.0, "eval_runtime": 30.5106, "eval_samples_per_second": 605.593, "eval_steps_per_second": 0.787, "step": 8250 }, { "epoch": 34.0, "learning_rate": 0.000198, "loss": 0.8092, "step": 8500 }, { "epoch": 34.0, "eval_loss": 0.9299731254577637, "eval_mean_acc": 22.351005658701947, "eval_median_acc": 0.0, "eval_runtime": 30.5242, "eval_samples_per_second": 605.324, "eval_steps_per_second": 0.786, "step": 8500 }, { "epoch": 35.0, "eval_loss": 0.9295333027839661, "eval_mean_acc": 27.811848359305156, "eval_median_acc": 0.0, "eval_runtime": 30.6542, "eval_samples_per_second": 602.756, "eval_steps_per_second": 0.783, "step": 8750 }, { "epoch": 36.0, "learning_rate": 0.00019199999999999998, "loss": 0.7951, "step": 9000 }, { "epoch": 36.0, "eval_loss": 0.9439055323600769, "eval_mean_acc": 23.296268042588853, "eval_median_acc": 0.0, "eval_runtime": 30.5922, "eval_samples_per_second": 603.977, "eval_steps_per_second": 0.785, "step": 9000 }, { "epoch": 37.0, "eval_loss": 0.962045431137085, "eval_mean_acc": 20.907431263561396, "eval_median_acc": 0.0, "eval_runtime": 30.3915, "eval_samples_per_second": 607.966, "eval_steps_per_second": 0.79, "step": 9250 }, { "epoch": 38.0, "learning_rate": 0.000186, "loss": 0.7803, "step": 9500 }, { "epoch": 38.0, "eval_loss": 0.9570873975753784, "eval_mean_acc": 28.782833553923417, "eval_median_acc": 52.30263157894737, "eval_runtime": 30.7856, "eval_samples_per_second": 600.183, "eval_steps_per_second": 0.78, "step": 9500 }, { "epoch": 39.0, "eval_loss": 0.9814175367355347, "eval_mean_acc": 25.267935353876744, "eval_median_acc": 0.0, "eval_runtime": 30.4662, "eval_samples_per_second": 606.475, "eval_steps_per_second": 0.788, "step": 9750 }, { "epoch": 40.0, "learning_rate": 0.00017999999999999998, "loss": 0.7669, "step": 10000 }, { "epoch": 40.0, "eval_loss": 0.9786842465400696, "eval_mean_acc": 31.28196134808705, "eval_median_acc": 53.38645418326693, "eval_runtime": 30.754, "eval_samples_per_second": 600.8, "eval_steps_per_second": 0.78, "step": 10000 }, { "epoch": 41.0, "eval_loss": 0.9765278100967407, "eval_mean_acc": 28.633316896351385, "eval_median_acc": 52.20338983050847, "eval_runtime": 30.6096, "eval_samples_per_second": 603.634, "eval_steps_per_second": 0.784, "step": 10250 }, { "epoch": 42.0, "learning_rate": 0.00017399999999999997, "loss": 0.7529, "step": 10500 }, { "epoch": 42.0, "eval_loss": 1.0037761926651, "eval_mean_acc": 27.51536558563458, "eval_median_acc": 0.0, "eval_runtime": 30.6294, "eval_samples_per_second": 603.244, "eval_steps_per_second": 0.784, "step": 10500 }, { "epoch": 43.0, "eval_loss": 1.033768892288208, "eval_mean_acc": 28.494027960898663, "eval_median_acc": 52.13675213675214, "eval_runtime": 30.6823, "eval_samples_per_second": 602.204, "eval_steps_per_second": 0.782, "step": 10750 }, { "epoch": 44.0, "learning_rate": 0.000168, "loss": 0.7411, "step": 11000 }, { "epoch": 44.0, "eval_loss": 1.0279306173324585, "eval_mean_acc": 28.72059143849434, "eval_median_acc": 52.27817745803357, "eval_runtime": 30.711, "eval_samples_per_second": 601.642, "eval_steps_per_second": 0.781, "step": 11000 }, { "epoch": 45.0, "eval_loss": 1.0176538228988647, "eval_mean_acc": 29.058336469348834, "eval_median_acc": 52.41157556270096, "eval_runtime": 30.72, "eval_samples_per_second": 601.464, "eval_steps_per_second": 0.781, "step": 11250 }, { "epoch": 46.0, "learning_rate": 0.000162, "loss": 0.7299, "step": 11500 }, { "epoch": 46.0, "eval_loss": 1.0147888660430908, "eval_mean_acc": 32.92298407705084, "eval_median_acc": 53.6, "eval_runtime": 30.8969, "eval_samples_per_second": 598.021, "eval_steps_per_second": 0.777, "step": 11500 }, { "epoch": 47.0, "eval_loss": 1.0400363206863403, "eval_mean_acc": 33.51743615357999, "eval_median_acc": 53.813559322033896, "eval_runtime": 31.0611, "eval_samples_per_second": 594.86, "eval_steps_per_second": 0.773, "step": 11750 }, { "epoch": 48.0, "learning_rate": 0.000156, "loss": 0.7198, "step": 12000 }, { "epoch": 48.0, "eval_loss": 1.0477961301803589, "eval_mean_acc": 30.930946796462933, "eval_median_acc": 53.25443786982249, "eval_runtime": 31.0217, "eval_samples_per_second": 595.615, "eval_steps_per_second": 0.774, "step": 12000 }, { "epoch": 49.0, "eval_loss": 1.0538278818130493, "eval_mean_acc": 29.24450853094501, "eval_median_acc": 52.569169960474305, "eval_runtime": 30.7608, "eval_samples_per_second": 600.668, "eval_steps_per_second": 0.78, "step": 12250 }, { "epoch": 50.0, "learning_rate": 0.00015, "loss": 0.7109, "step": 12500 }, { "epoch": 50.0, "eval_loss": 1.0524476766586304, "eval_mean_acc": 27.231249267136203, "eval_median_acc": 0.0, "eval_runtime": 30.6942, "eval_samples_per_second": 601.971, "eval_steps_per_second": 0.782, "step": 12500 }, { "epoch": 51.0, "eval_loss": 1.0861831903457642, "eval_mean_acc": 33.350417690919826, "eval_median_acc": 53.72340425531915, "eval_runtime": 31.0377, "eval_samples_per_second": 595.308, "eval_steps_per_second": 0.773, "step": 12750 }, { "epoch": 52.0, "learning_rate": 0.00014399999999999998, "loss": 0.7036, "step": 13000 }, { "epoch": 52.0, "eval_loss": 1.074357271194458, "eval_mean_acc": 31.877880928875545, "eval_median_acc": 53.36787564766839, "eval_runtime": 30.8278, "eval_samples_per_second": 599.361, "eval_steps_per_second": 0.779, "step": 13000 }, { "epoch": 53.0, "eval_loss": 1.062804937362671, "eval_mean_acc": 28.372776202894872, "eval_median_acc": 51.71232876712328, "eval_runtime": 30.6841, "eval_samples_per_second": 602.169, "eval_steps_per_second": 0.782, "step": 13250 }, { "epoch": 54.0, "learning_rate": 0.000138, "loss": 0.6963, "step": 13500 }, { "epoch": 54.0, "eval_loss": 1.0586843490600586, "eval_mean_acc": 30.98220246074368, "eval_median_acc": 53.125, "eval_runtime": 31.0347, "eval_samples_per_second": 595.366, "eval_steps_per_second": 0.773, "step": 13500 }, { "epoch": 55.0, "eval_loss": 1.083398699760437, "eval_mean_acc": 33.21303099917168, "eval_median_acc": 53.57142857142857, "eval_runtime": 30.8467, "eval_samples_per_second": 598.994, "eval_steps_per_second": 0.778, "step": 13750 }, { "epoch": 56.0, "learning_rate": 0.00013199999999999998, "loss": 0.69, "step": 14000 }, { "epoch": 56.0, "eval_loss": 1.1077489852905273, "eval_mean_acc": 36.38462165724688, "eval_median_acc": 54.285714285714285, "eval_runtime": 31.0446, "eval_samples_per_second": 595.175, "eval_steps_per_second": 0.773, "step": 14000 }, { "epoch": 57.0, "eval_loss": 1.114971399307251, "eval_mean_acc": 32.35859059650532, "eval_median_acc": 53.49544072948328, "eval_runtime": 30.8614, "eval_samples_per_second": 598.708, "eval_steps_per_second": 0.778, "step": 14250 }, { "epoch": 58.0, "learning_rate": 0.00012599999999999997, "loss": 0.6855, "step": 14500 }, { "epoch": 58.0, "eval_loss": 1.1352181434631348, "eval_mean_acc": 36.9014347424795, "eval_median_acc": 54.406130268199234, "eval_runtime": 31.0913, "eval_samples_per_second": 594.282, "eval_steps_per_second": 0.772, "step": 14500 }, { "epoch": 59.0, "eval_loss": 1.1556544303894043, "eval_mean_acc": 33.88705951883505, "eval_median_acc": 53.84615384615385, "eval_runtime": 30.8633, "eval_samples_per_second": 598.672, "eval_steps_per_second": 0.778, "step": 14750 }, { "epoch": 60.0, "learning_rate": 0.00011999999999999999, "loss": 0.6811, "step": 15000 }, { "epoch": 60.0, "eval_loss": 1.1314884424209595, "eval_mean_acc": 33.595877918258616, "eval_median_acc": 53.77358490566038, "eval_runtime": 31.0682, "eval_samples_per_second": 594.724, "eval_steps_per_second": 0.772, "step": 15000 }, { "epoch": 61.0, "eval_loss": 1.0957316160202026, "eval_mean_acc": 32.99076589659652, "eval_median_acc": 53.57142857142857, "eval_runtime": 30.909, "eval_samples_per_second": 597.788, "eval_steps_per_second": 0.776, "step": 15250 }, { "epoch": 62.0, "learning_rate": 0.00011399999999999999, "loss": 0.6768, "step": 15500 }, { "epoch": 62.0, "eval_loss": 1.1236425638198853, "eval_mean_acc": 32.76527042786048, "eval_median_acc": 53.61216730038023, "eval_runtime": 31.1533, "eval_samples_per_second": 593.099, "eval_steps_per_second": 0.77, "step": 15500 }, { "epoch": 63.0, "eval_loss": 1.1152857542037964, "eval_mean_acc": 34.82832308606056, "eval_median_acc": 53.94321766561514, "eval_runtime": 31.0538, "eval_samples_per_second": 595.001, "eval_steps_per_second": 0.773, "step": 15750 }, { "epoch": 64.0, "learning_rate": 0.00010799999999999998, "loss": 0.6722, "step": 16000 }, { "epoch": 64.0, "eval_loss": 1.1300369501113892, "eval_mean_acc": 35.01100739222209, "eval_median_acc": 53.96825396825397, "eval_runtime": 30.8553, "eval_samples_per_second": 598.827, "eval_steps_per_second": 0.778, "step": 16000 }, { "epoch": 65.0, "eval_loss": 1.1825590133666992, "eval_mean_acc": 35.92286498667092, "eval_median_acc": 54.146341463414636, "eval_runtime": 30.8364, "eval_samples_per_second": 599.194, "eval_steps_per_second": 0.778, "step": 16250 }, { "epoch": 66.0, "learning_rate": 0.000102, "loss": 0.6682, "step": 16500 }, { "epoch": 66.0, "eval_loss": 1.153441309928894, "eval_mean_acc": 38.55105344645134, "eval_median_acc": 54.48504983388705, "eval_runtime": 31.0294, "eval_samples_per_second": 595.468, "eval_steps_per_second": 0.773, "step": 16500 }, { "epoch": 67.0, "eval_loss": 1.1635504961013794, "eval_mean_acc": 35.828909444304365, "eval_median_acc": 54.09836065573771, "eval_runtime": 30.7671, "eval_samples_per_second": 600.544, "eval_steps_per_second": 0.78, "step": 16750 }, { "epoch": 68.0, "learning_rate": 9.599999999999999e-05, "loss": 0.6653, "step": 17000 }, { "epoch": 68.0, "eval_loss": 1.1404204368591309, "eval_mean_acc": 34.65428003659497, "eval_median_acc": 53.84615384615385, "eval_runtime": 30.8901, "eval_samples_per_second": 598.152, "eval_steps_per_second": 0.777, "step": 17000 }, { "epoch": 69.0, "eval_loss": 1.1473366022109985, "eval_mean_acc": 36.24934966791388, "eval_median_acc": 54.12087912087912, "eval_runtime": 31.0971, "eval_samples_per_second": 594.172, "eval_steps_per_second": 0.772, "step": 17250 }, { "epoch": 70.0, "learning_rate": 8.999999999999999e-05, "loss": 0.6624, "step": 17500 }, { "epoch": 70.0, "eval_loss": 1.1532074213027954, "eval_mean_acc": 39.51401788696222, "eval_median_acc": 54.5774647887324, "eval_runtime": 31.0363, "eval_samples_per_second": 595.334, "eval_steps_per_second": 0.773, "step": 17500 }, { "epoch": 71.0, "eval_loss": 1.1715244054794312, "eval_mean_acc": 36.23543546260951, "eval_median_acc": 54.12541254125413, "eval_runtime": 30.897, "eval_samples_per_second": 598.02, "eval_steps_per_second": 0.777, "step": 17750 }, { "epoch": 72.0, "learning_rate": 8.4e-05, "loss": 0.6597, "step": 18000 }, { "epoch": 72.0, "eval_loss": 1.187477469444275, "eval_mean_acc": 35.50233390601532, "eval_median_acc": 54.08560311284047, "eval_runtime": 30.998, "eval_samples_per_second": 596.07, "eval_steps_per_second": 0.774, "step": 18000 }, { "epoch": 73.0, "eval_loss": 1.164323329925537, "eval_mean_acc": 34.50470237269365, "eval_median_acc": 53.90243902439025, "eval_runtime": 30.8465, "eval_samples_per_second": 598.997, "eval_steps_per_second": 0.778, "step": 18250 }, { "epoch": 74.0, "learning_rate": 7.8e-05, "loss": 0.657, "step": 18500 }, { "epoch": 74.0, "eval_loss": 1.1893519163131714, "eval_mean_acc": 38.75609974678352, "eval_median_acc": 54.492753623188406, "eval_runtime": 31.2805, "eval_samples_per_second": 590.688, "eval_steps_per_second": 0.767, "step": 18500 }, { "epoch": 75.0, "eval_loss": 1.208187222480774, "eval_mean_acc": 38.215720244506755, "eval_median_acc": 54.460093896713616, "eval_runtime": 31.3374, "eval_samples_per_second": 589.615, "eval_steps_per_second": 0.766, "step": 18750 }, { "epoch": 76.0, "learning_rate": 7.199999999999999e-05, "loss": 0.6543, "step": 19000 }, { "epoch": 76.0, "eval_loss": 1.1842811107635498, "eval_mean_acc": 34.28566002554328, "eval_median_acc": 53.883495145631066, "eval_runtime": 30.9371, "eval_samples_per_second": 597.244, "eval_steps_per_second": 0.776, "step": 19000 }, { "epoch": 77.0, "eval_loss": 1.1689387559890747, "eval_mean_acc": 38.42640276827011, "eval_median_acc": 54.43548387096774, "eval_runtime": 31.0373, "eval_samples_per_second": 595.316, "eval_steps_per_second": 0.773, "step": 19250 }, { "epoch": 78.0, "learning_rate": 6.599999999999999e-05, "loss": 0.652, "step": 19500 }, { "epoch": 78.0, "eval_loss": 1.2084593772888184, "eval_mean_acc": 37.749512857893784, "eval_median_acc": 54.37499999999999, "eval_runtime": 31.1448, "eval_samples_per_second": 593.261, "eval_steps_per_second": 0.771, "step": 19500 }, { "epoch": 79.0, "eval_loss": 1.1984684467315674, "eval_mean_acc": 39.14441481832044, "eval_median_acc": 54.518950437317784, "eval_runtime": 30.9307, "eval_samples_per_second": 597.368, "eval_steps_per_second": 0.776, "step": 19750 }, { "epoch": 80.0, "learning_rate": 5.9999999999999995e-05, "loss": 0.6497, "step": 20000 }, { "epoch": 80.0, "eval_loss": 1.2331078052520752, "eval_mean_acc": 40.01896688672192, "eval_median_acc": 54.61254612546126, "eval_runtime": 31.1205, "eval_samples_per_second": 593.724, "eval_steps_per_second": 0.771, "step": 20000 }, { "epoch": 81.0, "eval_loss": 1.2402710914611816, "eval_mean_acc": 39.05107714371045, "eval_median_acc": 54.52054794520548, "eval_runtime": 30.8964, "eval_samples_per_second": 598.031, "eval_steps_per_second": 0.777, "step": 20250 }, { "epoch": 82.0, "learning_rate": 5.399999999999999e-05, "loss": 0.6476, "step": 20500 }, { "epoch": 82.0, "eval_loss": 1.1909747123718262, "eval_mean_acc": 37.29363801013909, "eval_median_acc": 54.24528301886793, "eval_runtime": 31.0069, "eval_samples_per_second": 595.9, "eval_steps_per_second": 0.774, "step": 20500 }, { "epoch": 83.0, "eval_loss": 1.2035155296325684, "eval_mean_acc": 41.25721916721249, "eval_median_acc": 54.7244094488189, "eval_runtime": 31.1934, "eval_samples_per_second": 592.336, "eval_steps_per_second": 0.769, "step": 20750 }, { "epoch": 84.0, "learning_rate": 4.7999999999999994e-05, "loss": 0.6457, "step": 21000 }, { "epoch": 84.0, "eval_loss": 1.2123523950576782, "eval_mean_acc": 38.7564566539536, "eval_median_acc": 54.4891640866873, "eval_runtime": 31.1487, "eval_samples_per_second": 593.187, "eval_steps_per_second": 0.77, "step": 21000 }, { "epoch": 85.0, "eval_loss": 1.232680320739746, "eval_mean_acc": 39.12864363006366, "eval_median_acc": 54.495912806539515, "eval_runtime": 30.8988, "eval_samples_per_second": 597.985, "eval_steps_per_second": 0.777, "step": 21250 }, { "epoch": 86.0, "learning_rate": 4.2e-05, "loss": 0.6437, "step": 21500 }, { "epoch": 86.0, "eval_loss": 1.22517991065979, "eval_mean_acc": 39.8797101803931, "eval_median_acc": 54.5774647887324, "eval_runtime": 31.1149, "eval_samples_per_second": 593.83, "eval_steps_per_second": 0.771, "step": 21500 }, { "epoch": 87.0, "eval_loss": 1.2346075773239136, "eval_mean_acc": 38.11276454520886, "eval_median_acc": 54.385964912280706, "eval_runtime": 31.1511, "eval_samples_per_second": 593.142, "eval_steps_per_second": 0.77, "step": 21750 }, { "epoch": 88.0, "learning_rate": 3.5999999999999994e-05, "loss": 0.642, "step": 22000 }, { "epoch": 88.0, "eval_loss": 1.244125247001648, "eval_mean_acc": 40.43582155286865, "eval_median_acc": 54.65116279069767, "eval_runtime": 31.1673, "eval_samples_per_second": 592.833, "eval_steps_per_second": 0.77, "step": 22000 }, { "epoch": 89.0, "eval_loss": 1.24880850315094, "eval_mean_acc": 39.99124281027319, "eval_median_acc": 54.59770114942529, "eval_runtime": 31.0818, "eval_samples_per_second": 594.463, "eval_steps_per_second": 0.772, "step": 22250 }, { "epoch": 90.0, "learning_rate": 2.9999999999999997e-05, "loss": 0.6403, "step": 22500 }, { "epoch": 90.0, "eval_loss": 1.2482763528823853, "eval_mean_acc": 39.91157811070793, "eval_median_acc": 54.60526315789473, "eval_runtime": 31.194, "eval_samples_per_second": 592.325, "eval_steps_per_second": 0.769, "step": 22500 }, { "epoch": 91.0, "eval_loss": 1.2674145698547363, "eval_mean_acc": 40.94577170886235, "eval_median_acc": 54.700854700854705, "eval_runtime": 31.202, "eval_samples_per_second": 592.174, "eval_steps_per_second": 0.769, "step": 22750 }, { "epoch": 92.0, "learning_rate": 2.3999999999999997e-05, "loss": 0.6387, "step": 23000 }, { "epoch": 92.0, "eval_loss": 1.269442081451416, "eval_mean_acc": 40.442322594799656, "eval_median_acc": 54.63576158940398, "eval_runtime": 31.0407, "eval_samples_per_second": 595.25, "eval_steps_per_second": 0.773, "step": 23000 }, { "epoch": 93.0, "eval_loss": 1.2716701030731201, "eval_mean_acc": 40.739734228756824, "eval_median_acc": 54.67625899280576, "eval_runtime": 31.2132, "eval_samples_per_second": 591.961, "eval_steps_per_second": 0.769, "step": 23250 }, { "epoch": 94.0, "learning_rate": 1.7999999999999997e-05, "loss": 0.6371, "step": 23500 }, { "epoch": 94.0, "eval_loss": 1.2819631099700928, "eval_mean_acc": 40.571867011274925, "eval_median_acc": 54.63917525773196, "eval_runtime": 31.0385, "eval_samples_per_second": 595.292, "eval_steps_per_second": 0.773, "step": 23500 }, { "epoch": 95.0, "eval_loss": 1.28830885887146, "eval_mean_acc": 40.353439886436945, "eval_median_acc": 54.666666666666664, "eval_runtime": 31.183, "eval_samples_per_second": 592.534, "eval_steps_per_second": 0.77, "step": 23750 }, { "epoch": 96.0, "learning_rate": 1.1999999999999999e-05, "loss": 0.6358, "step": 24000 }, { "epoch": 96.0, "eval_loss": 1.30391263961792, "eval_mean_acc": 40.60459621916925, "eval_median_acc": 54.666666666666664, "eval_runtime": 31.0782, "eval_samples_per_second": 594.532, "eval_steps_per_second": 0.772, "step": 24000 }, { "epoch": 97.0, "eval_loss": 1.3067735433578491, "eval_mean_acc": 41.452516923874725, "eval_median_acc": 54.773869346733676, "eval_runtime": 30.922, "eval_samples_per_second": 597.536, "eval_steps_per_second": 0.776, "step": 24250 }, { "epoch": 98.0, "learning_rate": 5.999999999999999e-06, "loss": 0.6347, "step": 24500 }, { "epoch": 98.0, "eval_loss": 1.313217282295227, "eval_mean_acc": 41.42466457051602, "eval_median_acc": 54.75409836065573, "eval_runtime": 31.2774, "eval_samples_per_second": 590.746, "eval_steps_per_second": 0.767, "step": 24500 }, { "epoch": 99.0, "eval_loss": 1.3124245405197144, "eval_mean_acc": 41.25630205018876, "eval_median_acc": 54.74452554744526, "eval_runtime": 30.8857, "eval_samples_per_second": 598.238, "eval_steps_per_second": 0.777, "step": 24750 }, { "epoch": 100.0, "learning_rate": 0.0, "loss": 0.6339, "step": 25000 }, { "epoch": 100.0, "eval_loss": 1.3174444437026978, "eval_mean_acc": 41.405191345713106, "eval_median_acc": 54.75578406169666, "eval_runtime": 31.1131, "eval_samples_per_second": 593.865, "eval_steps_per_second": 0.771, "step": 25000 }, { "epoch": 100.0, "step": 25000, "total_flos": 1.660761144e+18, "train_loss": 0.7862733935546875, "train_runtime": 36507.5594, "train_samples_per_second": 273.916, "train_steps_per_second": 0.685 } ], "max_steps": 25000, "num_train_epochs": 100, "total_flos": 1.660761144e+18, "trial_name": null, "trial_params": null }