|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 100.0, |
|
"global_step": 25000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.2708368301391602, |
|
"eval_mean_acc": 0.0, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.5811, |
|
"eval_samples_per_second": 624.621, |
|
"eval_steps_per_second": 0.811, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.000294, |
|
"loss": 1.2586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.240064024925232, |
|
"eval_mean_acc": 0.0, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 28.81, |
|
"eval_samples_per_second": 641.34, |
|
"eval_steps_per_second": 0.833, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.221199631690979, |
|
"eval_mean_acc": 0.0, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 28.8341, |
|
"eval_samples_per_second": 640.805, |
|
"eval_steps_per_second": 0.832, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00028799999999999995, |
|
"loss": 1.1999, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.1985267400741577, |
|
"eval_mean_acc": 0.002639524371016869, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2636, |
|
"eval_samples_per_second": 631.399, |
|
"eval_steps_per_second": 0.82, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.1824160814285278, |
|
"eval_mean_acc": 0.012239608588524707, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1488, |
|
"eval_samples_per_second": 633.885, |
|
"eval_steps_per_second": 0.823, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00028199999999999997, |
|
"loss": 1.1635, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.1715551614761353, |
|
"eval_mean_acc": 0.030072790065877857, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.3036, |
|
"eval_samples_per_second": 630.536, |
|
"eval_steps_per_second": 0.819, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.156156301498413, |
|
"eval_mean_acc": 0.029585336742049613, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2009, |
|
"eval_samples_per_second": 632.755, |
|
"eval_steps_per_second": 0.822, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.000276, |
|
"loss": 1.1361, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.1506972312927246, |
|
"eval_mean_acc": 0.008759450723621017, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1539, |
|
"eval_samples_per_second": 633.775, |
|
"eval_steps_per_second": 0.823, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.1393311023712158, |
|
"eval_mean_acc": 0.06489605427376179, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.65, |
|
"eval_samples_per_second": 623.17, |
|
"eval_steps_per_second": 0.809, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00027, |
|
"loss": 1.1142, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.1311384439468384, |
|
"eval_mean_acc": 0.015000378543482608, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.295, |
|
"eval_samples_per_second": 630.723, |
|
"eval_steps_per_second": 0.819, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.0849117040634155, |
|
"eval_mean_acc": 0.053357515682597535, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2649, |
|
"eval_samples_per_second": 631.37, |
|
"eval_steps_per_second": 0.82, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 1.0648, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.0643727779388428, |
|
"eval_mean_acc": 0.09116804447578762, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1753, |
|
"eval_samples_per_second": 633.309, |
|
"eval_steps_per_second": 0.823, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.0415236949920654, |
|
"eval_mean_acc": 0.1348069252298496, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2028, |
|
"eval_samples_per_second": 632.713, |
|
"eval_steps_per_second": 0.822, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.000258, |
|
"loss": 1.0185, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.0389618873596191, |
|
"eval_mean_acc": 0.0442671721053236, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.3877, |
|
"eval_samples_per_second": 628.732, |
|
"eval_steps_per_second": 0.817, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.0213755369186401, |
|
"eval_mean_acc": 0.11202564075823995, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1898, |
|
"eval_samples_per_second": 632.996, |
|
"eval_steps_per_second": 0.822, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.00025199999999999995, |
|
"loss": 0.9951, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.015223741531372, |
|
"eval_mean_acc": 0.16473584913990302, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2399, |
|
"eval_samples_per_second": 631.91, |
|
"eval_steps_per_second": 0.821, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 1.019250750541687, |
|
"eval_mean_acc": 0.11940677048185683, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.2372, |
|
"eval_samples_per_second": 631.97, |
|
"eval_steps_per_second": 0.821, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"learning_rate": 0.00024599999999999996, |
|
"loss": 0.9813, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 1.00924813747406, |
|
"eval_mean_acc": 0.11822115362550029, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1553, |
|
"eval_samples_per_second": 633.744, |
|
"eval_steps_per_second": 0.823, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 1.0164929628372192, |
|
"eval_mean_acc": 0.06716894444980748, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 29.1521, |
|
"eval_samples_per_second": 633.813, |
|
"eval_steps_per_second": 0.823, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.9625, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 0.9429653286933899, |
|
"eval_mean_acc": 9.008243218749312, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.0347, |
|
"eval_samples_per_second": 615.187, |
|
"eval_steps_per_second": 0.799, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 0.9300616979598999, |
|
"eval_mean_acc": 13.245211581468084, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.3256, |
|
"eval_samples_per_second": 609.287, |
|
"eval_steps_per_second": 0.791, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"learning_rate": 0.000234, |
|
"loss": 0.8958, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 0.9260903596878052, |
|
"eval_mean_acc": 8.987847368165264, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.0974, |
|
"eval_samples_per_second": 613.906, |
|
"eval_steps_per_second": 0.797, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 0.9173910617828369, |
|
"eval_mean_acc": 15.36412671561701, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.1139, |
|
"eval_samples_per_second": 613.57, |
|
"eval_steps_per_second": 0.797, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"learning_rate": 0.00022799999999999999, |
|
"loss": 0.8756, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 0.9115529656410217, |
|
"eval_mean_acc": 14.901058980647452, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.2491, |
|
"eval_samples_per_second": 610.828, |
|
"eval_steps_per_second": 0.793, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 0.9130357503890991, |
|
"eval_mean_acc": 12.268143458883413, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.1967, |
|
"eval_samples_per_second": 611.888, |
|
"eval_steps_per_second": 0.795, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"learning_rate": 0.00022199999999999998, |
|
"loss": 0.8607, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 0.9113653898239136, |
|
"eval_mean_acc": 15.28737721209223, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.4015, |
|
"eval_samples_per_second": 607.766, |
|
"eval_steps_per_second": 0.789, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 0.9104825854301453, |
|
"eval_mean_acc": 24.120487175005213, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.7064, |
|
"eval_samples_per_second": 601.731, |
|
"eval_steps_per_second": 0.782, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"learning_rate": 0.00021599999999999996, |
|
"loss": 0.8482, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 0.9082564115524292, |
|
"eval_mean_acc": 18.66072430390773, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.4946, |
|
"eval_samples_per_second": 605.911, |
|
"eval_steps_per_second": 0.787, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 0.9196337461471558, |
|
"eval_mean_acc": 17.72464537190866, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.2701, |
|
"eval_samples_per_second": 610.404, |
|
"eval_steps_per_second": 0.793, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.8359, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 0.9148876667022705, |
|
"eval_mean_acc": 19.7733289435757, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.705, |
|
"eval_samples_per_second": 601.759, |
|
"eval_steps_per_second": 0.782, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 0.9133378863334656, |
|
"eval_mean_acc": 18.62968067275681, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.3878, |
|
"eval_samples_per_second": 608.039, |
|
"eval_steps_per_second": 0.79, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 0.000204, |
|
"loss": 0.8232, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 0.9479327201843262, |
|
"eval_mean_acc": 12.27033306041223, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.187, |
|
"eval_samples_per_second": 612.085, |
|
"eval_steps_per_second": 0.795, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 0.930338978767395, |
|
"eval_mean_acc": 19.904256184480708, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.5106, |
|
"eval_samples_per_second": 605.593, |
|
"eval_steps_per_second": 0.787, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"learning_rate": 0.000198, |
|
"loss": 0.8092, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 0.9299731254577637, |
|
"eval_mean_acc": 22.351005658701947, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.5242, |
|
"eval_samples_per_second": 605.324, |
|
"eval_steps_per_second": 0.786, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 0.9295333027839661, |
|
"eval_mean_acc": 27.811848359305156, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.6542, |
|
"eval_samples_per_second": 602.756, |
|
"eval_steps_per_second": 0.783, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"learning_rate": 0.00019199999999999998, |
|
"loss": 0.7951, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 0.9439055323600769, |
|
"eval_mean_acc": 23.296268042588853, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.5922, |
|
"eval_samples_per_second": 603.977, |
|
"eval_steps_per_second": 0.785, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 0.962045431137085, |
|
"eval_mean_acc": 20.907431263561396, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.3915, |
|
"eval_samples_per_second": 607.966, |
|
"eval_steps_per_second": 0.79, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"learning_rate": 0.000186, |
|
"loss": 0.7803, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 0.9570873975753784, |
|
"eval_mean_acc": 28.782833553923417, |
|
"eval_median_acc": 52.30263157894737, |
|
"eval_runtime": 30.7856, |
|
"eval_samples_per_second": 600.183, |
|
"eval_steps_per_second": 0.78, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 0.9814175367355347, |
|
"eval_mean_acc": 25.267935353876744, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.4662, |
|
"eval_samples_per_second": 606.475, |
|
"eval_steps_per_second": 0.788, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.7669, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 0.9786842465400696, |
|
"eval_mean_acc": 31.28196134808705, |
|
"eval_median_acc": 53.38645418326693, |
|
"eval_runtime": 30.754, |
|
"eval_samples_per_second": 600.8, |
|
"eval_steps_per_second": 0.78, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 0.9765278100967407, |
|
"eval_mean_acc": 28.633316896351385, |
|
"eval_median_acc": 52.20338983050847, |
|
"eval_runtime": 30.6096, |
|
"eval_samples_per_second": 603.634, |
|
"eval_steps_per_second": 0.784, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"learning_rate": 0.00017399999999999997, |
|
"loss": 0.7529, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_loss": 1.0037761926651, |
|
"eval_mean_acc": 27.51536558563458, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.6294, |
|
"eval_samples_per_second": 603.244, |
|
"eval_steps_per_second": 0.784, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_loss": 1.033768892288208, |
|
"eval_mean_acc": 28.494027960898663, |
|
"eval_median_acc": 52.13675213675214, |
|
"eval_runtime": 30.6823, |
|
"eval_samples_per_second": 602.204, |
|
"eval_steps_per_second": 0.782, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"learning_rate": 0.000168, |
|
"loss": 0.7411, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_loss": 1.0279306173324585, |
|
"eval_mean_acc": 28.72059143849434, |
|
"eval_median_acc": 52.27817745803357, |
|
"eval_runtime": 30.711, |
|
"eval_samples_per_second": 601.642, |
|
"eval_steps_per_second": 0.781, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_loss": 1.0176538228988647, |
|
"eval_mean_acc": 29.058336469348834, |
|
"eval_median_acc": 52.41157556270096, |
|
"eval_runtime": 30.72, |
|
"eval_samples_per_second": 601.464, |
|
"eval_steps_per_second": 0.781, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"learning_rate": 0.000162, |
|
"loss": 0.7299, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_loss": 1.0147888660430908, |
|
"eval_mean_acc": 32.92298407705084, |
|
"eval_median_acc": 53.6, |
|
"eval_runtime": 30.8969, |
|
"eval_samples_per_second": 598.021, |
|
"eval_steps_per_second": 0.777, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_loss": 1.0400363206863403, |
|
"eval_mean_acc": 33.51743615357999, |
|
"eval_median_acc": 53.813559322033896, |
|
"eval_runtime": 31.0611, |
|
"eval_samples_per_second": 594.86, |
|
"eval_steps_per_second": 0.773, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 0.000156, |
|
"loss": 0.7198, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_loss": 1.0477961301803589, |
|
"eval_mean_acc": 30.930946796462933, |
|
"eval_median_acc": 53.25443786982249, |
|
"eval_runtime": 31.0217, |
|
"eval_samples_per_second": 595.615, |
|
"eval_steps_per_second": 0.774, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_loss": 1.0538278818130493, |
|
"eval_mean_acc": 29.24450853094501, |
|
"eval_median_acc": 52.569169960474305, |
|
"eval_runtime": 30.7608, |
|
"eval_samples_per_second": 600.668, |
|
"eval_steps_per_second": 0.78, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 0.00015, |
|
"loss": 0.7109, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_loss": 1.0524476766586304, |
|
"eval_mean_acc": 27.231249267136203, |
|
"eval_median_acc": 0.0, |
|
"eval_runtime": 30.6942, |
|
"eval_samples_per_second": 601.971, |
|
"eval_steps_per_second": 0.782, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_loss": 1.0861831903457642, |
|
"eval_mean_acc": 33.350417690919826, |
|
"eval_median_acc": 53.72340425531915, |
|
"eval_runtime": 31.0377, |
|
"eval_samples_per_second": 595.308, |
|
"eval_steps_per_second": 0.773, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 0.7036, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_loss": 1.074357271194458, |
|
"eval_mean_acc": 31.877880928875545, |
|
"eval_median_acc": 53.36787564766839, |
|
"eval_runtime": 30.8278, |
|
"eval_samples_per_second": 599.361, |
|
"eval_steps_per_second": 0.779, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_loss": 1.062804937362671, |
|
"eval_mean_acc": 28.372776202894872, |
|
"eval_median_acc": 51.71232876712328, |
|
"eval_runtime": 30.6841, |
|
"eval_samples_per_second": 602.169, |
|
"eval_steps_per_second": 0.782, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"learning_rate": 0.000138, |
|
"loss": 0.6963, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_loss": 1.0586843490600586, |
|
"eval_mean_acc": 30.98220246074368, |
|
"eval_median_acc": 53.125, |
|
"eval_runtime": 31.0347, |
|
"eval_samples_per_second": 595.366, |
|
"eval_steps_per_second": 0.773, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_loss": 1.083398699760437, |
|
"eval_mean_acc": 33.21303099917168, |
|
"eval_median_acc": 53.57142857142857, |
|
"eval_runtime": 30.8467, |
|
"eval_samples_per_second": 598.994, |
|
"eval_steps_per_second": 0.778, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"learning_rate": 0.00013199999999999998, |
|
"loss": 0.69, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_loss": 1.1077489852905273, |
|
"eval_mean_acc": 36.38462165724688, |
|
"eval_median_acc": 54.285714285714285, |
|
"eval_runtime": 31.0446, |
|
"eval_samples_per_second": 595.175, |
|
"eval_steps_per_second": 0.773, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_loss": 1.114971399307251, |
|
"eval_mean_acc": 32.35859059650532, |
|
"eval_median_acc": 53.49544072948328, |
|
"eval_runtime": 30.8614, |
|
"eval_samples_per_second": 598.708, |
|
"eval_steps_per_second": 0.778, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"learning_rate": 0.00012599999999999997, |
|
"loss": 0.6855, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_loss": 1.1352181434631348, |
|
"eval_mean_acc": 36.9014347424795, |
|
"eval_median_acc": 54.406130268199234, |
|
"eval_runtime": 31.0913, |
|
"eval_samples_per_second": 594.282, |
|
"eval_steps_per_second": 0.772, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_loss": 1.1556544303894043, |
|
"eval_mean_acc": 33.88705951883505, |
|
"eval_median_acc": 53.84615384615385, |
|
"eval_runtime": 30.8633, |
|
"eval_samples_per_second": 598.672, |
|
"eval_steps_per_second": 0.778, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.6811, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_loss": 1.1314884424209595, |
|
"eval_mean_acc": 33.595877918258616, |
|
"eval_median_acc": 53.77358490566038, |
|
"eval_runtime": 31.0682, |
|
"eval_samples_per_second": 594.724, |
|
"eval_steps_per_second": 0.772, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_loss": 1.0957316160202026, |
|
"eval_mean_acc": 32.99076589659652, |
|
"eval_median_acc": 53.57142857142857, |
|
"eval_runtime": 30.909, |
|
"eval_samples_per_second": 597.788, |
|
"eval_steps_per_second": 0.776, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 0.6768, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_loss": 1.1236425638198853, |
|
"eval_mean_acc": 32.76527042786048, |
|
"eval_median_acc": 53.61216730038023, |
|
"eval_runtime": 31.1533, |
|
"eval_samples_per_second": 593.099, |
|
"eval_steps_per_second": 0.77, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_loss": 1.1152857542037964, |
|
"eval_mean_acc": 34.82832308606056, |
|
"eval_median_acc": 53.94321766561514, |
|
"eval_runtime": 31.0538, |
|
"eval_samples_per_second": 595.001, |
|
"eval_steps_per_second": 0.773, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 0.00010799999999999998, |
|
"loss": 0.6722, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_loss": 1.1300369501113892, |
|
"eval_mean_acc": 35.01100739222209, |
|
"eval_median_acc": 53.96825396825397, |
|
"eval_runtime": 30.8553, |
|
"eval_samples_per_second": 598.827, |
|
"eval_steps_per_second": 0.778, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_loss": 1.1825590133666992, |
|
"eval_mean_acc": 35.92286498667092, |
|
"eval_median_acc": 54.146341463414636, |
|
"eval_runtime": 30.8364, |
|
"eval_samples_per_second": 599.194, |
|
"eval_steps_per_second": 0.778, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"learning_rate": 0.000102, |
|
"loss": 0.6682, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_loss": 1.153441309928894, |
|
"eval_mean_acc": 38.55105344645134, |
|
"eval_median_acc": 54.48504983388705, |
|
"eval_runtime": 31.0294, |
|
"eval_samples_per_second": 595.468, |
|
"eval_steps_per_second": 0.773, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_loss": 1.1635504961013794, |
|
"eval_mean_acc": 35.828909444304365, |
|
"eval_median_acc": 54.09836065573771, |
|
"eval_runtime": 30.7671, |
|
"eval_samples_per_second": 600.544, |
|
"eval_steps_per_second": 0.78, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"learning_rate": 9.599999999999999e-05, |
|
"loss": 0.6653, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_loss": 1.1404204368591309, |
|
"eval_mean_acc": 34.65428003659497, |
|
"eval_median_acc": 53.84615384615385, |
|
"eval_runtime": 30.8901, |
|
"eval_samples_per_second": 598.152, |
|
"eval_steps_per_second": 0.777, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_loss": 1.1473366022109985, |
|
"eval_mean_acc": 36.24934966791388, |
|
"eval_median_acc": 54.12087912087912, |
|
"eval_runtime": 31.0971, |
|
"eval_samples_per_second": 594.172, |
|
"eval_steps_per_second": 0.772, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 0.6624, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_loss": 1.1532074213027954, |
|
"eval_mean_acc": 39.51401788696222, |
|
"eval_median_acc": 54.5774647887324, |
|
"eval_runtime": 31.0363, |
|
"eval_samples_per_second": 595.334, |
|
"eval_steps_per_second": 0.773, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_loss": 1.1715244054794312, |
|
"eval_mean_acc": 36.23543546260951, |
|
"eval_median_acc": 54.12541254125413, |
|
"eval_runtime": 30.897, |
|
"eval_samples_per_second": 598.02, |
|
"eval_steps_per_second": 0.777, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"learning_rate": 8.4e-05, |
|
"loss": 0.6597, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_loss": 1.187477469444275, |
|
"eval_mean_acc": 35.50233390601532, |
|
"eval_median_acc": 54.08560311284047, |
|
"eval_runtime": 30.998, |
|
"eval_samples_per_second": 596.07, |
|
"eval_steps_per_second": 0.774, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_loss": 1.164323329925537, |
|
"eval_mean_acc": 34.50470237269365, |
|
"eval_median_acc": 53.90243902439025, |
|
"eval_runtime": 30.8465, |
|
"eval_samples_per_second": 598.997, |
|
"eval_steps_per_second": 0.778, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"learning_rate": 7.8e-05, |
|
"loss": 0.657, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_loss": 1.1893519163131714, |
|
"eval_mean_acc": 38.75609974678352, |
|
"eval_median_acc": 54.492753623188406, |
|
"eval_runtime": 31.2805, |
|
"eval_samples_per_second": 590.688, |
|
"eval_steps_per_second": 0.767, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_loss": 1.208187222480774, |
|
"eval_mean_acc": 38.215720244506755, |
|
"eval_median_acc": 54.460093896713616, |
|
"eval_runtime": 31.3374, |
|
"eval_samples_per_second": 589.615, |
|
"eval_steps_per_second": 0.766, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"learning_rate": 7.199999999999999e-05, |
|
"loss": 0.6543, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_loss": 1.1842811107635498, |
|
"eval_mean_acc": 34.28566002554328, |
|
"eval_median_acc": 53.883495145631066, |
|
"eval_runtime": 30.9371, |
|
"eval_samples_per_second": 597.244, |
|
"eval_steps_per_second": 0.776, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_loss": 1.1689387559890747, |
|
"eval_mean_acc": 38.42640276827011, |
|
"eval_median_acc": 54.43548387096774, |
|
"eval_runtime": 31.0373, |
|
"eval_samples_per_second": 595.316, |
|
"eval_steps_per_second": 0.773, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"learning_rate": 6.599999999999999e-05, |
|
"loss": 0.652, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_loss": 1.2084593772888184, |
|
"eval_mean_acc": 37.749512857893784, |
|
"eval_median_acc": 54.37499999999999, |
|
"eval_runtime": 31.1448, |
|
"eval_samples_per_second": 593.261, |
|
"eval_steps_per_second": 0.771, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_loss": 1.1984684467315674, |
|
"eval_mean_acc": 39.14441481832044, |
|
"eval_median_acc": 54.518950437317784, |
|
"eval_runtime": 30.9307, |
|
"eval_samples_per_second": 597.368, |
|
"eval_steps_per_second": 0.776, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 0.6497, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_loss": 1.2331078052520752, |
|
"eval_mean_acc": 40.01896688672192, |
|
"eval_median_acc": 54.61254612546126, |
|
"eval_runtime": 31.1205, |
|
"eval_samples_per_second": 593.724, |
|
"eval_steps_per_second": 0.771, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_loss": 1.2402710914611816, |
|
"eval_mean_acc": 39.05107714371045, |
|
"eval_median_acc": 54.52054794520548, |
|
"eval_runtime": 30.8964, |
|
"eval_samples_per_second": 598.031, |
|
"eval_steps_per_second": 0.777, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 0.6476, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_loss": 1.1909747123718262, |
|
"eval_mean_acc": 37.29363801013909, |
|
"eval_median_acc": 54.24528301886793, |
|
"eval_runtime": 31.0069, |
|
"eval_samples_per_second": 595.9, |
|
"eval_steps_per_second": 0.774, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_loss": 1.2035155296325684, |
|
"eval_mean_acc": 41.25721916721249, |
|
"eval_median_acc": 54.7244094488189, |
|
"eval_runtime": 31.1934, |
|
"eval_samples_per_second": 592.336, |
|
"eval_steps_per_second": 0.769, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 0.6457, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_loss": 1.2123523950576782, |
|
"eval_mean_acc": 38.7564566539536, |
|
"eval_median_acc": 54.4891640866873, |
|
"eval_runtime": 31.1487, |
|
"eval_samples_per_second": 593.187, |
|
"eval_steps_per_second": 0.77, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_loss": 1.232680320739746, |
|
"eval_mean_acc": 39.12864363006366, |
|
"eval_median_acc": 54.495912806539515, |
|
"eval_runtime": 30.8988, |
|
"eval_samples_per_second": 597.985, |
|
"eval_steps_per_second": 0.777, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.6437, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_loss": 1.22517991065979, |
|
"eval_mean_acc": 39.8797101803931, |
|
"eval_median_acc": 54.5774647887324, |
|
"eval_runtime": 31.1149, |
|
"eval_samples_per_second": 593.83, |
|
"eval_steps_per_second": 0.771, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_loss": 1.2346075773239136, |
|
"eval_mean_acc": 38.11276454520886, |
|
"eval_median_acc": 54.385964912280706, |
|
"eval_runtime": 31.1511, |
|
"eval_samples_per_second": 593.142, |
|
"eval_steps_per_second": 0.77, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"learning_rate": 3.5999999999999994e-05, |
|
"loss": 0.642, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_loss": 1.244125247001648, |
|
"eval_mean_acc": 40.43582155286865, |
|
"eval_median_acc": 54.65116279069767, |
|
"eval_runtime": 31.1673, |
|
"eval_samples_per_second": 592.833, |
|
"eval_steps_per_second": 0.77, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_loss": 1.24880850315094, |
|
"eval_mean_acc": 39.99124281027319, |
|
"eval_median_acc": 54.59770114942529, |
|
"eval_runtime": 31.0818, |
|
"eval_samples_per_second": 594.463, |
|
"eval_steps_per_second": 0.772, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.6403, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_loss": 1.2482763528823853, |
|
"eval_mean_acc": 39.91157811070793, |
|
"eval_median_acc": 54.60526315789473, |
|
"eval_runtime": 31.194, |
|
"eval_samples_per_second": 592.325, |
|
"eval_steps_per_second": 0.769, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_loss": 1.2674145698547363, |
|
"eval_mean_acc": 40.94577170886235, |
|
"eval_median_acc": 54.700854700854705, |
|
"eval_runtime": 31.202, |
|
"eval_samples_per_second": 592.174, |
|
"eval_steps_per_second": 0.769, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 0.6387, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_loss": 1.269442081451416, |
|
"eval_mean_acc": 40.442322594799656, |
|
"eval_median_acc": 54.63576158940398, |
|
"eval_runtime": 31.0407, |
|
"eval_samples_per_second": 595.25, |
|
"eval_steps_per_second": 0.773, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_loss": 1.2716701030731201, |
|
"eval_mean_acc": 40.739734228756824, |
|
"eval_median_acc": 54.67625899280576, |
|
"eval_runtime": 31.2132, |
|
"eval_samples_per_second": 591.961, |
|
"eval_steps_per_second": 0.769, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.6371, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_loss": 1.2819631099700928, |
|
"eval_mean_acc": 40.571867011274925, |
|
"eval_median_acc": 54.63917525773196, |
|
"eval_runtime": 31.0385, |
|
"eval_samples_per_second": 595.292, |
|
"eval_steps_per_second": 0.773, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_loss": 1.28830885887146, |
|
"eval_mean_acc": 40.353439886436945, |
|
"eval_median_acc": 54.666666666666664, |
|
"eval_runtime": 31.183, |
|
"eval_samples_per_second": 592.534, |
|
"eval_steps_per_second": 0.77, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 1.1999999999999999e-05, |
|
"loss": 0.6358, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_loss": 1.30391263961792, |
|
"eval_mean_acc": 40.60459621916925, |
|
"eval_median_acc": 54.666666666666664, |
|
"eval_runtime": 31.0782, |
|
"eval_samples_per_second": 594.532, |
|
"eval_steps_per_second": 0.772, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_loss": 1.3067735433578491, |
|
"eval_mean_acc": 41.452516923874725, |
|
"eval_median_acc": 54.773869346733676, |
|
"eval_runtime": 30.922, |
|
"eval_samples_per_second": 597.536, |
|
"eval_steps_per_second": 0.776, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"learning_rate": 5.999999999999999e-06, |
|
"loss": 0.6347, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_loss": 1.313217282295227, |
|
"eval_mean_acc": 41.42466457051602, |
|
"eval_median_acc": 54.75409836065573, |
|
"eval_runtime": 31.2774, |
|
"eval_samples_per_second": 590.746, |
|
"eval_steps_per_second": 0.767, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_loss": 1.3124245405197144, |
|
"eval_mean_acc": 41.25630205018876, |
|
"eval_median_acc": 54.74452554744526, |
|
"eval_runtime": 30.8857, |
|
"eval_samples_per_second": 598.238, |
|
"eval_steps_per_second": 0.777, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.6339, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 1.3174444437026978, |
|
"eval_mean_acc": 41.405191345713106, |
|
"eval_median_acc": 54.75578406169666, |
|
"eval_runtime": 31.1131, |
|
"eval_samples_per_second": 593.865, |
|
"eval_steps_per_second": 0.771, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"step": 25000, |
|
"total_flos": 1.660761144e+18, |
|
"train_loss": 0.7862733935546875, |
|
"train_runtime": 36507.5594, |
|
"train_samples_per_second": 273.916, |
|
"train_steps_per_second": 0.685 |
|
} |
|
], |
|
"max_steps": 25000, |
|
"num_train_epochs": 100, |
|
"total_flos": 1.660761144e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|