{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9997473471450227, "global_step": 7915, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "eval_accuracy": 0.6880803486829639, "eval_loss": 0.6034769415855408, "eval_runtime": 15.5577, "eval_samples_per_second": 339.188, "eval_steps_per_second": 10.606, "step": 300 }, { "epoch": 0.13, "learning_rate": 4.8736735725113694e-05, "loss": 0.621, "step": 500 }, { "epoch": 0.15, "eval_accuracy": 0.7134735645252984, "eval_loss": 0.5855737328529358, "eval_runtime": 15.4335, "eval_samples_per_second": 341.918, "eval_steps_per_second": 10.691, "step": 600 }, { "epoch": 0.23, "eval_accuracy": 0.7197271176805002, "eval_loss": 0.5775426030158997, "eval_runtime": 15.4427, "eval_samples_per_second": 341.716, "eval_steps_per_second": 10.685, "step": 900 }, { "epoch": 0.25, "learning_rate": 4.747347145022739e-05, "loss": 0.6011, "step": 1000 }, { "epoch": 0.3, "eval_accuracy": 0.7199166192912639, "eval_loss": 0.573106050491333, "eval_runtime": 15.5167, "eval_samples_per_second": 340.085, "eval_steps_per_second": 10.634, "step": 1200 }, { "epoch": 0.38, "learning_rate": 4.6210207175341085e-05, "loss": 0.5993, "step": 1500 }, { "epoch": 0.38, "eval_accuracy": 0.719348114458973, "eval_loss": 0.575564444065094, "eval_runtime": 15.5001, "eval_samples_per_second": 340.45, "eval_steps_per_second": 10.645, "step": 1500 }, { "epoch": 0.45, "eval_accuracy": 0.734697744930832, "eval_loss": 0.5668568015098572, "eval_runtime": 16.1562, "eval_samples_per_second": 326.623, "eval_steps_per_second": 10.213, "step": 1800 }, { "epoch": 0.51, "learning_rate": 4.4946942900454776e-05, "loss": 0.5901, "step": 2000 }, { "epoch": 0.53, "eval_accuracy": 0.7394352851999242, "eval_loss": 0.5582404732704163, "eval_runtime": 15.7671, "eval_samples_per_second": 334.684, "eval_steps_per_second": 10.465, "step": 2100 }, { "epoch": 0.61, "eval_accuracy": 0.7470153496304719, "eval_loss": 0.5583205819129944, "eval_runtime": 15.5702, "eval_samples_per_second": 338.917, "eval_steps_per_second": 10.597, "step": 2400 }, { "epoch": 0.63, "learning_rate": 4.368367862556847e-05, "loss": 0.578, "step": 2500 }, { "epoch": 0.68, "eval_accuracy": 0.7436043206367254, "eval_loss": 0.5834795236587524, "eval_runtime": 15.6308, "eval_samples_per_second": 337.602, "eval_steps_per_second": 10.556, "step": 2700 }, { "epoch": 0.76, "learning_rate": 4.242041435068217e-05, "loss": 0.579, "step": 3000 }, { "epoch": 0.76, "eval_accuracy": 0.7220011370096646, "eval_loss": 0.5708434581756592, "eval_runtime": 15.582, "eval_samples_per_second": 338.66, "eval_steps_per_second": 10.589, "step": 3000 }, { "epoch": 0.83, "eval_accuracy": 0.762933484934622, "eval_loss": 0.5381360650062561, "eval_runtime": 15.5571, "eval_samples_per_second": 339.201, "eval_steps_per_second": 10.606, "step": 3300 }, { "epoch": 0.88, "learning_rate": 4.115715007579586e-05, "loss": 0.5735, "step": 3500 }, { "epoch": 0.91, "eval_accuracy": 0.7642599962099678, "eval_loss": 0.539585530757904, "eval_runtime": 15.6069, "eval_samples_per_second": 338.12, "eval_steps_per_second": 10.572, "step": 3600 }, { "epoch": 0.99, "eval_accuracy": 0.7652075042637863, "eval_loss": 0.5309613347053528, "eval_runtime": 15.6636, "eval_samples_per_second": 336.896, "eval_steps_per_second": 10.534, "step": 3900 }, { "epoch": 1.01, "learning_rate": 3.989388580090955e-05, "loss": 0.5534, "step": 4000 }, { "epoch": 1.06, "eval_accuracy": 0.7638809929884404, "eval_loss": 0.5623990297317505, "eval_runtime": 15.5126, "eval_samples_per_second": 340.174, "eval_steps_per_second": 10.636, "step": 4200 }, { "epoch": 1.14, "learning_rate": 3.863314805457302e-05, "loss": 0.4267, "step": 4500 }, { "epoch": 1.14, "eval_accuracy": 0.7400037900322153, "eval_loss": 0.611989438533783, "eval_runtime": 15.5686, "eval_samples_per_second": 338.951, "eval_steps_per_second": 10.598, "step": 4500 }, { "epoch": 1.21, "eval_accuracy": 0.7657760090960773, "eval_loss": 0.5793063640594482, "eval_runtime": 15.5802, "eval_samples_per_second": 338.7, "eval_steps_per_second": 10.59, "step": 4800 }, { "epoch": 1.26, "learning_rate": 3.736988377968671e-05, "loss": 0.4283, "step": 5000 }, { "epoch": 1.29, "eval_accuracy": 0.7633124881561494, "eval_loss": 0.5923583507537842, "eval_runtime": 15.5465, "eval_samples_per_second": 339.432, "eval_steps_per_second": 10.613, "step": 5100 }, { "epoch": 1.36, "eval_accuracy": 0.7619859768808035, "eval_loss": 0.5652284622192383, "eval_runtime": 15.5292, "eval_samples_per_second": 339.811, "eval_steps_per_second": 10.625, "step": 5400 }, { "epoch": 1.39, "learning_rate": 3.61066195048004e-05, "loss": 0.4306, "step": 5500 }, { "epoch": 1.44, "eval_accuracy": 0.7589539511085844, "eval_loss": 0.571613073348999, "eval_runtime": 15.6239, "eval_samples_per_second": 337.753, "eval_steps_per_second": 10.561, "step": 5700 }, { "epoch": 1.52, "learning_rate": 3.48433552299141e-05, "loss": 0.4268, "step": 6000 }, { "epoch": 1.52, "eval_accuracy": 0.768429031646769, "eval_loss": 0.5530590415000916, "eval_runtime": 15.5281, "eval_samples_per_second": 339.836, "eval_steps_per_second": 10.626, "step": 6000 }, { "epoch": 1.59, "eval_accuracy": 0.7595224559408755, "eval_loss": 0.5949699282646179, "eval_runtime": 15.5065, "eval_samples_per_second": 340.31, "eval_steps_per_second": 10.641, "step": 6300 }, { "epoch": 1.64, "learning_rate": 3.3582617483577566e-05, "loss": 0.4296, "step": 6500 }, { "epoch": 1.67, "eval_accuracy": 0.7748720864127345, "eval_loss": 0.5415990352630615, "eval_runtime": 15.6991, "eval_samples_per_second": 336.134, "eval_steps_per_second": 10.51, "step": 6600 }, { "epoch": 1.74, "eval_accuracy": 0.7686185332575327, "eval_loss": 0.5780544281005859, "eval_runtime": 15.5645, "eval_samples_per_second": 339.04, "eval_steps_per_second": 10.601, "step": 6900 }, { "epoch": 1.77, "learning_rate": 3.231935320869126e-05, "loss": 0.43, "step": 7000 }, { "epoch": 1.82, "eval_accuracy": 0.7744930831912071, "eval_loss": 0.5756574869155884, "eval_runtime": 15.5129, "eval_samples_per_second": 340.168, "eval_steps_per_second": 10.636, "step": 7200 }, { "epoch": 1.89, "learning_rate": 3.105608893380496e-05, "loss": 0.4317, "step": 7500 }, { "epoch": 1.89, "eval_accuracy": 0.7761985976880803, "eval_loss": 0.5252366065979004, "eval_runtime": 15.6912, "eval_samples_per_second": 336.304, "eval_steps_per_second": 10.515, "step": 7500 }, { "epoch": 1.97, "eval_accuracy": 0.7835891605078643, "eval_loss": 0.5250930190086365, "eval_runtime": 15.5911, "eval_samples_per_second": 338.462, "eval_steps_per_second": 10.583, "step": 7800 } ], "max_steps": 19790, "num_train_epochs": 5, "total_flos": 1.69413405769332e+16, "trial_name": null, "trial_params": null }