|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9997473471450227, |
|
"global_step": 7915, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.6880803486829639, |
|
"eval_loss": 0.6034769415855408, |
|
"eval_runtime": 15.5577, |
|
"eval_samples_per_second": 339.188, |
|
"eval_steps_per_second": 10.606, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.8736735725113694e-05, |
|
"loss": 0.621, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.7134735645252984, |
|
"eval_loss": 0.5855737328529358, |
|
"eval_runtime": 15.4335, |
|
"eval_samples_per_second": 341.918, |
|
"eval_steps_per_second": 10.691, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.7197271176805002, |
|
"eval_loss": 0.5775426030158997, |
|
"eval_runtime": 15.4427, |
|
"eval_samples_per_second": 341.716, |
|
"eval_steps_per_second": 10.685, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.747347145022739e-05, |
|
"loss": 0.6011, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.7199166192912639, |
|
"eval_loss": 0.573106050491333, |
|
"eval_runtime": 15.5167, |
|
"eval_samples_per_second": 340.085, |
|
"eval_steps_per_second": 10.634, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.6210207175341085e-05, |
|
"loss": 0.5993, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.719348114458973, |
|
"eval_loss": 0.575564444065094, |
|
"eval_runtime": 15.5001, |
|
"eval_samples_per_second": 340.45, |
|
"eval_steps_per_second": 10.645, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.734697744930832, |
|
"eval_loss": 0.5668568015098572, |
|
"eval_runtime": 16.1562, |
|
"eval_samples_per_second": 326.623, |
|
"eval_steps_per_second": 10.213, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.4946942900454776e-05, |
|
"loss": 0.5901, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.7394352851999242, |
|
"eval_loss": 0.5582404732704163, |
|
"eval_runtime": 15.7671, |
|
"eval_samples_per_second": 334.684, |
|
"eval_steps_per_second": 10.465, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7470153496304719, |
|
"eval_loss": 0.5583205819129944, |
|
"eval_runtime": 15.5702, |
|
"eval_samples_per_second": 338.917, |
|
"eval_steps_per_second": 10.597, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 4.368367862556847e-05, |
|
"loss": 0.578, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7436043206367254, |
|
"eval_loss": 0.5834795236587524, |
|
"eval_runtime": 15.6308, |
|
"eval_samples_per_second": 337.602, |
|
"eval_steps_per_second": 10.556, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.242041435068217e-05, |
|
"loss": 0.579, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.7220011370096646, |
|
"eval_loss": 0.5708434581756592, |
|
"eval_runtime": 15.582, |
|
"eval_samples_per_second": 338.66, |
|
"eval_steps_per_second": 10.589, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.762933484934622, |
|
"eval_loss": 0.5381360650062561, |
|
"eval_runtime": 15.5571, |
|
"eval_samples_per_second": 339.201, |
|
"eval_steps_per_second": 10.606, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.115715007579586e-05, |
|
"loss": 0.5735, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_accuracy": 0.7642599962099678, |
|
"eval_loss": 0.539585530757904, |
|
"eval_runtime": 15.6069, |
|
"eval_samples_per_second": 338.12, |
|
"eval_steps_per_second": 10.572, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7652075042637863, |
|
"eval_loss": 0.5309613347053528, |
|
"eval_runtime": 15.6636, |
|
"eval_samples_per_second": 336.896, |
|
"eval_steps_per_second": 10.534, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 3.989388580090955e-05, |
|
"loss": 0.5534, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.7638809929884404, |
|
"eval_loss": 0.5623990297317505, |
|
"eval_runtime": 15.5126, |
|
"eval_samples_per_second": 340.174, |
|
"eval_steps_per_second": 10.636, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.863314805457302e-05, |
|
"loss": 0.4267, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_accuracy": 0.7400037900322153, |
|
"eval_loss": 0.611989438533783, |
|
"eval_runtime": 15.5686, |
|
"eval_samples_per_second": 338.951, |
|
"eval_steps_per_second": 10.598, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_accuracy": 0.7657760090960773, |
|
"eval_loss": 0.5793063640594482, |
|
"eval_runtime": 15.5802, |
|
"eval_samples_per_second": 338.7, |
|
"eval_steps_per_second": 10.59, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.736988377968671e-05, |
|
"loss": 0.4283, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7633124881561494, |
|
"eval_loss": 0.5923583507537842, |
|
"eval_runtime": 15.5465, |
|
"eval_samples_per_second": 339.432, |
|
"eval_steps_per_second": 10.613, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7619859768808035, |
|
"eval_loss": 0.5652284622192383, |
|
"eval_runtime": 15.5292, |
|
"eval_samples_per_second": 339.811, |
|
"eval_steps_per_second": 10.625, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.61066195048004e-05, |
|
"loss": 0.4306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_accuracy": 0.7589539511085844, |
|
"eval_loss": 0.571613073348999, |
|
"eval_runtime": 15.6239, |
|
"eval_samples_per_second": 337.753, |
|
"eval_steps_per_second": 10.561, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.48433552299141e-05, |
|
"loss": 0.4268, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.768429031646769, |
|
"eval_loss": 0.5530590415000916, |
|
"eval_runtime": 15.5281, |
|
"eval_samples_per_second": 339.836, |
|
"eval_steps_per_second": 10.626, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_accuracy": 0.7595224559408755, |
|
"eval_loss": 0.5949699282646179, |
|
"eval_runtime": 15.5065, |
|
"eval_samples_per_second": 340.31, |
|
"eval_steps_per_second": 10.641, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.3582617483577566e-05, |
|
"loss": 0.4296, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.7748720864127345, |
|
"eval_loss": 0.5415990352630615, |
|
"eval_runtime": 15.6991, |
|
"eval_samples_per_second": 336.134, |
|
"eval_steps_per_second": 10.51, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"eval_accuracy": 0.7686185332575327, |
|
"eval_loss": 0.5780544281005859, |
|
"eval_runtime": 15.5645, |
|
"eval_samples_per_second": 339.04, |
|
"eval_steps_per_second": 10.601, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.231935320869126e-05, |
|
"loss": 0.43, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_accuracy": 0.7744930831912071, |
|
"eval_loss": 0.5756574869155884, |
|
"eval_runtime": 15.5129, |
|
"eval_samples_per_second": 340.168, |
|
"eval_steps_per_second": 10.636, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.105608893380496e-05, |
|
"loss": 0.4317, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_accuracy": 0.7761985976880803, |
|
"eval_loss": 0.5252366065979004, |
|
"eval_runtime": 15.6912, |
|
"eval_samples_per_second": 336.304, |
|
"eval_steps_per_second": 10.515, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.7835891605078643, |
|
"eval_loss": 0.5250930190086365, |
|
"eval_runtime": 15.5911, |
|
"eval_samples_per_second": 338.462, |
|
"eval_steps_per_second": 10.583, |
|
"step": 7800 |
|
} |
|
], |
|
"max_steps": 19790, |
|
"num_train_epochs": 5, |
|
"total_flos": 1.69413405769332e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|