|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.005759373380176237, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.759373380176237e-05, |
|
"eval_loss": 2.6716082096099854, |
|
"eval_runtime": 1331.768, |
|
"eval_samples_per_second": 21.958, |
|
"eval_steps_per_second": 2.745, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0001727812014052871, |
|
"grad_norm": 1.0326002836227417, |
|
"learning_rate": 3e-05, |
|
"loss": 10.8192, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0003455624028105742, |
|
"grad_norm": 1.0763981342315674, |
|
"learning_rate": 6e-05, |
|
"loss": 10.7332, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0005183436042158614, |
|
"grad_norm": 1.2686634063720703, |
|
"learning_rate": 9e-05, |
|
"loss": 10.432, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0005183436042158614, |
|
"eval_loss": 2.629826784133911, |
|
"eval_runtime": 1338.5029, |
|
"eval_samples_per_second": 21.848, |
|
"eval_steps_per_second": 2.731, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0006911248056211485, |
|
"grad_norm": 1.1208940744400024, |
|
"learning_rate": 9.987820251299122e-05, |
|
"loss": 10.5925, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.0008639060070264355, |
|
"grad_norm": 1.1096599102020264, |
|
"learning_rate": 9.924038765061042e-05, |
|
"loss": 9.8268, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0010366872084317227, |
|
"grad_norm": 1.2350603342056274, |
|
"learning_rate": 9.806308479691595e-05, |
|
"loss": 10.5695, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0010366872084317227, |
|
"eval_loss": 2.5420780181884766, |
|
"eval_runtime": 1337.1556, |
|
"eval_samples_per_second": 21.87, |
|
"eval_steps_per_second": 2.734, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0012094684098370098, |
|
"grad_norm": 1.1920151710510254, |
|
"learning_rate": 9.635919272833938e-05, |
|
"loss": 10.2468, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.001382249611242297, |
|
"grad_norm": 0.8807956576347351, |
|
"learning_rate": 9.414737964294636e-05, |
|
"loss": 9.8125, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.001555030812647584, |
|
"grad_norm": 0.8759644627571106, |
|
"learning_rate": 9.145187862775209e-05, |
|
"loss": 9.77, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.001555030812647584, |
|
"eval_loss": 2.4928576946258545, |
|
"eval_runtime": 1338.7702, |
|
"eval_samples_per_second": 21.843, |
|
"eval_steps_per_second": 2.731, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.001727812014052871, |
|
"grad_norm": 0.9653000235557556, |
|
"learning_rate": 8.83022221559489e-05, |
|
"loss": 10.2316, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0019005932154581582, |
|
"grad_norm": 0.8884105682373047, |
|
"learning_rate": 8.473291852294987e-05, |
|
"loss": 10.0839, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0020733744168634455, |
|
"grad_norm": 1.2079200744628906, |
|
"learning_rate": 8.07830737662829e-05, |
|
"loss": 10.2626, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0020733744168634455, |
|
"eval_loss": 2.4689135551452637, |
|
"eval_runtime": 1339.7306, |
|
"eval_samples_per_second": 21.828, |
|
"eval_steps_per_second": 2.729, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0022461556182687323, |
|
"grad_norm": 0.9944831132888794, |
|
"learning_rate": 7.649596321166024e-05, |
|
"loss": 9.843, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0024189368196740196, |
|
"grad_norm": 0.935939371585846, |
|
"learning_rate": 7.191855733945387e-05, |
|
"loss": 9.7281, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0025917180210793065, |
|
"grad_norm": 1.00493323802948, |
|
"learning_rate": 6.710100716628344e-05, |
|
"loss": 10.2203, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0025917180210793065, |
|
"eval_loss": 2.4538426399230957, |
|
"eval_runtime": 1338.817, |
|
"eval_samples_per_second": 21.842, |
|
"eval_steps_per_second": 2.731, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.002764499222484594, |
|
"grad_norm": 0.9063613414764404, |
|
"learning_rate": 6.209609477998338e-05, |
|
"loss": 9.8641, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0029372804238898807, |
|
"grad_norm": 1.0362025499343872, |
|
"learning_rate": 5.695865504800327e-05, |
|
"loss": 9.7865, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.003110061625295168, |
|
"grad_norm": 0.9377679824829102, |
|
"learning_rate": 5.174497483512506e-05, |
|
"loss": 10.0832, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.003110061625295168, |
|
"eval_loss": 2.4470882415771484, |
|
"eval_runtime": 1338.2458, |
|
"eval_samples_per_second": 21.852, |
|
"eval_steps_per_second": 2.732, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.003282842826700455, |
|
"grad_norm": 1.549514651298523, |
|
"learning_rate": 4.6512176312793736e-05, |
|
"loss": 9.8436, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.003455624028105742, |
|
"grad_norm": 0.9654721021652222, |
|
"learning_rate": 4.131759111665349e-05, |
|
"loss": 10.4088, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.003628405229511029, |
|
"grad_norm": 0.9682468771934509, |
|
"learning_rate": 3.6218132209150045e-05, |
|
"loss": 9.6252, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.003628405229511029, |
|
"eval_loss": 2.440478563308716, |
|
"eval_runtime": 1339.1783, |
|
"eval_samples_per_second": 21.837, |
|
"eval_steps_per_second": 2.73, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0038011864309163163, |
|
"grad_norm": 1.087673306465149, |
|
"learning_rate": 3.12696703292044e-05, |
|
"loss": 9.8558, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.003973967632321603, |
|
"grad_norm": 0.9650527834892273, |
|
"learning_rate": 2.6526421860705473e-05, |
|
"loss": 9.4286, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.004146748833726891, |
|
"grad_norm": 1.0395511388778687, |
|
"learning_rate": 2.2040354826462668e-05, |
|
"loss": 9.8884, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.004146748833726891, |
|
"eval_loss": 2.4370479583740234, |
|
"eval_runtime": 1337.1445, |
|
"eval_samples_per_second": 21.87, |
|
"eval_steps_per_second": 2.734, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.004319530035132178, |
|
"grad_norm": 0.8846902251243591, |
|
"learning_rate": 1.7860619515673033e-05, |
|
"loss": 9.9641, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.004492311236537465, |
|
"grad_norm": 1.0415371656417847, |
|
"learning_rate": 1.4033009983067452e-05, |
|
"loss": 9.3965, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.0046650924379427515, |
|
"grad_norm": 1.0671367645263672, |
|
"learning_rate": 1.0599462319663905e-05, |
|
"loss": 9.7664, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.0046650924379427515, |
|
"eval_loss": 2.4352002143859863, |
|
"eval_runtime": 1335.8618, |
|
"eval_samples_per_second": 21.891, |
|
"eval_steps_per_second": 2.737, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.004837873639348039, |
|
"grad_norm": 1.150208830833435, |
|
"learning_rate": 7.597595192178702e-06, |
|
"loss": 9.6133, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.005010654840753326, |
|
"grad_norm": 0.9796428084373474, |
|
"learning_rate": 5.060297685041659e-06, |
|
"loss": 10.1937, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.005183436042158613, |
|
"grad_norm": 0.966742992401123, |
|
"learning_rate": 3.0153689607045845e-06, |
|
"loss": 9.7158, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.005183436042158613, |
|
"eval_loss": 2.434671401977539, |
|
"eval_runtime": 1339.1776, |
|
"eval_samples_per_second": 21.837, |
|
"eval_steps_per_second": 2.73, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0053562172435639, |
|
"grad_norm": 0.9962571859359741, |
|
"learning_rate": 1.4852136862001764e-06, |
|
"loss": 9.2092, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.005528998444969188, |
|
"grad_norm": 1.0119867324829102, |
|
"learning_rate": 4.865965629214819e-07, |
|
"loss": 9.5268, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0057017796463744745, |
|
"grad_norm": 1.0774887800216675, |
|
"learning_rate": 3.04586490452119e-08, |
|
"loss": 9.4073, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0057017796463744745, |
|
"eval_loss": 2.4344372749328613, |
|
"eval_runtime": 1338.1241, |
|
"eval_samples_per_second": 21.854, |
|
"eval_steps_per_second": 2.732, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 7.534154269222502e+16, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|