|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.996941896024465, |
|
"eval_steps": 1000, |
|
"global_step": 1470, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1019367991845056, |
|
"grad_norm": 0.2208942323923111, |
|
"learning_rate": 9.999696229471716e-05, |
|
"loss": 1.1912, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2038735983690112, |
|
"grad_norm": 0.29578036069869995, |
|
"learning_rate": 9.963288406760582e-05, |
|
"loss": 1.0152, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3058103975535168, |
|
"grad_norm": 0.2985854744911194, |
|
"learning_rate": 9.86663298624003e-05, |
|
"loss": 0.957, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4077471967380224, |
|
"grad_norm": 0.2886662185192108, |
|
"learning_rate": 9.710903233782272e-05, |
|
"loss": 0.9539, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.509683995922528, |
|
"grad_norm": 0.29516491293907166, |
|
"learning_rate": 9.497989497625035e-05, |
|
"loss": 0.922, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6116207951070336, |
|
"grad_norm": 0.2972014844417572, |
|
"learning_rate": 9.230476262104677e-05, |
|
"loss": 0.9182, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7135575942915392, |
|
"grad_norm": 0.31259697675704956, |
|
"learning_rate": 8.911610775517382e-05, |
|
"loss": 0.8974, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8154943934760448, |
|
"grad_norm": 0.313388854265213, |
|
"learning_rate": 8.545263632923687e-05, |
|
"loss": 0.8888, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 0.3168061077594757, |
|
"learning_rate": 8.135881792367686e-05, |
|
"loss": 0.8922, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.019367991845056, |
|
"grad_norm": 0.3456219732761383, |
|
"learning_rate": 7.688434594830392e-05, |
|
"loss": 0.8769, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1213047910295617, |
|
"grad_norm": 0.3286343812942505, |
|
"learning_rate": 7.20835344316187e-05, |
|
"loss": 0.8433, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2232415902140672, |
|
"grad_norm": 0.40875253081321716, |
|
"learning_rate": 6.701465872208216e-05, |
|
"loss": 0.8328, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.325178389398573, |
|
"grad_norm": 0.38733330368995667, |
|
"learning_rate": 6.173924810432705e-05, |
|
"loss": 0.8472, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.4271151885830786, |
|
"grad_norm": 0.37286508083343506, |
|
"learning_rate": 5.6321338916992315e-05, |
|
"loss": 0.8246, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.529051987767584, |
|
"grad_norm": 0.3836914300918579, |
|
"learning_rate": 5.0826697238317935e-05, |
|
"loss": 0.8198, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.6309887869520896, |
|
"grad_norm": 0.39856258034706116, |
|
"learning_rate": 4.5322020575044114e-05, |
|
"loss": 0.8271, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.7329255861365953, |
|
"grad_norm": 0.38490578532218933, |
|
"learning_rate": 3.9874128245030404e-05, |
|
"loss": 0.8203, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 0.4273436367511749, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"loss": 0.815, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.9367991845056065, |
|
"grad_norm": 0.42424914240837097, |
|
"learning_rate": 2.9411724702784758e-05, |
|
"loss": 0.8196, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.038735983690112, |
|
"grad_norm": 0.4148640036582947, |
|
"learning_rate": 2.4524212896808263e-05, |
|
"loss": 0.7939, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.038735983690112, |
|
"eval_loss": 1.0475335121154785, |
|
"eval_runtime": 239.1942, |
|
"eval_samples_per_second": 0.924, |
|
"eval_steps_per_second": 0.924, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.140672782874618, |
|
"grad_norm": 0.42366823554039, |
|
"learning_rate": 1.9945942635848748e-05, |
|
"loss": 0.7768, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.2426095820591234, |
|
"grad_norm": 0.4487750232219696, |
|
"learning_rate": 1.5732487918985018e-05, |
|
"loss": 0.7848, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.344546381243629, |
|
"grad_norm": 0.4321724474430084, |
|
"learning_rate": 1.1934994378782772e-05, |
|
"loss": 0.777, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.4464831804281344, |
|
"grad_norm": 0.4505467116832733, |
|
"learning_rate": 8.599558442598998e-06, |
|
"loss": 0.7813, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.5484199796126403, |
|
"grad_norm": 0.4411642849445343, |
|
"learning_rate": 5.766667784397706e-06, |
|
"loss": 0.7694, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.650356778797146, |
|
"grad_norm": 0.44770148396492004, |
|
"learning_rate": 3.470709859234084e-06, |
|
"loss": 0.7721, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.7522935779816513, |
|
"grad_norm": 0.40171849727630615, |
|
"learning_rate": 1.7395544861325718e-06, |
|
"loss": 0.775, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.8542303771661572, |
|
"grad_norm": 0.437456876039505, |
|
"learning_rate": 5.9421554623742e-07, |
|
"loss": 0.7672, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.9561671763506627, |
|
"grad_norm": 0.4368380904197693, |
|
"learning_rate": 4.859590276170556e-08, |
|
"loss": 0.7591, |
|
"step": 1450 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1470, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.452871731469353e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|