{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.04697091006138244,
  "eval_steps": 9,
  "global_step": 99,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00047445363698366103,
      "eval_loss": 1.2206629514694214,
      "eval_runtime": 2455.084,
      "eval_samples_per_second": 2.892,
      "eval_steps_per_second": 1.446,
      "step": 1
    },
    {
      "epoch": 0.001423360910950983,
      "grad_norm": 42.169185638427734,
      "learning_rate": 3e-05,
      "loss": 38.8115,
      "step": 3
    },
    {
      "epoch": 0.002846721821901966,
      "grad_norm": 30.064516067504883,
      "learning_rate": 6e-05,
      "loss": 36.5887,
      "step": 6
    },
    {
      "epoch": 0.004270082732852949,
      "grad_norm": 24.0466365814209,
      "learning_rate": 9e-05,
      "loss": 31.7486,
      "step": 9
    },
    {
      "epoch": 0.004270082732852949,
      "eval_loss": 0.872442364692688,
      "eval_runtime": 2457.1575,
      "eval_samples_per_second": 2.89,
      "eval_steps_per_second": 1.445,
      "step": 9
    },
    {
      "epoch": 0.005693443643803932,
      "grad_norm": 26.121597290039062,
      "learning_rate": 0.00012,
      "loss": 28.0893,
      "step": 12
    },
    {
      "epoch": 0.007116804554754915,
      "grad_norm": 22.931711196899414,
      "learning_rate": 0.00015000000000000001,
      "loss": 26.3657,
      "step": 15
    },
    {
      "epoch": 0.008540165465705898,
      "grad_norm": 15.313858032226562,
      "learning_rate": 0.00018,
      "loss": 22.876,
      "step": 18
    },
    {
      "epoch": 0.008540165465705898,
      "eval_loss": 0.7377527952194214,
      "eval_runtime": 730.2515,
      "eval_samples_per_second": 9.723,
      "eval_steps_per_second": 4.861,
      "step": 18
    },
    {
      "epoch": 0.009963526376656881,
      "grad_norm": 14.844147682189941,
      "learning_rate": 0.0001999229036240723,
      "loss": 23.8388,
      "step": 21
    },
    {
      "epoch": 0.011386887287607865,
      "grad_norm": 13.040925025939941,
      "learning_rate": 0.00019876883405951377,
      "loss": 23.2438,
      "step": 24
    },
    {
      "epoch": 0.012810248198558846,
      "grad_norm": 13.732440948486328,
      "learning_rate": 0.00019624552364536473,
      "loss": 23.504,
      "step": 27
    },
    {
      "epoch": 0.012810248198558846,
      "eval_loss": 0.708083987236023,
      "eval_runtime": 2490.0558,
      "eval_samples_per_second": 2.851,
      "eval_steps_per_second": 1.426,
      "step": 27
    },
    {
      "epoch": 0.01423360910950983,
      "grad_norm": 15.219266891479492,
      "learning_rate": 0.0001923879532511287,
      "loss": 23.009,
      "step": 30
    },
    {
      "epoch": 0.015656970020460813,
      "grad_norm": 11.847440719604492,
      "learning_rate": 0.00018724960070727972,
      "loss": 21.94,
      "step": 33
    },
    {
      "epoch": 0.017080330931411796,
      "grad_norm": 11.252641677856445,
      "learning_rate": 0.00018090169943749476,
      "loss": 22.467,
      "step": 36
    },
    {
      "epoch": 0.017080330931411796,
      "eval_loss": 0.6968169808387756,
      "eval_runtime": 1821.4615,
      "eval_samples_per_second": 3.898,
      "eval_steps_per_second": 1.949,
      "step": 36
    },
    {
      "epoch": 0.01850369184236278,
      "grad_norm": 11.163553237915039,
      "learning_rate": 0.00017343225094356855,
      "loss": 22.5554,
      "step": 39
    },
    {
      "epoch": 0.019927052753313763,
      "grad_norm": 13.20541763305664,
      "learning_rate": 0.00016494480483301836,
      "loss": 22.9775,
      "step": 42
    },
    {
      "epoch": 0.021350413664264746,
      "grad_norm": 10.816298484802246,
      "learning_rate": 0.00015555702330196023,
      "loss": 22.2379,
      "step": 45
    },
    {
      "epoch": 0.021350413664264746,
      "eval_loss": 0.6865644454956055,
      "eval_runtime": 673.7961,
      "eval_samples_per_second": 10.537,
      "eval_steps_per_second": 5.269,
      "step": 45
    },
    {
      "epoch": 0.02277377457521573,
      "grad_norm": 10.934300422668457,
      "learning_rate": 0.00014539904997395468,
      "loss": 21.4568,
      "step": 48
    },
    {
      "epoch": 0.024197135486166713,
      "grad_norm": 10.668505668640137,
      "learning_rate": 0.0001346117057077493,
      "loss": 21.2971,
      "step": 51
    },
    {
      "epoch": 0.025620496397117692,
      "grad_norm": 10.522425651550293,
      "learning_rate": 0.00012334453638559057,
      "loss": 22.4085,
      "step": 54
    },
    {
      "epoch": 0.025620496397117692,
      "eval_loss": 0.6808061003684998,
      "eval_runtime": 674.5537,
      "eval_samples_per_second": 10.525,
      "eval_steps_per_second": 5.263,
      "step": 54
    },
    {
      "epoch": 0.027043857308068676,
      "grad_norm": 10.834729194641113,
      "learning_rate": 0.00011175373974578378,
      "loss": 22.0063,
      "step": 57
    },
    {
      "epoch": 0.02846721821901966,
      "grad_norm": 10.45215892791748,
      "learning_rate": 0.0001,
      "loss": 21.7114,
      "step": 60
    },
    {
      "epoch": 0.029890579129970642,
      "grad_norm": 10.509422302246094,
      "learning_rate": 8.824626025421626e-05,
      "loss": 22.147,
      "step": 63
    },
    {
      "epoch": 0.029890579129970642,
      "eval_loss": 0.6736528873443604,
      "eval_runtime": 674.4225,
      "eval_samples_per_second": 10.528,
      "eval_steps_per_second": 5.264,
      "step": 63
    },
    {
      "epoch": 0.031313940040921626,
      "grad_norm": 10.096047401428223,
      "learning_rate": 7.66554636144095e-05,
      "loss": 21.0243,
      "step": 66
    },
    {
      "epoch": 0.03273730095187261,
      "grad_norm": 10.614026069641113,
      "learning_rate": 6.538829429225069e-05,
      "loss": 22.6279,
      "step": 69
    },
    {
      "epoch": 0.03416066186282359,
      "grad_norm": 9.915692329406738,
      "learning_rate": 5.4600950026045326e-05,
      "loss": 21.7931,
      "step": 72
    },
    {
      "epoch": 0.03416066186282359,
      "eval_loss": 0.6665549278259277,
      "eval_runtime": 673.4954,
      "eval_samples_per_second": 10.542,
      "eval_steps_per_second": 5.271,
      "step": 72
    },
    {
      "epoch": 0.03558402277377457,
      "grad_norm": 10.606143951416016,
      "learning_rate": 4.444297669803981e-05,
      "loss": 21.1355,
      "step": 75
    },
    {
      "epoch": 0.03700738368472556,
      "grad_norm": 9.829690933227539,
      "learning_rate": 3.5055195166981645e-05,
      "loss": 20.704,
      "step": 78
    },
    {
      "epoch": 0.03843074459567654,
      "grad_norm": 9.532461166381836,
      "learning_rate": 2.6567749056431467e-05,
      "loss": 20.9497,
      "step": 81
    },
    {
      "epoch": 0.03843074459567654,
      "eval_loss": 0.6634002327919006,
      "eval_runtime": 673.9541,
      "eval_samples_per_second": 10.535,
      "eval_steps_per_second": 5.267,
      "step": 81
    },
    {
      "epoch": 0.039854105506627525,
      "grad_norm": 10.550149917602539,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 21.4862,
      "step": 84
    },
    {
      "epoch": 0.041277466417578505,
      "grad_norm": 9.829084396362305,
      "learning_rate": 1.2750399292720283e-05,
      "loss": 21.2587,
      "step": 87
    },
    {
      "epoch": 0.04270082732852949,
      "grad_norm": 10.305970191955566,
      "learning_rate": 7.612046748871327e-06,
      "loss": 21.4128,
      "step": 90
    },
    {
      "epoch": 0.04270082732852949,
      "eval_loss": 0.6616738438606262,
      "eval_runtime": 674.758,
      "eval_samples_per_second": 10.522,
      "eval_steps_per_second": 5.261,
      "step": 90
    },
    {
      "epoch": 0.04412418823948047,
      "grad_norm": 10.540351867675781,
      "learning_rate": 3.7544763546352834e-06,
      "loss": 22.0735,
      "step": 93
    },
    {
      "epoch": 0.04554754915043146,
      "grad_norm": 10.391834259033203,
      "learning_rate": 1.231165940486234e-06,
      "loss": 22.2369,
      "step": 96
    },
    {
      "epoch": 0.04697091006138244,
      "grad_norm": 10.5779390335083,
      "learning_rate": 7.709637592770991e-08,
      "loss": 21.495,
      "step": 99
    },
    {
      "epoch": 0.04697091006138244,
      "eval_loss": 0.6612978577613831,
      "eval_runtime": 674.8148,
      "eval_samples_per_second": 10.521,
      "eval_steps_per_second": 5.261,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.2196614900128154e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}