|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.24040267447975358, |
|
"eval_steps": 9, |
|
"global_step": 100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002404026744797536, |
|
"eval_loss": 3.228058099746704, |
|
"eval_runtime": 156.333, |
|
"eval_samples_per_second": 8.968, |
|
"eval_steps_per_second": 4.484, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.007212080234392608, |
|
"grad_norm": 1.9650676250457764, |
|
"learning_rate": 3e-05, |
|
"loss": 2.349, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.014424160468785216, |
|
"grad_norm": 1.4064522981643677, |
|
"learning_rate": 6e-05, |
|
"loss": 2.2216, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.021636240703177823, |
|
"grad_norm": 1.3426527976989746, |
|
"learning_rate": 9e-05, |
|
"loss": 1.9405, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.021636240703177823, |
|
"eval_loss": 1.9358376264572144, |
|
"eval_runtime": 156.5558, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 4.478, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02884832093757043, |
|
"grad_norm": 1.1980935335159302, |
|
"learning_rate": 0.00012, |
|
"loss": 1.7996, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03606040117196304, |
|
"grad_norm": 0.9815410375595093, |
|
"learning_rate": 0.00015000000000000001, |
|
"loss": 1.6674, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.043272481406355645, |
|
"grad_norm": 1.1842740774154663, |
|
"learning_rate": 0.00018, |
|
"loss": 1.5861, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.043272481406355645, |
|
"eval_loss": 1.4581845998764038, |
|
"eval_runtime": 156.5288, |
|
"eval_samples_per_second": 8.957, |
|
"eval_steps_per_second": 4.478, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05048456164074825, |
|
"grad_norm": 0.9527170062065125, |
|
"learning_rate": 0.0001999229036240723, |
|
"loss": 1.5801, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05769664187514086, |
|
"grad_norm": 0.9741354584693909, |
|
"learning_rate": 0.00019876883405951377, |
|
"loss": 1.6089, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06490872210953347, |
|
"grad_norm": 0.8610622882843018, |
|
"learning_rate": 0.00019624552364536473, |
|
"loss": 1.5059, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06490872210953347, |
|
"eval_loss": 1.3450061082839966, |
|
"eval_runtime": 156.5047, |
|
"eval_samples_per_second": 8.958, |
|
"eval_steps_per_second": 4.479, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07212080234392608, |
|
"grad_norm": 0.988355815410614, |
|
"learning_rate": 0.0001923879532511287, |
|
"loss": 1.4597, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07933288257831868, |
|
"grad_norm": 0.9052969813346863, |
|
"learning_rate": 0.00018724960070727972, |
|
"loss": 1.3148, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08654496281271129, |
|
"grad_norm": 0.9871351718902588, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 1.4494, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.08654496281271129, |
|
"eval_loss": 1.2605727910995483, |
|
"eval_runtime": 156.5558, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 4.478, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0937570430471039, |
|
"grad_norm": 1.216485619544983, |
|
"learning_rate": 0.00017343225094356855, |
|
"loss": 1.3575, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1009691232814965, |
|
"grad_norm": 1.0345182418823242, |
|
"learning_rate": 0.00016494480483301836, |
|
"loss": 1.316, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10818120351588911, |
|
"grad_norm": 1.2272484302520752, |
|
"learning_rate": 0.00015555702330196023, |
|
"loss": 1.3834, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10818120351588911, |
|
"eval_loss": 1.199688196182251, |
|
"eval_runtime": 156.5688, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 4.477, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11539328375028172, |
|
"grad_norm": 0.8153639435768127, |
|
"learning_rate": 0.00014539904997395468, |
|
"loss": 1.3282, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12260536398467432, |
|
"grad_norm": 0.9661998748779297, |
|
"learning_rate": 0.0001346117057077493, |
|
"loss": 1.2335, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12981744421906694, |
|
"grad_norm": 0.9782885313034058, |
|
"learning_rate": 0.00012334453638559057, |
|
"loss": 1.1531, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.12981744421906694, |
|
"eval_loss": 1.1464760303497314, |
|
"eval_runtime": 156.5981, |
|
"eval_samples_per_second": 8.953, |
|
"eval_steps_per_second": 4.476, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13702952445345953, |
|
"grad_norm": 1.0093810558319092, |
|
"learning_rate": 0.00011175373974578378, |
|
"loss": 1.1367, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14424160468785216, |
|
"grad_norm": 0.9035248160362244, |
|
"learning_rate": 0.0001, |
|
"loss": 1.2007, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15145368492224476, |
|
"grad_norm": 1.1476191282272339, |
|
"learning_rate": 8.824626025421626e-05, |
|
"loss": 1.1702, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15145368492224476, |
|
"eval_loss": 1.1219838857650757, |
|
"eval_runtime": 156.5351, |
|
"eval_samples_per_second": 8.956, |
|
"eval_steps_per_second": 4.478, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15866576515663736, |
|
"grad_norm": 1.004503607749939, |
|
"learning_rate": 7.66554636144095e-05, |
|
"loss": 1.1231, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16587784539102998, |
|
"grad_norm": 0.8763646483421326, |
|
"learning_rate": 6.538829429225069e-05, |
|
"loss": 1.1959, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.17308992562542258, |
|
"grad_norm": 1.0786497592926025, |
|
"learning_rate": 5.4600950026045326e-05, |
|
"loss": 1.2195, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.17308992562542258, |
|
"eval_loss": 1.0894047021865845, |
|
"eval_runtime": 156.5655, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 4.477, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18030200585981518, |
|
"grad_norm": 1.005875587463379, |
|
"learning_rate": 4.444297669803981e-05, |
|
"loss": 1.0467, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.1875140860942078, |
|
"grad_norm": 0.9949033260345459, |
|
"learning_rate": 3.5055195166981645e-05, |
|
"loss": 1.3012, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.1947261663286004, |
|
"grad_norm": 0.9470193982124329, |
|
"learning_rate": 2.6567749056431467e-05, |
|
"loss": 1.237, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.1947261663286004, |
|
"eval_loss": 1.070177674293518, |
|
"eval_runtime": 156.4864, |
|
"eval_samples_per_second": 8.959, |
|
"eval_steps_per_second": 4.48, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.201938246562993, |
|
"grad_norm": 0.9711623787879944, |
|
"learning_rate": 1.9098300562505266e-05, |
|
"loss": 0.9719, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.20915032679738563, |
|
"grad_norm": 1.0642070770263672, |
|
"learning_rate": 1.2750399292720283e-05, |
|
"loss": 1.2356, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21636240703177823, |
|
"grad_norm": 0.8378780484199524, |
|
"learning_rate": 7.612046748871327e-06, |
|
"loss": 1.1906, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21636240703177823, |
|
"eval_loss": 1.0622040033340454, |
|
"eval_runtime": 156.5263, |
|
"eval_samples_per_second": 8.957, |
|
"eval_steps_per_second": 4.478, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22357448726617082, |
|
"grad_norm": 0.9879840016365051, |
|
"learning_rate": 3.7544763546352834e-06, |
|
"loss": 1.1005, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23078656750056345, |
|
"grad_norm": 0.884789764881134, |
|
"learning_rate": 1.231165940486234e-06, |
|
"loss": 1.0352, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.23799864773495605, |
|
"grad_norm": 1.1920340061187744, |
|
"learning_rate": 7.709637592770991e-08, |
|
"loss": 1.117, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.23799864773495605, |
|
"eval_loss": 1.0599479675292969, |
|
"eval_runtime": 156.5531, |
|
"eval_samples_per_second": 8.955, |
|
"eval_steps_per_second": 4.478, |
|
"step": 99 |
|
} |
|
], |
|
"logging_steps": 3, |
|
"max_steps": 100, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 9, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.969948643447276e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|