{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.18248175182481752,
  "eval_steps": 13,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0036496350364963502,
      "grad_norm": 0.014942636713385582,
      "learning_rate": 2e-05,
      "loss": 11.9369,
      "step": 1
    },
    {
      "epoch": 0.0036496350364963502,
      "eval_loss": 11.937646865844727,
      "eval_runtime": 0.4523,
      "eval_samples_per_second": 256.465,
      "eval_steps_per_second": 128.233,
      "step": 1
    },
    {
      "epoch": 0.0072992700729927005,
      "grad_norm": 0.012954114004969597,
      "learning_rate": 4e-05,
      "loss": 11.9353,
      "step": 2
    },
    {
      "epoch": 0.010948905109489052,
      "grad_norm": 0.012448250316083431,
      "learning_rate": 6e-05,
      "loss": 11.9411,
      "step": 3
    },
    {
      "epoch": 0.014598540145985401,
      "grad_norm": 0.00847446545958519,
      "learning_rate": 8e-05,
      "loss": 11.9396,
      "step": 4
    },
    {
      "epoch": 0.01824817518248175,
      "grad_norm": 0.010436758399009705,
      "learning_rate": 0.0001,
      "loss": 11.9398,
      "step": 5
    },
    {
      "epoch": 0.021897810218978103,
      "grad_norm": 0.01240889634937048,
      "learning_rate": 0.00012,
      "loss": 11.9398,
      "step": 6
    },
    {
      "epoch": 0.025547445255474453,
      "grad_norm": 0.012278017587959766,
      "learning_rate": 0.00014,
      "loss": 11.9363,
      "step": 7
    },
    {
      "epoch": 0.029197080291970802,
      "grad_norm": 0.01409133430570364,
      "learning_rate": 0.00016,
      "loss": 11.9362,
      "step": 8
    },
    {
      "epoch": 0.032846715328467155,
      "grad_norm": 0.010964547283947468,
      "learning_rate": 0.00018,
      "loss": 11.9383,
      "step": 9
    },
    {
      "epoch": 0.0364963503649635,
      "grad_norm": 0.01435481570661068,
      "learning_rate": 0.0002,
      "loss": 11.9375,
      "step": 10
    },
    {
      "epoch": 0.040145985401459854,
      "grad_norm": 0.00981847196817398,
      "learning_rate": 0.0001996917333733128,
      "loss": 11.9376,
      "step": 11
    },
    {
      "epoch": 0.043795620437956206,
      "grad_norm": 0.009977707639336586,
      "learning_rate": 0.00019876883405951377,
      "loss": 11.9376,
      "step": 12
    },
    {
      "epoch": 0.04744525547445255,
      "grad_norm": 0.01374763622879982,
      "learning_rate": 0.00019723699203976766,
      "loss": 11.9375,
      "step": 13
    },
    {
      "epoch": 0.04744525547445255,
      "eval_loss": 11.937407493591309,
      "eval_runtime": 0.461,
      "eval_samples_per_second": 251.611,
      "eval_steps_per_second": 125.805,
      "step": 13
    },
    {
      "epoch": 0.051094890510948905,
      "grad_norm": 0.011579697951674461,
      "learning_rate": 0.00019510565162951537,
      "loss": 11.9379,
      "step": 14
    },
    {
      "epoch": 0.05474452554744526,
      "grad_norm": 0.010436128824949265,
      "learning_rate": 0.0001923879532511287,
      "loss": 11.9376,
      "step": 15
    },
    {
      "epoch": 0.058394160583941604,
      "grad_norm": 0.01555589772760868,
      "learning_rate": 0.0001891006524188368,
      "loss": 11.937,
      "step": 16
    },
    {
      "epoch": 0.06204379562043796,
      "grad_norm": 0.012454457581043243,
      "learning_rate": 0.00018526401643540922,
      "loss": 11.9363,
      "step": 17
    },
    {
      "epoch": 0.06569343065693431,
      "grad_norm": 0.016757259145379066,
      "learning_rate": 0.00018090169943749476,
      "loss": 11.9351,
      "step": 18
    },
    {
      "epoch": 0.06934306569343066,
      "grad_norm": 0.012486481107771397,
      "learning_rate": 0.0001760405965600031,
      "loss": 11.9399,
      "step": 19
    },
    {
      "epoch": 0.072992700729927,
      "grad_norm": 0.018843721598386765,
      "learning_rate": 0.00017071067811865476,
      "loss": 11.9363,
      "step": 20
    },
    {
      "epoch": 0.07664233576642336,
      "grad_norm": 0.012065412476658821,
      "learning_rate": 0.00016494480483301836,
      "loss": 11.935,
      "step": 21
    },
    {
      "epoch": 0.08029197080291971,
      "grad_norm": 0.013166313990950584,
      "learning_rate": 0.00015877852522924732,
      "loss": 11.9383,
      "step": 22
    },
    {
      "epoch": 0.08394160583941605,
      "grad_norm": 0.016667328774929047,
      "learning_rate": 0.0001522498564715949,
      "loss": 11.9342,
      "step": 23
    },
    {
      "epoch": 0.08759124087591241,
      "grad_norm": 0.012548817321658134,
      "learning_rate": 0.00014539904997395468,
      "loss": 11.9384,
      "step": 24
    },
    {
      "epoch": 0.09124087591240876,
      "grad_norm": 0.012698717415332794,
      "learning_rate": 0.000138268343236509,
      "loss": 11.9366,
      "step": 25
    },
    {
      "epoch": 0.0948905109489051,
      "grad_norm": 0.012354554608464241,
      "learning_rate": 0.00013090169943749476,
      "loss": 11.937,
      "step": 26
    },
    {
      "epoch": 0.0948905109489051,
      "eval_loss": 11.93701171875,
      "eval_runtime": 0.4615,
      "eval_samples_per_second": 251.328,
      "eval_steps_per_second": 125.664,
      "step": 26
    },
    {
      "epoch": 0.09854014598540146,
      "grad_norm": 0.011796779930591583,
      "learning_rate": 0.00012334453638559057,
      "loss": 11.9357,
      "step": 27
    },
    {
      "epoch": 0.10218978102189781,
      "grad_norm": 0.014917504042387009,
      "learning_rate": 0.0001156434465040231,
      "loss": 11.9356,
      "step": 28
    },
    {
      "epoch": 0.10583941605839416,
      "grad_norm": 0.01455847267061472,
      "learning_rate": 0.0001078459095727845,
      "loss": 11.9375,
      "step": 29
    },
    {
      "epoch": 0.10948905109489052,
      "grad_norm": 0.010509638115763664,
      "learning_rate": 0.0001,
      "loss": 11.9371,
      "step": 30
    },
    {
      "epoch": 0.11313868613138686,
      "grad_norm": 0.010377367027103901,
      "learning_rate": 9.215409042721552e-05,
      "loss": 11.9354,
      "step": 31
    },
    {
      "epoch": 0.11678832116788321,
      "grad_norm": 0.014895463362336159,
      "learning_rate": 8.435655349597689e-05,
      "loss": 11.9354,
      "step": 32
    },
    {
      "epoch": 0.12043795620437957,
      "grad_norm": 0.01482429075986147,
      "learning_rate": 7.66554636144095e-05,
      "loss": 11.9347,
      "step": 33
    },
    {
      "epoch": 0.12408759124087591,
      "grad_norm": 0.020593857392668724,
      "learning_rate": 6.909830056250527e-05,
      "loss": 11.9365,
      "step": 34
    },
    {
      "epoch": 0.12773722627737227,
      "grad_norm": 0.013053865171968937,
      "learning_rate": 6.173165676349103e-05,
      "loss": 11.9348,
      "step": 35
    },
    {
      "epoch": 0.13138686131386862,
      "grad_norm": 0.014818524941802025,
      "learning_rate": 5.4600950026045326e-05,
      "loss": 11.9374,
      "step": 36
    },
    {
      "epoch": 0.13503649635036497,
      "grad_norm": 0.018579095602035522,
      "learning_rate": 4.7750143528405126e-05,
      "loss": 11.9358,
      "step": 37
    },
    {
      "epoch": 0.1386861313868613,
      "grad_norm": 0.014956467784941196,
      "learning_rate": 4.12214747707527e-05,
      "loss": 11.936,
      "step": 38
    },
    {
      "epoch": 0.14233576642335766,
      "grad_norm": 0.009931315667927265,
      "learning_rate": 3.5055195166981645e-05,
      "loss": 11.9358,
      "step": 39
    },
    {
      "epoch": 0.14233576642335766,
      "eval_loss": 11.936786651611328,
      "eval_runtime": 0.4619,
      "eval_samples_per_second": 251.13,
      "eval_steps_per_second": 125.565,
      "step": 39
    },
    {
      "epoch": 0.145985401459854,
      "grad_norm": 0.010320533998310566,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 11.9364,
      "step": 40
    },
    {
      "epoch": 0.14963503649635038,
      "grad_norm": 0.015699736773967743,
      "learning_rate": 2.3959403439996907e-05,
      "loss": 11.9369,
      "step": 41
    },
    {
      "epoch": 0.15328467153284672,
      "grad_norm": 0.015644729137420654,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 11.9351,
      "step": 42
    },
    {
      "epoch": 0.15693430656934307,
      "grad_norm": 0.016171997413039207,
      "learning_rate": 1.4735983564590783e-05,
      "loss": 11.9347,
      "step": 43
    },
    {
      "epoch": 0.16058394160583941,
      "grad_norm": 0.01559949479997158,
      "learning_rate": 1.0899347581163221e-05,
      "loss": 11.9351,
      "step": 44
    },
    {
      "epoch": 0.16423357664233576,
      "grad_norm": 0.010892484337091446,
      "learning_rate": 7.612046748871327e-06,
      "loss": 11.939,
      "step": 45
    },
    {
      "epoch": 0.1678832116788321,
      "grad_norm": 0.012285547330975533,
      "learning_rate": 4.8943483704846475e-06,
      "loss": 11.9399,
      "step": 46
    },
    {
      "epoch": 0.17153284671532848,
      "grad_norm": 0.011205232702195644,
      "learning_rate": 2.7630079602323442e-06,
      "loss": 11.9356,
      "step": 47
    },
    {
      "epoch": 0.17518248175182483,
      "grad_norm": 0.011147280223667622,
      "learning_rate": 1.231165940486234e-06,
      "loss": 11.9377,
      "step": 48
    },
    {
      "epoch": 0.17883211678832117,
      "grad_norm": 0.012826742604374886,
      "learning_rate": 3.0826662668720364e-07,
      "loss": 11.9364,
      "step": 49
    },
    {
      "epoch": 0.18248175182481752,
      "grad_norm": 0.01375089306384325,
      "learning_rate": 0.0,
      "loss": 11.9352,
      "step": 50
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 13,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5908070400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}