{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.02258403732012167,
  "eval_steps": 9,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00022584037320121672,
      "eval_loss": 10.381133079528809,
      "eval_runtime": 113.7963,
      "eval_samples_per_second": 131.068,
      "eval_steps_per_second": 65.538,
      "step": 1
    },
    {
      "epoch": 0.0006775211196036502,
      "grad_norm": 0.031333375722169876,
      "learning_rate": 3e-05,
      "loss": 10.3808,
      "step": 3
    },
    {
      "epoch": 0.0013550422392073003,
      "grad_norm": 0.031410422176122665,
      "learning_rate": 6e-05,
      "loss": 10.3818,
      "step": 6
    },
    {
      "epoch": 0.0020325633588109504,
      "grad_norm": 0.028049860149621964,
      "learning_rate": 9e-05,
      "loss": 10.3804,
      "step": 9
    },
    {
      "epoch": 0.0020325633588109504,
      "eval_loss": 10.380571365356445,
      "eval_runtime": 114.7582,
      "eval_samples_per_second": 129.969,
      "eval_steps_per_second": 64.989,
      "step": 9
    },
    {
      "epoch": 0.0027100844784146007,
      "grad_norm": 0.030115395784378052,
      "learning_rate": 0.00012,
      "loss": 10.3802,
      "step": 12
    },
    {
      "epoch": 0.003387605598018251,
      "grad_norm": 0.030468817800283432,
      "learning_rate": 0.00015000000000000001,
      "loss": 10.3799,
      "step": 15
    },
    {
      "epoch": 0.004065126717621901,
      "grad_norm": 0.03643874451518059,
      "learning_rate": 0.00018,
      "loss": 10.3789,
      "step": 18
    },
    {
      "epoch": 0.004065126717621901,
      "eval_loss": 10.378609657287598,
      "eval_runtime": 114.6566,
      "eval_samples_per_second": 130.084,
      "eval_steps_per_second": 65.046,
      "step": 18
    },
    {
      "epoch": 0.004742647837225551,
      "grad_norm": 0.03757527098059654,
      "learning_rate": 0.0001999229036240723,
      "loss": 10.3778,
      "step": 21
    },
    {
      "epoch": 0.005420168956829201,
      "grad_norm": 0.05077025294303894,
      "learning_rate": 0.00019876883405951377,
      "loss": 10.3773,
      "step": 24
    },
    {
      "epoch": 0.006097690076432852,
      "grad_norm": 0.06361916661262512,
      "learning_rate": 0.00019624552364536473,
      "loss": 10.3755,
      "step": 27
    },
    {
      "epoch": 0.006097690076432852,
      "eval_loss": 10.374011993408203,
      "eval_runtime": 113.7536,
      "eval_samples_per_second": 131.117,
      "eval_steps_per_second": 65.563,
      "step": 27
    },
    {
      "epoch": 0.006775211196036502,
      "grad_norm": 0.06706535816192627,
      "learning_rate": 0.0001923879532511287,
      "loss": 10.3735,
      "step": 30
    },
    {
      "epoch": 0.007452732315640151,
      "grad_norm": 0.09487693011760712,
      "learning_rate": 0.00018724960070727972,
      "loss": 10.3707,
      "step": 33
    },
    {
      "epoch": 0.008130253435243802,
      "grad_norm": 0.09645885974168777,
      "learning_rate": 0.00018090169943749476,
      "loss": 10.3681,
      "step": 36
    },
    {
      "epoch": 0.008130253435243802,
      "eval_loss": 10.36545181274414,
      "eval_runtime": 113.0342,
      "eval_samples_per_second": 131.951,
      "eval_steps_per_second": 65.98,
      "step": 36
    },
    {
      "epoch": 0.008807774554847452,
      "grad_norm": 0.09872834384441376,
      "learning_rate": 0.00017343225094356855,
      "loss": 10.3647,
      "step": 39
    },
    {
      "epoch": 0.009485295674451102,
      "grad_norm": 0.082485131919384,
      "learning_rate": 0.00016494480483301836,
      "loss": 10.3616,
      "step": 42
    },
    {
      "epoch": 0.010162816794054752,
      "grad_norm": 0.0801018700003624,
      "learning_rate": 0.00015555702330196023,
      "loss": 10.3585,
      "step": 45
    },
    {
      "epoch": 0.010162816794054752,
      "eval_loss": 10.357089042663574,
      "eval_runtime": 113.8095,
      "eval_samples_per_second": 131.052,
      "eval_steps_per_second": 65.531,
      "step": 45
    },
    {
      "epoch": 0.010840337913658403,
      "grad_norm": 0.06634080410003662,
      "learning_rate": 0.00014539904997395468,
      "loss": 10.3567,
      "step": 48
    },
    {
      "epoch": 0.011517859033262053,
      "grad_norm": 0.06341370195150375,
      "learning_rate": 0.0001346117057077493,
      "loss": 10.3552,
      "step": 51
    },
    {
      "epoch": 0.012195380152865703,
      "grad_norm": 0.05498109757900238,
      "learning_rate": 0.00012334453638559057,
      "loss": 10.3544,
      "step": 54
    },
    {
      "epoch": 0.012195380152865703,
      "eval_loss": 10.353216171264648,
      "eval_runtime": 114.8408,
      "eval_samples_per_second": 129.875,
      "eval_steps_per_second": 64.942,
      "step": 54
    },
    {
      "epoch": 0.012872901272469353,
      "grad_norm": 0.048390697687864304,
      "learning_rate": 0.00011175373974578378,
      "loss": 10.3538,
      "step": 57
    },
    {
      "epoch": 0.013550422392073004,
      "grad_norm": 0.0455036386847496,
      "learning_rate": 0.0001,
      "loss": 10.3531,
      "step": 60
    },
    {
      "epoch": 0.014227943511676652,
      "grad_norm": 0.041428498923778534,
      "learning_rate": 8.824626025421626e-05,
      "loss": 10.3526,
      "step": 63
    },
    {
      "epoch": 0.014227943511676652,
      "eval_loss": 10.351635932922363,
      "eval_runtime": 114.7051,
      "eval_samples_per_second": 130.029,
      "eval_steps_per_second": 65.019,
      "step": 63
    },
    {
      "epoch": 0.014905464631280303,
      "grad_norm": 0.040886059403419495,
      "learning_rate": 7.66554636144095e-05,
      "loss": 10.3521,
      "step": 66
    },
    {
      "epoch": 0.015582985750883953,
      "grad_norm": 0.031167220324277878,
      "learning_rate": 6.538829429225069e-05,
      "loss": 10.3522,
      "step": 69
    },
    {
      "epoch": 0.016260506870487603,
      "grad_norm": 0.030341865494847298,
      "learning_rate": 5.4600950026045326e-05,
      "loss": 10.3524,
      "step": 72
    },
    {
      "epoch": 0.016260506870487603,
      "eval_loss": 10.350973129272461,
      "eval_runtime": 114.5051,
      "eval_samples_per_second": 130.256,
      "eval_steps_per_second": 65.132,
      "step": 72
    },
    {
      "epoch": 0.016938027990091253,
      "grad_norm": 0.02817876823246479,
      "learning_rate": 4.444297669803981e-05,
      "loss": 10.3521,
      "step": 75
    },
    {
      "epoch": 0.017615549109694904,
      "grad_norm": 0.025949513539671898,
      "learning_rate": 3.5055195166981645e-05,
      "loss": 10.351,
      "step": 78
    },
    {
      "epoch": 0.018293070229298554,
      "grad_norm": 0.022331327199935913,
      "learning_rate": 2.6567749056431467e-05,
      "loss": 10.3507,
      "step": 81
    },
    {
      "epoch": 0.018293070229298554,
      "eval_loss": 10.35069751739502,
      "eval_runtime": 113.8246,
      "eval_samples_per_second": 131.035,
      "eval_steps_per_second": 65.522,
      "step": 81
    },
    {
      "epoch": 0.018970591348902204,
      "grad_norm": 0.02584969997406006,
      "learning_rate": 1.9098300562505266e-05,
      "loss": 10.3516,
      "step": 84
    },
    {
      "epoch": 0.019648112468505854,
      "grad_norm": 0.029781047254800797,
      "learning_rate": 1.2750399292720283e-05,
      "loss": 10.3506,
      "step": 87
    },
    {
      "epoch": 0.020325633588109505,
      "grad_norm": 0.022765731438994408,
      "learning_rate": 7.612046748871327e-06,
      "loss": 10.3504,
      "step": 90
    },
    {
      "epoch": 0.020325633588109505,
      "eval_loss": 10.350595474243164,
      "eval_runtime": 113.4837,
      "eval_samples_per_second": 131.429,
      "eval_steps_per_second": 65.719,
      "step": 90
    },
    {
      "epoch": 0.021003154707713155,
      "grad_norm": 0.026100726798176765,
      "learning_rate": 3.7544763546352834e-06,
      "loss": 10.3511,
      "step": 93
    },
    {
      "epoch": 0.021680675827316805,
      "grad_norm": 0.0227440744638443,
      "learning_rate": 1.231165940486234e-06,
      "loss": 10.3517,
      "step": 96
    },
    {
      "epoch": 0.022358196946920456,
      "grad_norm": 0.02963266521692276,
      "learning_rate": 7.709637592770991e-08,
      "loss": 10.3508,
      "step": 99
    },
    {
      "epoch": 0.022358196946920456,
      "eval_loss": 10.350578308105469,
      "eval_runtime": 114.4993,
      "eval_samples_per_second": 130.263,
      "eval_steps_per_second": 65.136,
      "step": 99
    }
  ],
  "logging_steps": 3,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 9,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 42768059596800.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}