{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.02258403732012167, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022584037320121672, "eval_loss": 10.381133079528809, "eval_runtime": 113.7963, "eval_samples_per_second": 131.068, "eval_steps_per_second": 65.538, "step": 1 }, { "epoch": 0.0006775211196036502, "grad_norm": 0.031333375722169876, "learning_rate": 3e-05, "loss": 10.3808, "step": 3 }, { "epoch": 0.0013550422392073003, "grad_norm": 0.031410422176122665, "learning_rate": 6e-05, "loss": 10.3818, "step": 6 }, { "epoch": 0.0020325633588109504, "grad_norm": 0.028049860149621964, "learning_rate": 9e-05, "loss": 10.3804, "step": 9 }, { "epoch": 0.0020325633588109504, "eval_loss": 10.380571365356445, "eval_runtime": 114.7582, "eval_samples_per_second": 129.969, "eval_steps_per_second": 64.989, "step": 9 }, { "epoch": 0.0027100844784146007, "grad_norm": 0.030115395784378052, "learning_rate": 0.00012, "loss": 10.3802, "step": 12 }, { "epoch": 0.003387605598018251, "grad_norm": 0.030468817800283432, "learning_rate": 0.00015000000000000001, "loss": 10.3799, "step": 15 }, { "epoch": 0.004065126717621901, "grad_norm": 0.03643874451518059, "learning_rate": 0.00018, "loss": 10.3789, "step": 18 }, { "epoch": 0.004065126717621901, "eval_loss": 10.378609657287598, "eval_runtime": 114.6566, "eval_samples_per_second": 130.084, "eval_steps_per_second": 65.046, "step": 18 }, { "epoch": 0.004742647837225551, "grad_norm": 0.03757527098059654, "learning_rate": 0.0001999229036240723, "loss": 10.3778, "step": 21 }, { "epoch": 0.005420168956829201, "grad_norm": 0.05077025294303894, "learning_rate": 0.00019876883405951377, "loss": 10.3773, "step": 24 }, { "epoch": 0.006097690076432852, "grad_norm": 0.06361916661262512, "learning_rate": 0.00019624552364536473, "loss": 10.3755, "step": 27 }, { "epoch": 0.006097690076432852, "eval_loss": 10.374011993408203, "eval_runtime": 113.7536, "eval_samples_per_second": 131.117, "eval_steps_per_second": 65.563, "step": 27 }, { "epoch": 0.006775211196036502, "grad_norm": 0.06706535816192627, "learning_rate": 0.0001923879532511287, "loss": 10.3735, "step": 30 }, { "epoch": 0.007452732315640151, "grad_norm": 0.09487693011760712, "learning_rate": 0.00018724960070727972, "loss": 10.3707, "step": 33 }, { "epoch": 0.008130253435243802, "grad_norm": 0.09645885974168777, "learning_rate": 0.00018090169943749476, "loss": 10.3681, "step": 36 }, { "epoch": 0.008130253435243802, "eval_loss": 10.36545181274414, "eval_runtime": 113.0342, "eval_samples_per_second": 131.951, "eval_steps_per_second": 65.98, "step": 36 }, { "epoch": 0.008807774554847452, "grad_norm": 0.09872834384441376, "learning_rate": 0.00017343225094356855, "loss": 10.3647, "step": 39 }, { "epoch": 0.009485295674451102, "grad_norm": 0.082485131919384, "learning_rate": 0.00016494480483301836, "loss": 10.3616, "step": 42 }, { "epoch": 0.010162816794054752, "grad_norm": 0.0801018700003624, "learning_rate": 0.00015555702330196023, "loss": 10.3585, "step": 45 }, { "epoch": 0.010162816794054752, "eval_loss": 10.357089042663574, "eval_runtime": 113.8095, "eval_samples_per_second": 131.052, "eval_steps_per_second": 65.531, "step": 45 }, { "epoch": 0.010840337913658403, "grad_norm": 0.06634080410003662, "learning_rate": 0.00014539904997395468, "loss": 10.3567, "step": 48 }, { "epoch": 0.011517859033262053, "grad_norm": 0.06341370195150375, "learning_rate": 0.0001346117057077493, "loss": 10.3552, "step": 51 }, { "epoch": 0.012195380152865703, "grad_norm": 0.05498109757900238, "learning_rate": 0.00012334453638559057, "loss": 10.3544, "step": 54 }, { "epoch": 0.012195380152865703, "eval_loss": 10.353216171264648, "eval_runtime": 114.8408, "eval_samples_per_second": 129.875, "eval_steps_per_second": 64.942, "step": 54 }, { "epoch": 0.012872901272469353, "grad_norm": 0.048390697687864304, "learning_rate": 0.00011175373974578378, "loss": 10.3538, "step": 57 }, { "epoch": 0.013550422392073004, "grad_norm": 0.0455036386847496, "learning_rate": 0.0001, "loss": 10.3531, "step": 60 }, { "epoch": 0.014227943511676652, "grad_norm": 0.041428498923778534, "learning_rate": 8.824626025421626e-05, "loss": 10.3526, "step": 63 }, { "epoch": 0.014227943511676652, "eval_loss": 10.351635932922363, "eval_runtime": 114.7051, "eval_samples_per_second": 130.029, "eval_steps_per_second": 65.019, "step": 63 }, { "epoch": 0.014905464631280303, "grad_norm": 0.040886059403419495, "learning_rate": 7.66554636144095e-05, "loss": 10.3521, "step": 66 }, { "epoch": 0.015582985750883953, "grad_norm": 0.031167220324277878, "learning_rate": 6.538829429225069e-05, "loss": 10.3522, "step": 69 }, { "epoch": 0.016260506870487603, "grad_norm": 0.030341865494847298, "learning_rate": 5.4600950026045326e-05, "loss": 10.3524, "step": 72 }, { "epoch": 0.016260506870487603, "eval_loss": 10.350973129272461, "eval_runtime": 114.5051, "eval_samples_per_second": 130.256, "eval_steps_per_second": 65.132, "step": 72 }, { "epoch": 0.016938027990091253, "grad_norm": 0.02817876823246479, "learning_rate": 4.444297669803981e-05, "loss": 10.3521, "step": 75 }, { "epoch": 0.017615549109694904, "grad_norm": 0.025949513539671898, "learning_rate": 3.5055195166981645e-05, "loss": 10.351, "step": 78 }, { "epoch": 0.018293070229298554, "grad_norm": 0.022331327199935913, "learning_rate": 2.6567749056431467e-05, "loss": 10.3507, "step": 81 }, { "epoch": 0.018293070229298554, "eval_loss": 10.35069751739502, "eval_runtime": 113.8246, "eval_samples_per_second": 131.035, "eval_steps_per_second": 65.522, "step": 81 }, { "epoch": 0.018970591348902204, "grad_norm": 0.02584969997406006, "learning_rate": 1.9098300562505266e-05, "loss": 10.3516, "step": 84 }, { "epoch": 0.019648112468505854, "grad_norm": 0.029781047254800797, "learning_rate": 1.2750399292720283e-05, "loss": 10.3506, "step": 87 }, { "epoch": 0.020325633588109505, "grad_norm": 0.022765731438994408, "learning_rate": 7.612046748871327e-06, "loss": 10.3504, "step": 90 }, { "epoch": 0.020325633588109505, "eval_loss": 10.350595474243164, "eval_runtime": 113.4837, "eval_samples_per_second": 131.429, "eval_steps_per_second": 65.719, "step": 90 }, { "epoch": 0.021003154707713155, "grad_norm": 0.026100726798176765, "learning_rate": 3.7544763546352834e-06, "loss": 10.3511, "step": 93 }, { "epoch": 0.021680675827316805, "grad_norm": 0.0227440744638443, "learning_rate": 1.231165940486234e-06, "loss": 10.3517, "step": 96 }, { "epoch": 0.022358196946920456, "grad_norm": 0.02963266521692276, "learning_rate": 7.709637592770991e-08, "loss": 10.3508, "step": 99 }, { "epoch": 0.022358196946920456, "eval_loss": 10.350578308105469, "eval_runtime": 114.4993, "eval_samples_per_second": 130.263, "eval_steps_per_second": 65.136, "step": 99 } ], "logging_steps": 3, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 42768059596800.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }