{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03265172850087752, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016325864250438759, "eval_loss": 1.9291274547576904, "eval_runtime": 91.2073, "eval_samples_per_second": 28.276, "eval_steps_per_second": 14.144, "step": 1 }, { "epoch": 0.0008162932125219379, "grad_norm": 0.33012622594833374, "learning_rate": 5e-05, "loss": 1.2595, "step": 5 }, { "epoch": 0.0016325864250438759, "grad_norm": 0.38967302441596985, "learning_rate": 0.0001, "loss": 1.3877, "step": 10 }, { "epoch": 0.0024488796375658137, "grad_norm": 0.39858779311180115, "learning_rate": 9.98292246503335e-05, "loss": 1.3751, "step": 15 }, { "epoch": 0.0032651728500877517, "grad_norm": 0.47579634189605713, "learning_rate": 9.931806517013612e-05, "loss": 1.3325, "step": 20 }, { "epoch": 0.00408146606260969, "grad_norm": 0.6364086866378784, "learning_rate": 9.847001329696653e-05, "loss": 1.4285, "step": 25 }, { "epoch": 0.004897759275131627, "grad_norm": 1.071350336074829, "learning_rate": 9.729086208503174e-05, "loss": 1.4587, "step": 30 }, { "epoch": 0.005714052487653565, "grad_norm": 1.4836760759353638, "learning_rate": 9.578866633275288e-05, "loss": 1.367, "step": 35 }, { "epoch": 0.006530345700175503, "grad_norm": 1.7952219247817993, "learning_rate": 9.397368756032445e-05, "loss": 1.2556, "step": 40 }, { "epoch": 0.007346638912697441, "grad_norm": 3.1271674633026123, "learning_rate": 9.185832391312644e-05, "loss": 1.416, "step": 45 }, { "epoch": 0.00816293212521938, "grad_norm": 5.624019145965576, "learning_rate": 8.945702546981969e-05, "loss": 1.3862, "step": 50 }, { "epoch": 0.00816293212521938, "eval_loss": 1.3404040336608887, "eval_runtime": 91.0926, "eval_samples_per_second": 28.312, "eval_steps_per_second": 14.161, "step": 50 }, { "epoch": 0.008979225337741316, "grad_norm": 0.583979070186615, "learning_rate": 8.678619553365659e-05, "loss": 1.313, "step": 55 }, { "epoch": 0.009795518550263255, "grad_norm": 0.4565134346485138, "learning_rate": 8.386407858128706e-05, "loss": 1.341, "step": 60 }, { "epoch": 0.010611811762785193, "grad_norm": 0.46514594554901123, "learning_rate": 8.07106356344834e-05, "loss": 1.2728, "step": 65 }, { "epoch": 0.01142810497530713, "grad_norm": 0.5233550667762756, "learning_rate": 7.734740790612136e-05, "loss": 1.2577, "step": 70 }, { "epoch": 0.012244398187829068, "grad_norm": 0.624043881893158, "learning_rate": 7.379736965185368e-05, "loss": 1.2946, "step": 75 }, { "epoch": 0.013060691400351007, "grad_norm": 0.8614409565925598, "learning_rate": 7.008477123264848e-05, "loss": 1.1457, "step": 80 }, { "epoch": 0.013876984612872944, "grad_norm": 0.9170352220535278, "learning_rate": 6.623497346023418e-05, "loss": 1.3366, "step": 85 }, { "epoch": 0.014693277825394882, "grad_norm": 1.7850666046142578, "learning_rate": 6.227427435703997e-05, "loss": 1.2967, "step": 90 }, { "epoch": 0.01550957103791682, "grad_norm": 1.96737802028656, "learning_rate": 5.8229729514036705e-05, "loss": 1.4234, "step": 95 }, { "epoch": 0.01632586425043876, "grad_norm": 4.821284294128418, "learning_rate": 5.4128967273616625e-05, "loss": 1.2413, "step": 100 }, { "epoch": 0.01632586425043876, "eval_loss": 1.256380558013916, "eval_runtime": 91.0509, "eval_samples_per_second": 28.325, "eval_steps_per_second": 14.168, "step": 100 }, { "epoch": 0.017142157462960696, "grad_norm": 0.37310275435447693, "learning_rate": 5e-05, "loss": 1.1309, "step": 105 }, { "epoch": 0.017958450675482632, "grad_norm": 0.3515789806842804, "learning_rate": 4.5871032726383386e-05, "loss": 1.1598, "step": 110 }, { "epoch": 0.018774743888004573, "grad_norm": 0.4006565511226654, "learning_rate": 4.17702704859633e-05, "loss": 1.2816, "step": 115 }, { "epoch": 0.01959103710052651, "grad_norm": 0.4665372967720032, "learning_rate": 3.772572564296005e-05, "loss": 1.3579, "step": 120 }, { "epoch": 0.020407330313048446, "grad_norm": 0.6170455813407898, "learning_rate": 3.3765026539765834e-05, "loss": 1.2506, "step": 125 }, { "epoch": 0.021223623525570386, "grad_norm": 0.9777705073356628, "learning_rate": 2.991522876735154e-05, "loss": 1.1281, "step": 130 }, { "epoch": 0.022039916738092323, "grad_norm": 1.1337064504623413, "learning_rate": 2.6202630348146324e-05, "loss": 1.3309, "step": 135 }, { "epoch": 0.02285620995061426, "grad_norm": 2.501723289489746, "learning_rate": 2.2652592093878666e-05, "loss": 1.3117, "step": 140 }, { "epoch": 0.0236725031631362, "grad_norm": 3.4476728439331055, "learning_rate": 1.928936436551661e-05, "loss": 1.2624, "step": 145 }, { "epoch": 0.024488796375658137, "grad_norm": 6.442051887512207, "learning_rate": 1.6135921418712956e-05, "loss": 1.4187, "step": 150 }, { "epoch": 0.024488796375658137, "eval_loss": 1.2430131435394287, "eval_runtime": 91.0302, "eval_samples_per_second": 28.331, "eval_steps_per_second": 14.171, "step": 150 }, { "epoch": 0.025305089588180073, "grad_norm": 0.3390170633792877, "learning_rate": 1.3213804466343421e-05, "loss": 1.1149, "step": 155 }, { "epoch": 0.026121382800702014, "grad_norm": 0.38943567872047424, "learning_rate": 1.0542974530180327e-05, "loss": 1.2427, "step": 160 }, { "epoch": 0.02693767601322395, "grad_norm": 0.4515894949436188, "learning_rate": 8.141676086873572e-06, "loss": 1.1704, "step": 165 }, { "epoch": 0.027753969225745887, "grad_norm": 0.5294841527938843, "learning_rate": 6.026312439675552e-06, "loss": 1.2927, "step": 170 }, { "epoch": 0.028570262438267827, "grad_norm": 0.8594310283660889, "learning_rate": 4.2113336672471245e-06, "loss": 1.2846, "step": 175 }, { "epoch": 0.029386555650789764, "grad_norm": 0.920279324054718, "learning_rate": 2.7091379149682685e-06, "loss": 1.3665, "step": 180 }, { "epoch": 0.0302028488633117, "grad_norm": 1.575203537940979, "learning_rate": 1.5299867030334814e-06, "loss": 1.2451, "step": 185 }, { "epoch": 0.03101914207583364, "grad_norm": 2.434324026107788, "learning_rate": 6.819348298638839e-07, "loss": 1.2098, "step": 190 }, { "epoch": 0.03183543528835558, "grad_norm": 1.815964937210083, "learning_rate": 1.7077534966650766e-07, "loss": 0.9787, "step": 195 }, { "epoch": 0.03265172850087752, "grad_norm": 18.76615333557129, "learning_rate": 0.0, "loss": 1.2319, "step": 200 }, { "epoch": 0.03265172850087752, "eval_loss": 1.2376748323440552, "eval_runtime": 91.0977, "eval_samples_per_second": 28.31, "eval_steps_per_second": 14.161, "step": 200 } ], "logging_steps": 5, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2988145253482496e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }