{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.35398230088495575, "eval_steps": 9, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0035398230088495575, "eval_loss": 1.2176878452301025, "eval_runtime": 52.3124, "eval_samples_per_second": 19.192, "eval_steps_per_second": 2.409, "step": 1 }, { "epoch": 0.017699115044247787, "grad_norm": 1.2977772951126099, "learning_rate": 5e-05, "loss": 1.1881, "step": 5 }, { "epoch": 0.03185840707964602, "eval_loss": 0.843097448348999, "eval_runtime": 51.9126, "eval_samples_per_second": 19.34, "eval_steps_per_second": 2.427, "step": 9 }, { "epoch": 0.035398230088495575, "grad_norm": 1.1104620695114136, "learning_rate": 0.0001, "loss": 0.9227, "step": 10 }, { "epoch": 0.05309734513274336, "grad_norm": 0.627830445766449, "learning_rate": 0.00015000000000000001, "loss": 0.7526, "step": 15 }, { "epoch": 0.06371681415929203, "eval_loss": 0.7407240271568298, "eval_runtime": 51.9417, "eval_samples_per_second": 19.329, "eval_steps_per_second": 2.426, "step": 18 }, { "epoch": 0.07079646017699115, "grad_norm": 0.631891667842865, "learning_rate": 0.0002, "loss": 0.7507, "step": 20 }, { "epoch": 0.08849557522123894, "grad_norm": 0.5757474303245544, "learning_rate": 0.00019807852804032305, "loss": 0.6623, "step": 25 }, { "epoch": 0.09557522123893805, "eval_loss": 0.7096454501152039, "eval_runtime": 51.8532, "eval_samples_per_second": 19.362, "eval_steps_per_second": 2.43, "step": 27 }, { "epoch": 0.10619469026548672, "grad_norm": 0.5146051645278931, "learning_rate": 0.0001923879532511287, "loss": 0.6991, "step": 30 }, { "epoch": 0.12389380530973451, "grad_norm": 0.4929647445678711, "learning_rate": 0.00018314696123025454, "loss": 0.6832, "step": 35 }, { "epoch": 0.12743362831858407, "eval_loss": 0.6971020102500916, "eval_runtime": 51.931, "eval_samples_per_second": 19.333, "eval_steps_per_second": 2.426, "step": 36 }, { "epoch": 0.1415929203539823, "grad_norm": 0.497424453496933, "learning_rate": 0.00017071067811865476, "loss": 0.6967, "step": 40 }, { "epoch": 0.1592920353982301, "grad_norm": 0.524443507194519, "learning_rate": 0.00015555702330196023, "loss": 0.6789, "step": 45 }, { "epoch": 0.1592920353982301, "eval_loss": 0.6859140396118164, "eval_runtime": 51.9295, "eval_samples_per_second": 19.334, "eval_steps_per_second": 2.426, "step": 45 }, { "epoch": 0.17699115044247787, "grad_norm": 0.5028303265571594, "learning_rate": 0.000138268343236509, "loss": 0.6702, "step": 50 }, { "epoch": 0.1911504424778761, "eval_loss": 0.6808338761329651, "eval_runtime": 51.8875, "eval_samples_per_second": 19.35, "eval_steps_per_second": 2.428, "step": 54 }, { "epoch": 0.19469026548672566, "grad_norm": 0.4547295868396759, "learning_rate": 0.00011950903220161285, "loss": 0.6871, "step": 55 }, { "epoch": 0.21238938053097345, "grad_norm": 0.49500009417533875, "learning_rate": 0.0001, "loss": 0.6723, "step": 60 }, { "epoch": 0.22300884955752212, "eval_loss": 0.6712765097618103, "eval_runtime": 51.9038, "eval_samples_per_second": 19.343, "eval_steps_per_second": 2.428, "step": 63 }, { "epoch": 0.23008849557522124, "grad_norm": 0.513264000415802, "learning_rate": 8.049096779838719e-05, "loss": 0.6926, "step": 65 }, { "epoch": 0.24778761061946902, "grad_norm": 0.43856874108314514, "learning_rate": 6.173165676349103e-05, "loss": 0.6308, "step": 70 }, { "epoch": 0.25486725663716814, "eval_loss": 0.666566014289856, "eval_runtime": 51.9326, "eval_samples_per_second": 19.333, "eval_steps_per_second": 2.426, "step": 72 }, { "epoch": 0.26548672566371684, "grad_norm": 0.43767687678337097, "learning_rate": 4.444297669803981e-05, "loss": 0.6488, "step": 75 }, { "epoch": 0.2831858407079646, "grad_norm": 0.4309018850326538, "learning_rate": 2.9289321881345254e-05, "loss": 0.6491, "step": 80 }, { "epoch": 0.2867256637168142, "eval_loss": 0.659369170665741, "eval_runtime": 51.9861, "eval_samples_per_second": 19.313, "eval_steps_per_second": 2.424, "step": 81 }, { "epoch": 0.3008849557522124, "grad_norm": 0.46855592727661133, "learning_rate": 1.6853038769745467e-05, "loss": 0.7041, "step": 85 }, { "epoch": 0.3185840707964602, "grad_norm": 0.4220588505268097, "learning_rate": 7.612046748871327e-06, "loss": 0.6782, "step": 90 }, { "epoch": 0.3185840707964602, "eval_loss": 0.6560170650482178, "eval_runtime": 51.9348, "eval_samples_per_second": 19.332, "eval_steps_per_second": 2.426, "step": 90 }, { "epoch": 0.336283185840708, "grad_norm": 0.41629549860954285, "learning_rate": 1.921471959676957e-06, "loss": 0.6308, "step": 95 }, { "epoch": 0.3504424778761062, "eval_loss": 0.6551244854927063, "eval_runtime": 51.9745, "eval_samples_per_second": 19.317, "eval_steps_per_second": 2.424, "step": 99 }, { "epoch": 0.35398230088495575, "grad_norm": 0.4104834496974945, "learning_rate": 0.0, "loss": 0.6769, "step": 100 } ], "logging_steps": 5, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 9, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.508513578745856e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }