{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.041701417848206836, "eval_steps": 9, "global_step": 25, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0016680567139282735, "grad_norm": 6.4218597412109375, "learning_rate": 1e-05, "loss": 8.8157, "step": 1 }, { "epoch": 0.0016680567139282735, "eval_loss": 10.53897476196289, "eval_runtime": 5.0335, "eval_samples_per_second": 100.327, "eval_steps_per_second": 12.715, "step": 1 }, { "epoch": 0.003336113427856547, "grad_norm": 8.344048500061035, "learning_rate": 2e-05, "loss": 9.9237, "step": 2 }, { "epoch": 0.0050041701417848205, "grad_norm": 5.752366065979004, "learning_rate": 3e-05, "loss": 10.2868, "step": 3 }, { "epoch": 0.006672226855713094, "grad_norm": 12.08169174194336, "learning_rate": 4e-05, "loss": 11.6577, "step": 4 }, { "epoch": 0.008340283569641367, "grad_norm": 7.6307902336120605, "learning_rate": 5e-05, "loss": 11.6049, "step": 5 }, { "epoch": 0.010008340283569641, "grad_norm": 13.488511085510254, "learning_rate": 6e-05, "loss": 11.6571, "step": 6 }, { "epoch": 0.011676396997497914, "grad_norm": 6.870922565460205, "learning_rate": 7e-05, "loss": 10.9165, "step": 7 }, { "epoch": 0.013344453711426188, "grad_norm": 6.744866371154785, "learning_rate": 8e-05, "loss": 9.6312, "step": 8 }, { "epoch": 0.015012510425354461, "grad_norm": 8.185052871704102, "learning_rate": 9e-05, "loss": 9.159, "step": 9 }, { "epoch": 0.015012510425354461, "eval_loss": 9.887859344482422, "eval_runtime": 4.3895, "eval_samples_per_second": 115.047, "eval_steps_per_second": 14.58, "step": 9 }, { "epoch": 0.016680567139282735, "grad_norm": 9.440922737121582, "learning_rate": 0.0001, "loss": 10.1835, "step": 10 }, { "epoch": 0.01834862385321101, "grad_norm": 5.682453155517578, "learning_rate": 9.99695413509548e-05, "loss": 9.355, "step": 11 }, { "epoch": 0.020016680567139282, "grad_norm": 5.6532368659973145, "learning_rate": 9.987820251299122e-05, "loss": 9.1637, "step": 12 }, { "epoch": 0.021684737281067557, "grad_norm": 8.35978889465332, "learning_rate": 9.972609476841367e-05, "loss": 8.5444, "step": 13 }, { "epoch": 0.02335279399499583, "grad_norm": 7.7980217933654785, "learning_rate": 9.951340343707852e-05, "loss": 9.1922, "step": 14 }, { "epoch": 0.025020850708924104, "grad_norm": 7.174959659576416, "learning_rate": 9.924038765061042e-05, "loss": 9.0129, "step": 15 }, { "epoch": 0.026688907422852376, "grad_norm": 5.547351837158203, "learning_rate": 9.890738003669029e-05, "loss": 7.7701, "step": 16 }, { "epoch": 0.02835696413678065, "grad_norm": 5.53154993057251, "learning_rate": 9.851478631379982e-05, "loss": 7.7904, "step": 17 }, { "epoch": 0.030025020850708923, "grad_norm": 5.278870105743408, "learning_rate": 9.806308479691595e-05, "loss": 8.5489, "step": 18 }, { "epoch": 0.030025020850708923, "eval_loss": 7.729805946350098, "eval_runtime": 4.3901, "eval_samples_per_second": 115.03, "eval_steps_per_second": 14.578, "step": 18 }, { "epoch": 0.0316930775646372, "grad_norm": 6.058969497680664, "learning_rate": 9.755282581475769e-05, "loss": 7.761, "step": 19 }, { "epoch": 0.03336113427856547, "grad_norm": 4.8587822914123535, "learning_rate": 9.698463103929542e-05, "loss": 7.8479, "step": 20 }, { "epoch": 0.03502919099249374, "grad_norm": 8.1661376953125, "learning_rate": 9.635919272833938e-05, "loss": 8.6839, "step": 21 }, { "epoch": 0.03669724770642202, "grad_norm": 5.563140392303467, "learning_rate": 9.567727288213005e-05, "loss": 7.5815, "step": 22 }, { "epoch": 0.03836530442035029, "grad_norm": 6.0711350440979, "learning_rate": 9.493970231495835e-05, "loss": 7.1114, "step": 23 }, { "epoch": 0.040033361134278564, "grad_norm": 4.127212047576904, "learning_rate": 9.414737964294636e-05, "loss": 5.8192, "step": 24 }, { "epoch": 0.041701417848206836, "grad_norm": 6.469174861907959, "learning_rate": 9.330127018922194e-05, "loss": 5.9953, "step": 25 } ], "logging_steps": 1, "max_steps": 100, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 347093965209600.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }