{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 7660, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.9878163251157847e-05, "loss": 2.5054, "step": 383 }, { "epoch": 1.0, "eval_loss": 2.3042960166931152, "eval_runtime": 6.5074, "eval_samples_per_second": 941.694, "eval_steps_per_second": 14.752, "step": 383 }, { "epoch": 2.0, "learning_rate": 1.9513096703453338e-05, "loss": 2.3885, "step": 766 }, { "epoch": 2.0, "eval_loss": 2.2446680068969727, "eval_runtime": 6.5762, "eval_samples_per_second": 931.841, "eval_steps_per_second": 14.598, "step": 766 }, { "epoch": 3.0, "learning_rate": 1.8913786142751957e-05, "loss": 2.3418, "step": 1149 }, { "epoch": 3.0, "eval_loss": 2.231914758682251, "eval_runtime": 6.5679, "eval_samples_per_second": 933.023, "eval_steps_per_second": 14.617, "step": 1149 }, { "epoch": 4.0, "learning_rate": 1.809498858405589e-05, "loss": 2.3045, "step": 1532 }, { "epoch": 4.0, "eval_loss": 2.188257932662964, "eval_runtime": 6.6128, "eval_samples_per_second": 926.691, "eval_steps_per_second": 14.517, "step": 1532 }, { "epoch": 5.0, "learning_rate": 1.7076865540693534e-05, "loss": 2.2772, "step": 1915 }, { "epoch": 5.0, "eval_loss": 2.189318895339966, "eval_runtime": 6.8537, "eval_samples_per_second": 894.11, "eval_steps_per_second": 14.007, "step": 1915 }, { "epoch": 6.0, "learning_rate": 1.588448658094913e-05, "loss": 2.2543, "step": 2298 }, { "epoch": 6.0, "eval_loss": 2.168344736099243, "eval_runtime": 6.7894, "eval_samples_per_second": 902.58, "eval_steps_per_second": 14.14, "step": 2298 }, { "epoch": 7.0, "learning_rate": 1.4547212032090465e-05, "loss": 2.2308, "step": 2681 }, { "epoch": 7.0, "eval_loss": 2.1453945636749268, "eval_runtime": 6.5512, "eval_samples_per_second": 935.407, "eval_steps_per_second": 14.654, "step": 2681 }, { "epoch": 8.0, "learning_rate": 1.3097970031670223e-05, "loss": 2.2139, "step": 3064 }, { "epoch": 8.0, "eval_loss": 2.1402783393859863, "eval_runtime": 6.5858, "eval_samples_per_second": 930.487, "eval_steps_per_second": 14.577, "step": 3064 }, { "epoch": 9.0, "learning_rate": 1.1572445727497194e-05, "loss": 2.2008, "step": 3447 }, { "epoch": 9.0, "eval_loss": 2.11649489402771, "eval_runtime": 6.5807, "eval_samples_per_second": 931.208, "eval_steps_per_second": 14.588, "step": 3447 }, { "epoch": 10.0, "learning_rate": 1.0008202590865013e-05, "loss": 2.1937, "step": 3830 }, { "epoch": 10.0, "eval_loss": 2.1280927658081055, "eval_runtime": 6.6743, "eval_samples_per_second": 918.143, "eval_steps_per_second": 14.383, "step": 3830 }, { "epoch": 11.0, "learning_rate": 8.443757479222894e-06, "loss": 2.1778, "step": 4213 }, { "epoch": 11.0, "eval_loss": 2.1188840866088867, "eval_runtime": 6.8275, "eval_samples_per_second": 897.543, "eval_steps_per_second": 14.061, "step": 4213 }, { "epoch": 12.0, "learning_rate": 6.917632223315125e-06, "loss": 2.1742, "step": 4596 }, { "epoch": 12.0, "eval_loss": 2.121837854385376, "eval_runtime": 6.6231, "eval_samples_per_second": 925.24, "eval_steps_per_second": 14.495, "step": 4596 }, { "epoch": 13.0, "learning_rate": 5.467405091861484e-06, "loss": 2.1611, "step": 4979 }, { "epoch": 13.0, "eval_loss": 2.099580764770508, "eval_runtime": 6.7959, "eval_samples_per_second": 901.723, "eval_steps_per_second": 14.126, "step": 4979 }, { "epoch": 14.0, "learning_rate": 4.12878548986627e-06, "loss": 2.1562, "step": 5362 }, { "epoch": 14.0, "eval_loss": 2.099181890487671, "eval_runtime": 6.8991, "eval_samples_per_second": 888.236, "eval_steps_per_second": 13.915, "step": 5362 }, { "epoch": 15.0, "learning_rate": 2.934734674554366e-06, "loss": 2.1508, "step": 5745 }, { "epoch": 15.0, "eval_loss": 2.1012661457061768, "eval_runtime": 6.8384, "eval_samples_per_second": 896.11, "eval_steps_per_second": 14.038, "step": 5745 }, { "epoch": 16.0, "learning_rate": 1.9146541398261966e-06, "loss": 2.1469, "step": 6128 }, { "epoch": 16.0, "eval_loss": 2.094507932662964, "eval_runtime": 6.7332, "eval_samples_per_second": 910.114, "eval_steps_per_second": 14.258, "step": 6128 }, { "epoch": 17.0, "learning_rate": 1.0936616538999777e-06, "loss": 2.1437, "step": 6511 }, { "epoch": 17.0, "eval_loss": 2.089902639389038, "eval_runtime": 6.6174, "eval_samples_per_second": 926.05, "eval_steps_per_second": 14.507, "step": 6511 }, { "epoch": 18.0, "learning_rate": 4.919727764970517e-07, "loss": 2.1436, "step": 6894 }, { "epoch": 18.0, "eval_loss": 2.082735061645508, "eval_runtime": 6.5958, "eval_samples_per_second": 929.079, "eval_steps_per_second": 14.555, "step": 6894 }, { "epoch": 19.0, "learning_rate": 1.2440308466997952e-07, "loss": 2.1443, "step": 7277 }, { "epoch": 19.0, "eval_loss": 2.0934832096099854, "eval_runtime": 6.5708, "eval_samples_per_second": 932.611, "eval_steps_per_second": 14.61, "step": 7277 }, { "epoch": 20.0, "learning_rate": 3.3641254104654906e-12, "loss": 2.1389, "step": 7660 }, { "epoch": 20.0, "eval_loss": 2.0939252376556396, "eval_runtime": 6.5634, "eval_samples_per_second": 933.664, "eval_steps_per_second": 14.627, "step": 7660 }, { "epoch": 20.0, "step": 7660, "total_flos": 3.24907393591296e+16, "train_loss": 2.2224227227990374, "train_runtime": 2024.9575, "train_samples_per_second": 242.079, "train_steps_per_second": 3.783 } ], "max_steps": 7660, "num_train_epochs": 20, "total_flos": 3.24907393591296e+16, "trial_name": null, "trial_params": null }