{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03666361136571952, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0036663611365719525, "grad_norm": 11.825860977172852, "learning_rate": 2e-05, "loss": 10.8658, "step": 1 }, { "epoch": 0.0036663611365719525, "eval_loss": 11.236878395080566, "eval_runtime": 12.396, "eval_samples_per_second": 9.277, "eval_steps_per_second": 4.679, "step": 1 }, { "epoch": 0.007332722273143905, "grad_norm": 11.643694877624512, "learning_rate": 4e-05, "loss": 11.5997, "step": 2 }, { "epoch": 0.010999083409715857, "grad_norm": 13.432502746582031, "learning_rate": 6e-05, "loss": 12.1139, "step": 3 }, { "epoch": 0.010999083409715857, "eval_loss": 11.0986967086792, "eval_runtime": 11.9026, "eval_samples_per_second": 9.662, "eval_steps_per_second": 4.873, "step": 3 }, { "epoch": 0.01466544454628781, "grad_norm": 16.266033172607422, "learning_rate": 8e-05, "loss": 12.442, "step": 4 }, { "epoch": 0.01833180568285976, "grad_norm": 14.242290496826172, "learning_rate": 0.0001, "loss": 9.6975, "step": 5 }, { "epoch": 0.021998166819431713, "grad_norm": 19.96150779724121, "learning_rate": 0.00012, "loss": 9.8631, "step": 6 }, { "epoch": 0.021998166819431713, "eval_loss": 8.199483871459961, "eval_runtime": 12.1413, "eval_samples_per_second": 9.472, "eval_steps_per_second": 4.777, "step": 6 }, { "epoch": 0.025664527956003668, "grad_norm": 21.698095321655273, "learning_rate": 0.00014, "loss": 8.2158, "step": 7 }, { "epoch": 0.02933088909257562, "grad_norm": 32.11186218261719, "learning_rate": 0.00016, "loss": 6.0142, "step": 8 }, { "epoch": 0.03299725022914757, "grad_norm": 14.958884239196777, "learning_rate": 0.00018, "loss": 2.9791, "step": 9 }, { "epoch": 0.03299725022914757, "eval_loss": 2.25606632232666, "eval_runtime": 12.2047, "eval_samples_per_second": 9.423, "eval_steps_per_second": 4.752, "step": 9 }, { "epoch": 0.03666361136571952, "grad_norm": 13.798932075500488, "learning_rate": 0.0002, "loss": 2.7228, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1849564248145920.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }