{ "best_metric": 0.8381595689513368, "best_model_checkpoint": "test-klue/ynat/run-2/checkpoint-500", "epoch": 0.8756567425569177, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 9.520888692829572e-06, "loss": 1.9542, "step": 50 }, { "epoch": 0.09, "eval_f1": 0.04583937972534243, "eval_loss": 1.94540536403656, "eval_runtime": 12.7715, "eval_samples_per_second": 713.073, "eval_steps_per_second": 1.409, "step": 50 }, { "epoch": 0.18, "learning_rate": 9.307798077502143e-06, "loss": 1.6697, "step": 100 }, { "epoch": 0.18, "eval_f1": 0.5702339339536844, "eval_loss": 1.5132834911346436, "eval_runtime": 13.3022, "eval_samples_per_second": 684.621, "eval_steps_per_second": 1.353, "step": 100 }, { "epoch": 0.26, "learning_rate": 9.094707462174712e-06, "loss": 0.8971, "step": 150 }, { "epoch": 0.26, "eval_f1": 0.7432151839966054, "eval_loss": 0.97530198097229, "eval_runtime": 12.9877, "eval_samples_per_second": 701.204, "eval_steps_per_second": 1.386, "step": 150 }, { "epoch": 0.35, "learning_rate": 8.881616846847283e-06, "loss": 0.6418, "step": 200 }, { "epoch": 0.35, "eval_f1": 0.7297934771119111, "eval_loss": 0.8931246995925903, "eval_runtime": 12.7184, "eval_samples_per_second": 716.049, "eval_steps_per_second": 1.415, "step": 200 }, { "epoch": 0.44, "learning_rate": 8.668526231519852e-06, "loss": 0.6438, "step": 250 }, { "epoch": 0.44, "eval_f1": 0.7761230585561165, "eval_loss": 0.7596781849861145, "eval_runtime": 12.6711, "eval_samples_per_second": 718.722, "eval_steps_per_second": 1.421, "step": 250 }, { "epoch": 0.53, "learning_rate": 8.455435616192421e-06, "loss": 0.5509, "step": 300 }, { "epoch": 0.53, "eval_f1": 0.8095769257080263, "eval_loss": 0.6825068593025208, "eval_runtime": 12.713, "eval_samples_per_second": 716.351, "eval_steps_per_second": 1.416, "step": 300 }, { "epoch": 0.61, "learning_rate": 8.242345000864992e-06, "loss": 0.45, "step": 350 }, { "epoch": 0.61, "eval_f1": 0.820467350641947, "eval_loss": 0.6184367537498474, "eval_runtime": 12.8329, "eval_samples_per_second": 709.662, "eval_steps_per_second": 1.403, "step": 350 }, { "epoch": 0.7, "learning_rate": 8.029254385537563e-06, "loss": 0.4877, "step": 400 }, { "epoch": 0.7, "eval_f1": 0.8192647088110643, "eval_loss": 0.5932053327560425, "eval_runtime": 12.9021, "eval_samples_per_second": 705.857, "eval_steps_per_second": 1.395, "step": 400 }, { "epoch": 0.79, "learning_rate": 7.816163770210132e-06, "loss": 0.5672, "step": 450 }, { "epoch": 0.79, "eval_f1": 0.8008517524970246, "eval_loss": 0.6429001092910767, "eval_runtime": 12.7777, "eval_samples_per_second": 712.728, "eval_steps_per_second": 1.409, "step": 450 }, { "epoch": 0.88, "learning_rate": 7.603073154882702e-06, "loss": 0.5631, "step": 500 }, { "epoch": 0.88, "eval_f1": 0.8381595689513368, "eval_loss": 0.5315341353416443, "eval_runtime": 12.733, "eval_samples_per_second": 715.231, "eval_steps_per_second": 1.414, "step": 500 } ], "logging_steps": 50, "max_steps": 2284, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 40570257086400.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "learning_rate": 9.520888692829572e-06, "num_train_epochs": 4, "per_device_train_batch_size": 8, "seed": 4 } }