{ "best_metric": 0.8284272507637513, "best_model_checkpoint": "test-klue/ynat/run-1/checkpoint-500", "epoch": 3.4965034965034967, "eval_steps": 50, "global_step": 500, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.35, "learning_rate": 5.859444109925402e-05, "loss": 1.497, "step": 50 }, { "epoch": 0.35, "eval_f1": 0.6734208042191661, "eval_loss": 1.089546799659729, "eval_runtime": 13.0037, "eval_samples_per_second": 700.339, "eval_steps_per_second": 1.384, "step": 50 }, { "epoch": 0.7, "learning_rate": 5.418884402412214e-05, "loss": 0.5665, "step": 100 }, { "epoch": 0.7, "eval_f1": 0.8012954608718272, "eval_loss": 0.6758251190185547, "eval_runtime": 13.0152, "eval_samples_per_second": 699.72, "eval_steps_per_second": 1.383, "step": 100 }, { "epoch": 1.05, "learning_rate": 4.9783246948990256e-05, "loss": 0.5083, "step": 150 }, { "epoch": 1.05, "eval_f1": 0.7895493614371508, "eval_loss": 0.6960522532463074, "eval_runtime": 12.9373, "eval_samples_per_second": 703.936, "eval_steps_per_second": 1.391, "step": 150 }, { "epoch": 1.4, "learning_rate": 4.537764987385838e-05, "loss": 0.3865, "step": 200 }, { "epoch": 1.4, "eval_f1": 0.8201909522421974, "eval_loss": 0.6086650490760803, "eval_runtime": 12.7238, "eval_samples_per_second": 715.746, "eval_steps_per_second": 1.415, "step": 200 }, { "epoch": 1.75, "learning_rate": 4.09720527987265e-05, "loss": 0.385, "step": 250 }, { "epoch": 1.75, "eval_f1": 0.8359538639496097, "eval_loss": 0.50257807970047, "eval_runtime": 12.8233, "eval_samples_per_second": 710.19, "eval_steps_per_second": 1.404, "step": 250 }, { "epoch": 2.1, "learning_rate": 3.656645572359462e-05, "loss": 0.357, "step": 300 }, { "epoch": 2.1, "eval_f1": 0.8294020659039051, "eval_loss": 0.5500179529190063, "eval_runtime": 12.8836, "eval_samples_per_second": 706.867, "eval_steps_per_second": 1.397, "step": 300 }, { "epoch": 2.45, "learning_rate": 3.216085864846273e-05, "loss": 0.2337, "step": 350 }, { "epoch": 2.45, "eval_f1": 0.8324393645838855, "eval_loss": 0.5851877927780151, "eval_runtime": 12.9246, "eval_samples_per_second": 704.625, "eval_steps_per_second": 1.393, "step": 350 }, { "epoch": 2.8, "learning_rate": 2.775526157333085e-05, "loss": 0.2136, "step": 400 }, { "epoch": 2.8, "eval_f1": 0.8449098012505971, "eval_loss": 0.5285059809684753, "eval_runtime": 12.7922, "eval_samples_per_second": 711.919, "eval_steps_per_second": 1.407, "step": 400 }, { "epoch": 3.15, "learning_rate": 2.3349664498198968e-05, "loss": 0.1926, "step": 450 }, { "epoch": 3.15, "eval_f1": 0.8313597336118761, "eval_loss": 0.6143582463264465, "eval_runtime": 12.8169, "eval_samples_per_second": 710.548, "eval_steps_per_second": 1.404, "step": 450 }, { "epoch": 3.5, "learning_rate": 1.894406742306709e-05, "loss": 0.1359, "step": 500 }, { "epoch": 3.5, "eval_f1": 0.8284272507637513, "eval_loss": 0.7059531211853027, "eval_runtime": 12.7876, "eval_samples_per_second": 712.173, "eval_steps_per_second": 1.408, "step": 500 } ], "logging_steps": 50, "max_steps": 715, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 178689205892400.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "learning_rate": 5.859444109925402e-05, "num_train_epochs": 5, "per_device_train_batch_size": 32, "seed": 11 } }