{ "best_metric": 0.17114591920857378, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2705", "epoch": 10.0, "eval_steps": 500, "global_step": 5410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9242144177449169, "grad_norm": 1.4528056383132935, "learning_rate": 4.537892791127542e-05, "loss": 0.3191, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.7762986255097697, "eval_f1": 0.15197789412449098, "eval_loss": 0.815065860748291, "eval_precision": 0.08251737207833228, "eval_recall": 0.9604779411764706, "eval_runtime": 14.2875, "eval_samples_per_second": 476.64, "eval_steps_per_second": 59.632, "step": 541 }, { "epoch": 1.8484288354898335, "grad_norm": 1.5587466955184937, "learning_rate": 4.075785582255083e-05, "loss": 0.1619, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.7900778557403161, "eval_f1": 0.168, "eval_loss": 0.833220362663269, "eval_precision": 0.09218777996774771, "eval_recall": 0.9457720588235294, "eval_runtime": 14.245, "eval_samples_per_second": 478.064, "eval_steps_per_second": 59.811, "step": 1082 }, { "epoch": 2.7726432532347505, "grad_norm": 0.9616082310676575, "learning_rate": 3.613678373382625e-05, "loss": 0.11, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.7738201491205184, "eval_f1": 0.16426810845193607, "eval_loss": 1.1093989610671997, "eval_precision": 0.08991208982505004, "eval_recall": 0.9494485294117647, "eval_runtime": 14.3321, "eval_samples_per_second": 475.159, "eval_steps_per_second": 59.447, "step": 1623 }, { "epoch": 3.6968576709796674, "grad_norm": 1.4018645286560059, "learning_rate": 3.1515711645101665e-05, "loss": 0.0764, "step": 2000 }, { "epoch": 4.0, "eval_accuracy": 0.7740055199307949, "eval_f1": 0.16182605273514364, "eval_loss": 1.1206157207489014, "eval_precision": 0.08849100456227942, "eval_recall": 0.9448529411764706, "eval_runtime": 14.4251, "eval_samples_per_second": 472.092, "eval_steps_per_second": 59.064, "step": 2164 }, { "epoch": 4.621072088724584, "grad_norm": 1.0904265642166138, "learning_rate": 2.6894639556377083e-05, "loss": 0.0567, "step": 2500 }, { "epoch": 5.0, "eval_accuracy": 0.7890274211487498, "eval_f1": 0.17114591920857378, "eval_loss": 1.1805996894836426, "eval_precision": 0.09400470929179497, "eval_recall": 0.9540441176470589, "eval_runtime": 14.1983, "eval_samples_per_second": 479.634, "eval_steps_per_second": 60.007, "step": 2705 }, { "epoch": 5.545286506469501, "grad_norm": 1.240962028503418, "learning_rate": 2.2273567467652497e-05, "loss": 0.0428, "step": 3000 }, { "epoch": 6.0, "eval_accuracy": 0.7827454103560493, "eval_f1": 0.1644558094933674, "eval_loss": 1.31381094455719, "eval_precision": 0.09005776299667426, "eval_recall": 0.9457720588235294, "eval_runtime": 14.1863, "eval_samples_per_second": 480.042, "eval_steps_per_second": 60.058, "step": 3246 }, { "epoch": 6.469500924214418, "grad_norm": 0.44737720489501953, "learning_rate": 1.7652495378927914e-05, "loss": 0.0332, "step": 3500 }, { "epoch": 7.0, "eval_accuracy": 0.7874071429552226, "eval_f1": 0.16785861076859843, "eval_loss": 1.4008890390396118, "eval_precision": 0.09217297102103457, "eval_recall": 0.9384191176470589, "eval_runtime": 14.2845, "eval_samples_per_second": 476.742, "eval_steps_per_second": 59.645, "step": 3787 }, { "epoch": 7.393715341959335, "grad_norm": 0.7116318941116333, "learning_rate": 1.3031423290203328e-05, "loss": 0.0257, "step": 4000 }, { "epoch": 8.0, "eval_accuracy": 0.7790997844206132, "eval_f1": 0.16497502819397453, "eval_loss": 1.5610512495040894, "eval_precision": 0.0904114426982165, "eval_recall": 0.9411764705882353, "eval_runtime": 14.5465, "eval_samples_per_second": 468.153, "eval_steps_per_second": 58.571, "step": 4328 }, { "epoch": 8.317929759704251, "grad_norm": 0.9520462155342102, "learning_rate": 8.410351201478742e-06, "loss": 0.022, "step": 4500 }, { "epoch": 9.0, "eval_accuracy": 0.7835212215249838, "eval_f1": 0.16788916055419725, "eval_loss": 1.5933887958526611, "eval_precision": 0.09211232337685567, "eval_recall": 0.9466911764705882, "eval_runtime": 14.1983, "eval_samples_per_second": 479.635, "eval_steps_per_second": 60.007, "step": 4869 }, { "epoch": 9.242144177449168, "grad_norm": 0.6707109212875366, "learning_rate": 3.789279112754159e-06, "loss": 0.0181, "step": 5000 }, { "epoch": 10.0, "eval_accuracy": 0.7862743213368668, "eval_f1": 0.16804385175488834, "eval_loss": 1.6103088855743408, "eval_precision": 0.09223170184104176, "eval_recall": 0.9439338235294118, "eval_runtime": 14.5612, "eval_samples_per_second": 467.681, "eval_steps_per_second": 58.512, "step": 5410 }, { "epoch": 10.0, "step": 5410, "total_flos": 1.7176580067661056e+16, "train_loss": 0.0812657987344287, "train_runtime": 1542.5562, "train_samples_per_second": 224.329, "train_steps_per_second": 3.507 } ], "logging_steps": 500, "max_steps": 5410, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7176580067661056e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }