{ "best_metric": 0.4624414693662204, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2127", "epoch": 9.988249118683902, "eval_steps": 500, "global_step": 4250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9988249118683902, "eval_accuracy": 0.8517239485355707, "eval_f1": 0.4251637936518033, "eval_loss": 0.3833567500114441, "eval_precision": 0.29195422381409974, "eval_recall": 0.7819372952737482, "eval_runtime": 14.7317, "eval_samples_per_second": 462.27, "eval_steps_per_second": 57.835, "step": 425 }, { "epoch": 1.1750881316098707, "grad_norm": 1.4406206607818604, "learning_rate": 4.411764705882353e-05, "loss": 0.3349, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.822140140332569, "eval_f1": 0.40249737425603915, "eval_loss": 0.5730458498001099, "eval_precision": 0.2681125621890547, "eval_recall": 0.8069723912026205, "eval_runtime": 14.9036, "eval_samples_per_second": 456.937, "eval_steps_per_second": 57.167, "step": 851 }, { "epoch": 2.3501762632197414, "grad_norm": 2.988006591796875, "learning_rate": 3.8235294117647055e-05, "loss": 0.1788, "step": 1000 }, { "epoch": 2.99882491186839, "eval_accuracy": 0.8338185013799827, "eval_f1": 0.42018044559013074, "eval_loss": 0.5796028971672058, "eval_precision": 0.2847990681421083, "eval_recall": 0.8008890968647637, "eval_runtime": 14.7541, "eval_samples_per_second": 461.567, "eval_steps_per_second": 57.747, "step": 1276 }, { "epoch": 3.525264394829612, "grad_norm": 1.1645787954330444, "learning_rate": 3.235294117647059e-05, "loss": 0.1227, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.8388097820863142, "eval_f1": 0.43759863644971914, "eval_loss": 0.6590859889984131, "eval_precision": 0.2996455433560993, "eval_recall": 0.8109499298081423, "eval_runtime": 14.8634, "eval_samples_per_second": 458.172, "eval_steps_per_second": 57.322, "step": 1702 }, { "epoch": 4.700352526439483, "grad_norm": 1.1214195489883423, "learning_rate": 2.647058823529412e-05, "loss": 0.0856, "step": 2000 }, { "epoch": 4.9988249118683905, "eval_accuracy": 0.8601754843670617, "eval_f1": 0.4624414693662204, "eval_loss": 0.6266195774078369, "eval_precision": 0.32197630636422075, "eval_recall": 0.8203088441740758, "eval_runtime": 14.7766, "eval_samples_per_second": 460.863, "eval_steps_per_second": 57.659, "step": 2127 }, { "epoch": 5.875440658049354, "grad_norm": 1.1390776634216309, "learning_rate": 2.058823529411765e-05, "loss": 0.0597, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.8475977316105291, "eval_f1": 0.44600244420145363, "eval_loss": 0.7858611941337585, "eval_precision": 0.3075490109110263, "eval_recall": 0.8111839026672906, "eval_runtime": 14.8163, "eval_samples_per_second": 459.628, "eval_steps_per_second": 57.504, "step": 2553 }, { "epoch": 6.9988249118683905, "eval_accuracy": 0.8507970944841886, "eval_f1": 0.45324675324675323, "eval_loss": 0.829707145690918, "eval_precision": 0.3136796692432141, "eval_recall": 0.8165652784277024, "eval_runtime": 14.8867, "eval_samples_per_second": 457.456, "eval_steps_per_second": 57.232, "step": 2978 }, { "epoch": 7.050528789659224, "grad_norm": 0.5195357799530029, "learning_rate": 1.4705882352941177e-05, "loss": 0.0458, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.8532206461889135, "eval_f1": 0.453628249902988, "eval_loss": 0.8468152284622192, "eval_precision": 0.3134608509116911, "eval_recall": 0.8205428170332242, "eval_runtime": 15.05, "eval_samples_per_second": 452.493, "eval_steps_per_second": 56.611, "step": 3404 }, { "epoch": 8.225616921269095, "grad_norm": 0.8723571300506592, "learning_rate": 8.823529411764707e-06, "loss": 0.0343, "step": 3500 }, { "epoch": 8.99882491186839, "eval_accuracy": 0.8493827838576352, "eval_f1": 0.44807482862451153, "eval_loss": 0.9241161346435547, "eval_precision": 0.30851345390383766, "eval_recall": 0.8182030884417407, "eval_runtime": 14.662, "eval_samples_per_second": 464.465, "eval_steps_per_second": 58.109, "step": 3829 }, { "epoch": 9.400705052878966, "grad_norm": 0.6075822710990906, "learning_rate": 2.9411764705882355e-06, "loss": 0.0292, "step": 4000 }, { "epoch": 9.988249118683902, "eval_accuracy": 0.8498565092616749, "eval_f1": 0.44938176197836166, "eval_loss": 0.938378632068634, "eval_precision": 0.31002310289674784, "eval_recall": 0.816331305568554, "eval_runtime": 15.4697, "eval_samples_per_second": 440.215, "eval_steps_per_second": 55.075, "step": 4250 }, { "epoch": 9.988249118683902, "step": 4250, "total_flos": 1.2649810588547778e+16, "train_loss": 0.10639642311544979, "train_runtime": 1205.6346, "train_samples_per_second": 225.848, "train_steps_per_second": 3.525 } ], "logging_steps": 500, "max_steps": 4250, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.2649810588547778e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }