{ "best_metric": 0.930276087973795, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-2016", "epoch": 10.0, "eval_steps": 500, "global_step": 5040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9920634920634921, "grad_norm": 0.06281786412000656, "learning_rate": 4.503968253968254e-05, "loss": 0.0189, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.9983953339100828, "eval_f1": 0.9040358744394619, "eval_loss": 0.00518822530284524, "eval_precision": 0.8712186689714779, "eval_recall": 0.9394221808014911, "eval_runtime": 13.2131, "eval_samples_per_second": 525.689, "eval_steps_per_second": 65.768, "step": 504 }, { "epoch": 1.9841269841269842, "grad_norm": 0.20689290761947632, "learning_rate": 4.007936507936508e-05, "loss": 0.0047, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.9986883598917199, "eval_f1": 0.9244402985074627, "eval_loss": 0.004834321793168783, "eval_precision": 0.9253034547152195, "eval_recall": 0.923578751164958, "eval_runtime": 13.2434, "eval_samples_per_second": 524.487, "eval_steps_per_second": 65.618, "step": 1008 }, { "epoch": 2.9761904761904763, "grad_norm": 0.02256501279771328, "learning_rate": 3.511904761904762e-05, "loss": 0.0027, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.9986255686099406, "eval_f1": 0.9239384041063929, "eval_loss": 0.005881821736693382, "eval_precision": 0.9252336448598131, "eval_recall": 0.9226467847157502, "eval_runtime": 13.1715, "eval_samples_per_second": 527.351, "eval_steps_per_second": 65.976, "step": 1512 }, { "epoch": 3.9682539682539684, "grad_norm": 0.0326182059943676, "learning_rate": 3.0158730158730158e-05, "loss": 0.0015, "step": 2000 }, { "epoch": 4.0, "eval_accuracy": 0.9987162671280663, "eval_f1": 0.930276087973795, "eval_loss": 0.0064844791777431965, "eval_precision": 0.9342105263157895, "eval_recall": 0.9263746505125815, "eval_runtime": 13.5939, "eval_samples_per_second": 510.963, "eval_steps_per_second": 63.926, "step": 2016 }, { "epoch": 4.9603174603174605, "grad_norm": 0.1600140929222107, "learning_rate": 2.5198412698412697e-05, "loss": 0.0011, "step": 2500 }, { "epoch": 5.0, "eval_accuracy": 0.9986116149917673, "eval_f1": 0.923076923076923, "eval_loss": 0.007346163038164377, "eval_precision": 0.9072907290729073, "eval_recall": 0.9394221808014911, "eval_runtime": 13.3438, "eval_samples_per_second": 520.543, "eval_steps_per_second": 65.124, "step": 2520 }, { "epoch": 5.9523809523809526, "grad_norm": 0.09641193598508835, "learning_rate": 2.023809523809524e-05, "loss": 0.0005, "step": 3000 }, { "epoch": 6.0, "eval_accuracy": 0.998444171573689, "eval_f1": 0.9204281060958585, "eval_loss": 0.009004838764667511, "eval_precision": 0.9191449814126395, "eval_recall": 0.9217148182665424, "eval_runtime": 13.3195, "eval_samples_per_second": 521.491, "eval_steps_per_second": 65.243, "step": 3024 }, { "epoch": 6.944444444444445, "grad_norm": 0.029784763231873512, "learning_rate": 1.527777777777778e-05, "loss": 0.0007, "step": 3500 }, { "epoch": 7.0, "eval_accuracy": 0.9985767309463344, "eval_f1": 0.9190432382704691, "eval_loss": 0.008385799825191498, "eval_precision": 0.9073569482288828, "eval_recall": 0.9310344827586207, "eval_runtime": 13.4485, "eval_samples_per_second": 516.487, "eval_steps_per_second": 64.617, "step": 3528 }, { "epoch": 7.936507936507937, "grad_norm": 0.0010318646673113108, "learning_rate": 1.0317460317460318e-05, "loss": 0.0004, "step": 4000 }, { "epoch": 8.0, "eval_accuracy": 0.9985558005190746, "eval_f1": 0.9213793103448276, "eval_loss": 0.008501913398504257, "eval_precision": 0.9092558983666061, "eval_recall": 0.9338303821062441, "eval_runtime": 13.3613, "eval_samples_per_second": 519.86, "eval_steps_per_second": 65.039, "step": 4032 }, { "epoch": 8.928571428571429, "grad_norm": 0.0005677491426467896, "learning_rate": 5.357142857142857e-06, "loss": 0.0003, "step": 4500 }, { "epoch": 9.0, "eval_accuracy": 0.9987023135098931, "eval_f1": 0.9270544783010157, "eval_loss": 0.008021777495741844, "eval_precision": 0.918572735590119, "eval_recall": 0.9356943150046598, "eval_runtime": 13.487, "eval_samples_per_second": 515.014, "eval_steps_per_second": 64.432, "step": 4536 }, { "epoch": 9.920634920634921, "grad_norm": 0.0005077613168396056, "learning_rate": 3.9682539682539683e-07, "loss": 0.0002, "step": 5000 }, { "epoch": 10.0, "eval_accuracy": 0.9986883598917199, "eval_f1": 0.9278445883441258, "eval_loss": 0.008253143168985844, "eval_precision": 0.921028466483012, "eval_recall": 0.934762348555452, "eval_runtime": 13.7257, "eval_samples_per_second": 506.057, "eval_steps_per_second": 63.312, "step": 5040 }, { "epoch": 10.0, "step": 5040, "total_flos": 1.394320679130096e+16, "train_loss": 0.0030765269683407886, "train_runtime": 1249.6681, "train_samples_per_second": 257.924, "train_steps_per_second": 4.033 } ], "logging_steps": 500, "max_steps": 5040, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.394320679130096e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }