Rodrigo1771's picture
End of training
dbe731b verified
{
"best_metric": 0.8040201005025126,
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3996",
"epoch": 9.98998998998999,
"eval_steps": 500,
"global_step": 4990,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.998998998998999,
"eval_accuracy": 0.9730869045820918,
"eval_f1": 0.75963794837412,
"eval_loss": 0.07385822385549545,
"eval_precision": 0.7270588235294118,
"eval_recall": 0.7952737482452036,
"eval_runtime": 14.2023,
"eval_samples_per_second": 479.501,
"eval_steps_per_second": 59.99,
"step": 499
},
{
"epoch": 1.001001001001001,
"grad_norm": 0.8427119851112366,
"learning_rate": 4.4989979959919844e-05,
"loss": 0.105,
"step": 500
},
{
"epoch": 2.0,
"eval_accuracy": 0.972949592870776,
"eval_f1": 0.7655806561471223,
"eval_loss": 0.09075114130973816,
"eval_precision": 0.7435501653803749,
"eval_recall": 0.7889564810481984,
"eval_runtime": 14.1492,
"eval_samples_per_second": 481.299,
"eval_steps_per_second": 60.215,
"step": 999
},
{
"epoch": 2.002002002002002,
"grad_norm": 0.9121108651161194,
"learning_rate": 3.997995991983968e-05,
"loss": 0.0448,
"step": 1000
},
{
"epoch": 2.998998998998999,
"eval_accuracy": 0.9743913658395924,
"eval_f1": 0.7829875042989798,
"eval_loss": 0.09297410398721695,
"eval_precision": 0.7675882220723759,
"eval_recall": 0.7990173139915769,
"eval_runtime": 14.1376,
"eval_samples_per_second": 481.693,
"eval_steps_per_second": 60.265,
"step": 1498
},
{
"epoch": 3.003003003003003,
"grad_norm": 0.5380845069885254,
"learning_rate": 3.496993987975952e-05,
"loss": 0.0255,
"step": 1500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9757438861960537,
"eval_f1": 0.789358010410642,
"eval_loss": 0.10520397126674652,
"eval_precision": 0.7805994051704416,
"eval_recall": 0.798315395414132,
"eval_runtime": 14.3621,
"eval_samples_per_second": 474.163,
"eval_steps_per_second": 59.323,
"step": 1998
},
{
"epoch": 4.004004004004004,
"grad_norm": 0.2705754339694977,
"learning_rate": 2.9959919839679363e-05,
"loss": 0.0164,
"step": 2000
},
{
"epoch": 4.998998998998999,
"eval_accuracy": 0.9750435964683428,
"eval_f1": 0.7879346074142298,
"eval_loss": 0.10997848957777023,
"eval_precision": 0.7756119673617408,
"eval_recall": 0.8006551240056153,
"eval_runtime": 14.2341,
"eval_samples_per_second": 478.428,
"eval_steps_per_second": 59.856,
"step": 2497
},
{
"epoch": 5.005005005005005,
"grad_norm": 0.27666428685188293,
"learning_rate": 2.49498997995992e-05,
"loss": 0.0112,
"step": 2500
},
{
"epoch": 6.0,
"eval_accuracy": 0.9768011863731858,
"eval_f1": 0.799447386599125,
"eval_loss": 0.12663568556308746,
"eval_precision": 0.7869446962828649,
"eval_recall": 0.8123537669630323,
"eval_runtime": 14.5018,
"eval_samples_per_second": 469.597,
"eval_steps_per_second": 58.751,
"step": 2997
},
{
"epoch": 6.006006006006006,
"grad_norm": 0.24552026391029358,
"learning_rate": 1.993987975951904e-05,
"loss": 0.0073,
"step": 3000
},
{
"epoch": 6.998998998998999,
"eval_accuracy": 0.976293133041317,
"eval_f1": 0.7968804562914678,
"eval_loss": 0.12882493436336517,
"eval_precision": 0.792911744266852,
"eval_recall": 0.8008890968647637,
"eval_runtime": 14.3476,
"eval_samples_per_second": 474.643,
"eval_steps_per_second": 59.383,
"step": 3496
},
{
"epoch": 7.007007007007007,
"grad_norm": 0.2008085697889328,
"learning_rate": 1.492985971943888e-05,
"loss": 0.0054,
"step": 3500
},
{
"epoch": 8.0,
"eval_accuracy": 0.9764853694371592,
"eval_f1": 0.8040201005025126,
"eval_loss": 0.14244574308395386,
"eval_precision": 0.803175344384777,
"eval_recall": 0.8048666354702855,
"eval_runtime": 14.514,
"eval_samples_per_second": 469.202,
"eval_steps_per_second": 58.702,
"step": 3996
},
{
"epoch": 8.008008008008009,
"grad_norm": 0.12597906589508057,
"learning_rate": 9.919839679358718e-06,
"loss": 0.0038,
"step": 4000
},
{
"epoch": 8.998998998999,
"eval_accuracy": 0.9765059661938567,
"eval_f1": 0.7970779220779219,
"eval_loss": 0.14552859961986542,
"eval_precision": 0.7901149425287356,
"eval_recall": 0.8041647168928404,
"eval_runtime": 14.2396,
"eval_samples_per_second": 478.242,
"eval_steps_per_second": 59.833,
"step": 4495
},
{
"epoch": 9.00900900900901,
"grad_norm": 0.2577208876609802,
"learning_rate": 4.9098196392785576e-06,
"loss": 0.0028,
"step": 4500
},
{
"epoch": 9.98998998998999,
"eval_accuracy": 0.9768286487154489,
"eval_f1": 0.7984262902105993,
"eval_loss": 0.14972682297229767,
"eval_precision": 0.7898351648351648,
"eval_recall": 0.8072063640617688,
"eval_runtime": 14.3927,
"eval_samples_per_second": 473.158,
"eval_steps_per_second": 59.197,
"step": 4990
},
{
"epoch": 9.98998998998999,
"step": 4990,
"total_flos": 1.5071241212671032e+16,
"train_loss": 0.022475468706272407,
"train_runtime": 1385.1143,
"train_samples_per_second": 230.645,
"train_steps_per_second": 3.603
}
],
"logging_steps": 500,
"max_steps": 4990,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.5071241212671032e+16,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}