|
{
  "best_metric": 0.8040201005025126,
  "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-3996",
  "epoch": 9.98998998998999,
  "eval_steps": 500,
  "global_step": 4990,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.998998998998999,
      "eval_accuracy": 0.9730869045820918,
      "eval_f1": 0.75963794837412,
      "eval_loss": 0.07385822385549545,
      "eval_precision": 0.7270588235294118,
      "eval_recall": 0.7952737482452036,
      "eval_runtime": 14.2023,
      "eval_samples_per_second": 479.501,
      "eval_steps_per_second": 59.99,
      "step": 499
    },
    {
      "epoch": 1.001001001001001,
      "grad_norm": 0.8427119851112366,
      "learning_rate": 4.4989979959919844e-05,
      "loss": 0.105,
      "step": 500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.972949592870776,
      "eval_f1": 0.7655806561471223,
      "eval_loss": 0.09075114130973816,
      "eval_precision": 0.7435501653803749,
      "eval_recall": 0.7889564810481984,
      "eval_runtime": 14.1492,
      "eval_samples_per_second": 481.299,
      "eval_steps_per_second": 60.215,
      "step": 999
    },
    {
      "epoch": 2.002002002002002,
      "grad_norm": 0.9121108651161194,
      "learning_rate": 3.997995991983968e-05,
      "loss": 0.0448,
      "step": 1000
    },
    {
      "epoch": 2.998998998998999,
      "eval_accuracy": 0.9743913658395924,
      "eval_f1": 0.7829875042989798,
      "eval_loss": 0.09297410398721695,
      "eval_precision": 0.7675882220723759,
      "eval_recall": 0.7990173139915769,
      "eval_runtime": 14.1376,
      "eval_samples_per_second": 481.693,
      "eval_steps_per_second": 60.265,
      "step": 1498
    },
    {
      "epoch": 3.003003003003003,
      "grad_norm": 0.5380845069885254,
      "learning_rate": 3.496993987975952e-05,
      "loss": 0.0255,
      "step": 1500
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9757438861960537,
      "eval_f1": 0.789358010410642,
      "eval_loss": 0.10520397126674652,
      "eval_precision": 0.7805994051704416,
      "eval_recall": 0.798315395414132,
      "eval_runtime": 14.3621,
      "eval_samples_per_second": 474.163,
      "eval_steps_per_second": 59.323,
      "step": 1998
    },
    {
      "epoch": 4.004004004004004,
      "grad_norm": 0.2705754339694977,
      "learning_rate": 2.9959919839679363e-05,
      "loss": 0.0164,
      "step": 2000
    },
    {
      "epoch": 4.998998998998999,
      "eval_accuracy": 0.9750435964683428,
      "eval_f1": 0.7879346074142298,
      "eval_loss": 0.10997848957777023,
      "eval_precision": 0.7756119673617408,
      "eval_recall": 0.8006551240056153,
      "eval_runtime": 14.2341,
      "eval_samples_per_second": 478.428,
      "eval_steps_per_second": 59.856,
      "step": 2497
    },
    {
      "epoch": 5.005005005005005,
      "grad_norm": 0.27666428685188293,
      "learning_rate": 2.49498997995992e-05,
      "loss": 0.0112,
      "step": 2500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9768011863731858,
      "eval_f1": 0.799447386599125,
      "eval_loss": 0.12663568556308746,
      "eval_precision": 0.7869446962828649,
      "eval_recall": 0.8123537669630323,
      "eval_runtime": 14.5018,
      "eval_samples_per_second": 469.597,
      "eval_steps_per_second": 58.751,
      "step": 2997
    },
    {
      "epoch": 6.006006006006006,
      "grad_norm": 0.24552026391029358,
      "learning_rate": 1.993987975951904e-05,
      "loss": 0.0073,
      "step": 3000
    },
    {
      "epoch": 6.998998998998999,
      "eval_accuracy": 0.976293133041317,
      "eval_f1": 0.7968804562914678,
      "eval_loss": 0.12882493436336517,
      "eval_precision": 0.792911744266852,
      "eval_recall": 0.8008890968647637,
      "eval_runtime": 14.3476,
      "eval_samples_per_second": 474.643,
      "eval_steps_per_second": 59.383,
      "step": 3496
    },
    {
      "epoch": 7.007007007007007,
      "grad_norm": 0.2008085697889328,
      "learning_rate": 1.492985971943888e-05,
      "loss": 0.0054,
      "step": 3500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9764853694371592,
      "eval_f1": 0.8040201005025126,
      "eval_loss": 0.14244574308395386,
      "eval_precision": 0.803175344384777,
      "eval_recall": 0.8048666354702855,
      "eval_runtime": 14.514,
      "eval_samples_per_second": 469.202,
      "eval_steps_per_second": 58.702,
      "step": 3996
    },
    {
      "epoch": 8.008008008008009,
      "grad_norm": 0.12597906589508057,
      "learning_rate": 9.919839679358718e-06,
      "loss": 0.0038,
      "step": 4000
    },
    {
      "epoch": 8.998998998999,
      "eval_accuracy": 0.9765059661938567,
      "eval_f1": 0.7970779220779219,
      "eval_loss": 0.14552859961986542,
      "eval_precision": 0.7901149425287356,
      "eval_recall": 0.8041647168928404,
      "eval_runtime": 14.2396,
      "eval_samples_per_second": 478.242,
      "eval_steps_per_second": 59.833,
      "step": 4495
    },
    {
      "epoch": 9.00900900900901,
      "grad_norm": 0.2577208876609802,
      "learning_rate": 4.9098196392785576e-06,
      "loss": 0.0028,
      "step": 4500
    },
    {
      "epoch": 9.98998998998999,
      "eval_accuracy": 0.9768286487154489,
      "eval_f1": 0.7984262902105993,
      "eval_loss": 0.14972682297229767,
      "eval_precision": 0.7898351648351648,
      "eval_recall": 0.8072063640617688,
      "eval_runtime": 14.3927,
      "eval_samples_per_second": 473.158,
      "eval_steps_per_second": 59.197,
      "step": 4990
    },
    {
      "epoch": 9.98998998998999,
      "step": 4990,
      "total_flos": 1.5071241212671032e+16,
      "train_loss": 0.022475468706272407,
      "train_runtime": 1385.1143,
      "train_samples_per_second": 230.645,
      "train_steps_per_second": 3.603
    }
  ],
  "logging_steps": 500,
  "max_steps": 4990,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5071241212671032e+16,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}