Rodrigo1771 committed (verified)
Commit b60a1c8 · Parent: 0a2088d

End of training
README.md CHANGED
@@ -3,9 +3,10 @@ library_name: transformers
  license: apache-2.0
  base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
  tags:
+ - token-classification
  - generated_from_trainer
  datasets:
- - symptemist-75-ner
+ - Rodrigo1771/symptemist-75-ner
  metrics:
  - precision
  - recall
@@ -18,24 +19,24 @@ model-index:
        name: Token Classification
        type: token-classification
      dataset:
-       name: symptemist-75-ner
-       type: symptemist-75-ner
+       name: Rodrigo1771/symptemist-75-ner
+       type: Rodrigo1771/symptemist-75-ner
        config: SympTEMIST NER
        split: validation
        args: SympTEMIST NER
      metrics:
      - name: Precision
        type: precision
-       value: 0.6814159292035398
+       value: 0.6896
      - name: Recall
        type: recall
-       value: 0.7164750957854407
+       value: 0.7077175697865353
      - name: F1
        type: f1
-       value: 0.6985058697972252
+       value: 0.6985413290113451
      - name: Accuracy
        type: accuracy
-       value: 0.9498861047835991
+       value: 0.9496936058263018
  ---

  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->

  # output

- This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-75-ner dataset.
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-75-ner dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.3328
- - Precision: 0.6814
- - Recall: 0.7165
+ - Loss: 0.3089
+ - Precision: 0.6896
+ - Recall: 0.7077
  - F1: 0.6985
- - Accuracy: 0.9499
+ - Accuracy: 0.9497

  ## Model description

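For reference, a minimal usage sketch of the resulting checkpoint (not part of this commit). The final Hub repository id is not stated here, so the path below is taken from the training log; substitute the published repo id once the model is on the Hub.

```python
from transformers import pipeline

# Path taken from the training log below; replace with the Hub repo id once published.
model_path = "/content/dissertation/scripts/ner/output"

ner = pipeline(
    "token-classification",
    model=model_path,
    aggregation_strategy="simple",  # merge B-/I- subword predictions into entity spans
)

# Illustrative Spanish clinical sentence, not taken from the dataset.
print(ner("El paciente refiere cefalea intensa y náuseas desde hace dos días."))
```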
all_results.json ADDED
@@ -0,0 +1,26 @@
+ {
+     "epoch": 10.0,
+     "eval_accuracy": 0.9496936058263018,
+     "eval_f1": 0.6985413290113451,
+     "eval_loss": 0.3088673949241638,
+     "eval_precision": 0.6896,
+     "eval_recall": 0.7077175697865353,
+     "eval_runtime": 5.5622,
+     "eval_samples": 2519,
+     "eval_samples_per_second": 452.882,
+     "eval_steps_per_second": 56.633,
+     "predict_accuracy": 0.9466344311112421,
+     "predict_f1": 0.6937328822297406,
+     "predict_loss": 0.33691754937171936,
+     "predict_precision": 0.694516129032258,
+     "predict_recall": 0.6929514000643707,
+     "predict_runtime": 8.8756,
+     "predict_samples_per_second": 455.971,
+     "predict_steps_per_second": 57.01,
+     "total_flos": 7718163558521760.0,
+     "train_loss": 0.03527186407196906,
+     "train_runtime": 706.0488,
+     "train_samples": 15848,
+     "train_samples_per_second": 224.46,
+     "train_steps_per_second": 3.513
+ }
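The precision, recall and F1 figures above are entity-level scores, while accuracy is token-level, as typically computed with seqeval in the token-classification training script. A minimal sketch of that computation; the label sequences are illustrative and not taken from this run.

```python
import evaluate

seqeval = evaluate.load("seqeval")

# Illustrative gold and predicted BIO sequences (SympTEMIST-style SINTOMA label assumed).
references = [["O", "B-SINTOMA", "I-SINTOMA", "O"]]
predictions = [["O", "B-SINTOMA", "O", "O"]]

scores = seqeval.compute(predictions=predictions, references=references)
print(scores["overall_precision"], scores["overall_recall"],
      scores["overall_f1"], scores["overall_accuracy"])
```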
eval_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "epoch": 10.0,
+     "eval_accuracy": 0.9496936058263018,
+     "eval_f1": 0.6985413290113451,
+     "eval_loss": 0.3088673949241638,
+     "eval_precision": 0.6896,
+     "eval_recall": 0.7077175697865353,
+     "eval_runtime": 5.5622,
+     "eval_samples": 2519,
+     "eval_samples_per_second": 452.882,
+     "eval_steps_per_second": 56.633
+ }
predict_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "predict_accuracy": 0.9466344311112421,
+     "predict_f1": 0.6937328822297406,
+     "predict_loss": 0.33691754937171936,
+     "predict_precision": 0.694516129032258,
+     "predict_recall": 0.6929514000643707,
+     "predict_runtime": 8.8756,
+     "predict_samples_per_second": 455.971,
+     "predict_steps_per_second": 57.01
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
tb/events.out.tfevents.1725475205.a5c501872057.1590.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac1c4dd87538fa491bda1fedf86880bd54d355c94e361fe11b34dd49f825123b
+ size 560
train.log CHANGED
@@ -860,3 +860,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
  {'eval_loss': 0.33284899592399597, 'eval_precision': 0.6814159292035398, 'eval_recall': 0.7164750957854407, 'eval_f1': 0.6985058697972252, 'eval_accuracy': 0.9498861047835991, 'eval_runtime': 5.5975, 'eval_samples_per_second': 450.025, 'eval_steps_per_second': 56.276, 'epoch': 10.0}
  {'train_runtime': 706.0488, 'train_samples_per_second': 224.46, 'train_steps_per_second': 3.513, 'train_loss': 0.03527186407196906, 'epoch': 10.0}

+ ***** train metrics *****
+ epoch = 10.0
+ total_flos = 7188099GF
+ train_loss = 0.0353
+ train_runtime = 0:11:46.04
+ train_samples = 15848
+ train_samples_per_second = 224.46
+ train_steps_per_second = 3.513
+ 09/04/2024 18:39:59 - INFO - __main__ - *** Evaluate ***
+ [INFO|trainer.py:811] 2024-09-04 18:39:59,625 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+ [INFO|trainer.py:3819] 2024-09-04 18:39:59,627 >>
+ ***** Running Evaluation *****
+ [INFO|trainer.py:3821] 2024-09-04 18:39:59,627 >> Num examples = 2519
+ [INFO|trainer.py:3824] 2024-09-04 18:39:59,627 >> Batch size = 8
+
  0%| | 0/315 [00:00<?, ?it/s]
  3%|▎ | 9/315 [00:00<00:03, 80.04it/s]
  6%|▌ | 18/315 [00:00<00:03, 81.66it/s]
  9%|▊ | 27/315 [00:00<00:03, 81.02it/s]
  11%|█▏ | 36/315 [00:00<00:03, 82.17it/s]
  14%|█▍ | 45/315 [00:00<00:03, 82.54it/s]
  17%|█▋ | 54/315 [00:00<00:03, 82.93it/s]
  20%|██ | 63/315 [00:00<00:03, 80.55it/s]
  23%|██▎ | 72/315 [00:00<00:02, 81.25it/s]
  26%|██▌ | 81/315 [00:01<00:02, 79.65it/s]
  28%|██▊ | 89/315 [00:01<00:02, 79.00it/s]
  31%|███ | 97/315 [00:01<00:02, 78.04it/s]
  34%|███▎ | 106/315 [00:01<00:02, 79.53it/s]
  37%|███▋ | 115/315 [00:01<00:02, 80.97it/s]
  39%|███▉ | 124/315 [00:01<00:02, 79.09it/s]
  42%|████▏ | 133/315 [00:01<00:02, 79.74it/s]
  45%|████▌ | 142/315 [00:01<00:02, 79.98it/s]
  48%|████▊ | 151/315 [00:01<00:02, 81.79it/s]
  51%|█████ | 160/315 [00:01<00:01, 81.15it/s]
  54%|█████▎ | 169/315 [00:02<00:01, 81.16it/s]
  57%|█████▋ | 178/315 [00:02<00:01, 81.46it/s]
  59%|█████▉ | 187/315 [00:02<00:01, 81.11it/s]
  62%|██████▏ | 196/315 [00:02<00:01, 80.61it/s]
  65%|██████▌ | 205/315 [00:02<00:01, 78.27it/s]
  68%|██████▊ | 214/315 [00:02<00:01, 80.07it/s]
  71%|███████ | 223/315 [00:02<00:01, 80.98it/s]
  74%|███████▎ | 232/315 [00:02<00:01, 82.35it/s]
  77%|███████▋ | 241/315 [00:02<00:00, 81.14it/s]
  79%|███████▉ | 250/315 [00:03<00:00, 81.51it/s]
  82%|████████▏ | 259/315 [00:03<00:00, 81.21it/s]
  85%|████████▌ | 268/315 [00:03<00:00, 81.50it/s]
  88%|████████▊ | 277/315 [00:03<00:00, 82.85it/s]
  91%|█████████ | 286/315 [00:03<00:00, 80.71it/s]
  94%|█████████▎| 295/315 [00:03<00:00, 80.77it/s]
  97%|█████████▋| 304/315 [00:03<00:00, 81.83it/s]
  99%|█████████▉| 313/315 [00:03<00:00, 82.13it/s]
+ ***** eval metrics *****
+ epoch = 10.0
+ eval_accuracy = 0.9497
+ eval_f1 = 0.6985
+ eval_loss = 0.3089
+ eval_precision = 0.6896
+ eval_recall = 0.7077
+ eval_runtime = 0:00:05.56
+ eval_samples = 2519
+ eval_samples_per_second = 452.882
+ eval_steps_per_second = 56.633
+ 09/04/2024 18:40:05 - INFO - __main__ - *** Predict ***
+ [INFO|trainer.py:811] 2024-09-04 18:40:05,192 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
+ [INFO|trainer.py:3819] 2024-09-04 18:40:05,194 >>
+ ***** Running Prediction *****
+ [INFO|trainer.py:3821] 2024-09-04 18:40:05,194 >> Num examples = 4047
+ [INFO|trainer.py:3824] 2024-09-04 18:40:05,194 >> Batch size = 8
+
  0%| | 0/506 [00:00<?, ?it/s]
  2%|▏ | 10/506 [00:00<00:05, 95.19it/s]
  4%|▍ | 20/506 [00:00<00:05, 85.84it/s]
  6%|▌ | 29/506 [00:00<00:05, 85.46it/s]
  8%|▊ | 38/506 [00:00<00:05, 83.57it/s]
  9%|▉ | 47/506 [00:00<00:05, 83.68it/s]
  11%|█ | 56/506 [00:00<00:05, 84.03it/s]
  13%|█▎ | 65/506 [00:00<00:05, 82.78it/s]
  15%|█▍ | 74/506 [00:00<00:05, 82.71it/s]
  16%|█▋ | 83/506 [00:01<00:05, 77.99it/s]
  18%|█▊ | 91/506 [00:01<00:05, 78.20it/s]
  20%|█▉ | 100/506 [00:01<00:05, 79.97it/s]
  22%|██▏ | 109/506 [00:01<00:04, 79.45it/s]
  23%|██▎ | 118/506 [00:01<00:04, 80.16it/s]
  25%|██▌ | 127/506 [00:01<00:04, 78.08it/s]
  27%|██▋ | 135/506 [00:01<00:05, 72.06it/s]
  28%|██▊ | 144/506 [00:01<00:04, 75.38it/s]
  30%|███ | 152/506 [00:01<00:04, 75.99it/s]
  32%|███▏ | 160/506 [00:02<00:04, 74.53it/s]
  33%|███▎ | 168/506 [00:02<00:04, 76.02it/s]
  35%|███▍ | 177/506 [00:02<00:04, 77.49it/s]
  37%|███▋ | 186/506 [00:02<00:04, 79.41it/s]
  39%|███▊ | 195/506 [00:02<00:03, 79.79it/s]
  40%|████ | 204/506 [00:02<00:03, 80.39it/s]
  42%|████▏ | 213/506 [00:02<00:03, 80.49it/s]
  44%|████▍ | 222/506 [00:02<00:03, 78.46it/s]
  45%|████▌ | 230/506 [00:02<00:03, 77.13it/s]
  47%|████▋ | 238/506 [00:03<00:03, 77.47it/s]
  49%|████▉ | 247/506 [00:03<00:03, 79.02it/s]
  50%|█████ | 255/506 [00:03<00:03, 78.64it/s]
  52%|█████▏ | 264/506 [00:03<00:03, 80.02it/s]
  54%|█████▍ | 273/506 [00:03<00:02, 81.15it/s]
  56%|█████▌ | 282/506 [00:03<00:02, 80.47it/s]
  58%|█████▊ | 291/506 [00:03<00:02, 80.42it/s]
  59%|█████▉ | 300/506 [00:03<00:02, 81.29it/s]
  61%|██████ | 309/506 [00:03<00:02, 80.93it/s]
  63%|██████▎ | 318/506 [00:03<00:02, 81.34it/s]
  65%|██████▍ | 327/506 [00:04<00:02, 82.42it/s]
  66%|██████▋ | 336/506 [00:04<00:02, 82.95it/s]
  68%|██████▊ | 345/506 [00:04<00:01, 83.14it/s]
  70%|██████▉ | 354/506 [00:04<00:01, 79.24it/s]
  72%|███████▏ | 362/506 [00:04<00:01, 75.32it/s]
  73%|███████▎ | 370/506 [00:04<00:01, 72.65it/s]
  75%|███████▍ | 378/506 [00:04<00:01, 71.57it/s]
  76%|███████▋ | 386/506 [00:04<00:01, 69.45it/s]
  78%|███████▊ | 393/506 [00:05<00:01, 69.06it/s]
  79%|███████▉ | 401/506 [00:05<00:01, 69.56it/s]
  81%|████████ | 409/506 [00:05<00:01, 70.60it/s]
  82%|████████▏ | 417/506 [00:05<00:01, 73.12it/s]
  84%|████████▍ | 425/506 [00:05<00:01, 74.82it/s]
  86%|████████▌ | 434/506 [00:05<00:00, 76.80it/s]
  88%|████████▊ | 443/506 [00:05<00:00, 78.43it/s]
  89%|████████▉ | 451/506 [00:05<00:00, 78.15it/s]
  91%|█████████ | 460/506 [00:05<00:00, 80.40it/s]
  93%|█████████▎| 469/506 [00:05<00:00, 81.42it/s]
  94%|█████████▍| 478/506 [00:06<00:00, 82.24it/s]
  96%|█████████▌| 487/506 [00:06<00:00, 80.66it/s]
  98%|█████████▊| 496/506 [00:06<00:00, 81.16it/s]
+ [INFO|trainer.py:3503] 2024-09-04 18:40:14,231 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
+ [INFO|configuration_utils.py:472] 2024-09-04 18:40:14,233 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
+ [INFO|modeling_utils.py:2799] 2024-09-04 18:40:15,628 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
+ [INFO|tokenization_utils_base.py:2684] 2024-09-04 18:40:15,629 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2693] 2024-09-04 18:40:15,629 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
+ ***** predict metrics *****
+ predict_accuracy = 0.9466
+ predict_f1 = 0.6937
+ predict_loss = 0.3369
+ predict_precision = 0.6945
+ predict_recall = 0.693
+ predict_runtime = 0:00:08.87
+ predict_samples_per_second = 455.971
+ predict_steps_per_second = 57.01
+
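The "***** train/eval/predict metrics *****" blocks above, and the *_results.json files added in this commit, are what the Trainer's log_metrics and save_metrics helpers emit at the end of a run_ner.py-style script. A sketch only, assuming `trainer` is an already-configured transformers Trainer and `test_dataset` is a tokenized test split; neither object is defined in this commit.

```python
# Assumed objects: `trainer` (transformers.Trainer) and `test_dataset` (tokenized split).
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)       # prints the "***** eval metrics *****" block
trainer.save_metrics("eval", metrics)      # writes eval_results.json (and updates all_results.json)

predictions, labels, metrics = trainer.predict(test_dataset, metric_key_prefix="predict")
trainer.log_metrics("predict", metrics)    # prints the "***** predict metrics *****" block
trainer.save_metrics("predict", metrics)   # writes predict_results.json
```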
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+     "epoch": 10.0,
+     "total_flos": 7718163558521760.0,
+     "train_loss": 0.03527186407196906,
+     "train_runtime": 706.0488,
+     "train_samples": 15848,
+     "train_samples_per_second": 224.46,
+     "train_steps_per_second": 3.513
+ }
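The human-readable values in the "***** train metrics *****" block of train.log are formatted views of the raw numbers stored here: total_flos appears in "GF" (the flos value shifted right by 2^30) and train_runtime as h:mm:ss. A small sketch that reproduces the formatting from this file, assuming it sits in the working directory:

```python
import datetime
import json

# Assumes train_results.json from this commit is in the current directory.
with open("train_results.json") as f:
    raw = json.load(f)

print(f"total_flos    = {int(raw['total_flos']) >> 30}GF")                     # -> 7188099GF
print(f"train_runtime = {datetime.timedelta(seconds=raw['train_runtime'])}")   # -> 0:11:46.048800
print(f"train_loss    = {raw['train_loss']:.4f}")                              # -> 0.0353
```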
trainer_state.json ADDED
@@ -0,0 +1,190 @@
+ {
+   "best_metric": 0.6985413290113451,
+   "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1736",
+   "epoch": 10.0,
+   "eval_steps": 500,
+   "global_step": 2480,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.9477686162533286,
+       "eval_f1": 0.63506625891947,
+       "eval_loss": 0.16486208140850067,
+       "eval_precision": 0.5941821649976157,
+       "eval_recall": 0.6819923371647509,
+       "eval_runtime": 5.5136,
+       "eval_samples_per_second": 456.871,
+       "eval_steps_per_second": 57.132,
+       "step": 248
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9476402836151304,
+       "eval_f1": 0.6630581867388362,
+       "eval_loss": 0.18148483335971832,
+       "eval_precision": 0.6557815845824411,
+       "eval_recall": 0.6704980842911877,
+       "eval_runtime": 5.4181,
+       "eval_samples_per_second": 464.925,
+       "eval_steps_per_second": 58.139,
+       "step": 496
+     },
+     {
+       "epoch": 2.0161290322580645,
+       "grad_norm": 0.6250831484794617,
+       "learning_rate": 3.991935483870968e-05,
+       "loss": 0.134,
+       "step": 500
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.9491802752735089,
+       "eval_f1": 0.6810897435897436,
+       "eval_loss": 0.2111387550830841,
+       "eval_precision": 0.6651017214397497,
+       "eval_recall": 0.6978653530377669,
+       "eval_runtime": 5.4844,
+       "eval_samples_per_second": 459.302,
+       "eval_steps_per_second": 57.435,
+       "step": 744
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.9488434020982386,
+       "eval_f1": 0.6900026518164943,
+       "eval_loss": 0.25230270624160767,
+       "eval_precision": 0.6692386831275721,
+       "eval_recall": 0.7120963327859879,
+       "eval_runtime": 5.4481,
+       "eval_samples_per_second": 462.36,
+       "eval_steps_per_second": 57.818,
+       "step": 992
+     },
+     {
+       "epoch": 4.032258064516129,
+       "grad_norm": 0.7998089790344238,
+       "learning_rate": 2.9838709677419357e-05,
+       "loss": 0.026,
+       "step": 1000
+     },
+     {
+       "epoch": 5.0,
+       "eval_accuracy": 0.9490840257948603,
+       "eval_f1": 0.6847083552285864,
+       "eval_loss": 0.27709877490997314,
+       "eval_precision": 0.6584133400707428,
+       "eval_recall": 0.7131910235358512,
+       "eval_runtime": 5.6532,
+       "eval_samples_per_second": 445.585,
+       "eval_steps_per_second": 55.72,
+       "step": 1240
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.9486348615611665,
+       "eval_f1": 0.6907651715039579,
+       "eval_loss": 0.2968369126319885,
+       "eval_precision": 0.6668364747834946,
+       "eval_recall": 0.7164750957854407,
+       "eval_runtime": 5.4549,
+       "eval_samples_per_second": 461.787,
+       "eval_steps_per_second": 57.746,
+       "step": 1488
+     },
+     {
+       "epoch": 6.048387096774194,
+       "grad_norm": 0.15673314034938812,
+       "learning_rate": 1.975806451612903e-05,
+       "loss": 0.0084,
+       "step": 1500
+     },
+     {
+       "epoch": 7.0,
+       "eval_accuracy": 0.9496936058263018,
+       "eval_f1": 0.6985413290113451,
+       "eval_loss": 0.3088673949241638,
+       "eval_precision": 0.6896,
+       "eval_recall": 0.7077175697865353,
+       "eval_runtime": 5.5771,
+       "eval_samples_per_second": 451.669,
+       "eval_steps_per_second": 56.481,
+       "step": 1736
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.9498861047835991,
+       "eval_f1": 0.6946236559139785,
+       "eval_loss": 0.31877079606056213,
+       "eval_precision": 0.6825145272054939,
+       "eval_recall": 0.7071702244116037,
+       "eval_runtime": 5.3015,
+       "eval_samples_per_second": 475.15,
+       "eval_steps_per_second": 59.417,
+       "step": 1984
+     },
+     {
+       "epoch": 8.064516129032258,
+       "grad_norm": 0.22283445298671722,
+       "learning_rate": 9.67741935483871e-06,
+       "loss": 0.0042,
+       "step": 2000
+     },
+     {
+       "epoch": 9.0,
+       "eval_accuracy": 0.9494529821296801,
+       "eval_f1": 0.6979722518676629,
+       "eval_loss": 0.3295721411705017,
+       "eval_precision": 0.6808953669963561,
+       "eval_recall": 0.715927750410509,
+       "eval_runtime": 5.4512,
+       "eval_samples_per_second": 462.102,
+       "eval_steps_per_second": 57.786,
+       "step": 2232
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.9498861047835991,
+       "eval_f1": 0.6985058697972252,
+       "eval_loss": 0.33284899592399597,
+       "eval_precision": 0.6814159292035398,
+       "eval_recall": 0.7164750957854407,
+       "eval_runtime": 5.5975,
+       "eval_samples_per_second": 450.025,
+       "eval_steps_per_second": 56.276,
+       "step": 2480
+     },
+     {
+       "epoch": 10.0,
+       "step": 2480,
+       "total_flos": 7718163558521760.0,
+       "train_loss": 0.03527186407196906,
+       "train_runtime": 706.0488,
+       "train_samples_per_second": 224.46,
+       "train_steps_per_second": 3.513
+     }
+   ],
+   "logging_steps": 500,
+   "max_steps": 2480,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 10,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 7718163558521760.0,
+   "train_batch_size": 32,
+   "trial_name": null,
+   "trial_params": null
+ }
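trainer_state.json records the per-epoch evaluation history; best_model_checkpoint shows that checkpoint-1736 (epoch 7) gave the best eval_f1, and those are the figures reported in the updated README. A small sketch for inspecting the file, assuming it is in the working directory:

```python
import json

# Assumes trainer_state.json from this commit is in the current directory.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries from log_history and pick the best by eval_f1
# (here: checkpoint-1736, epoch 7, F1 ≈ 0.6985).
evals = [entry for entry in state["log_history"] if "eval_f1" in entry]
best = max(evals, key=lambda entry: entry["eval_f1"])

print("best checkpoint:", state["best_model_checkpoint"])
print(f"epoch {best['epoch']:.0f}: F1={best['eval_f1']:.4f}, "
      f"P={best['eval_precision']:.4f}, R={best['eval_recall']:.4f}")
```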