End of training

Browse files

Files changed (9) hide show

README.md +14 -13
all_results.json +23 -23
eval_results.json +9 -9
predict_results.json +8 -8
predictions.txt +0 -0
tb/events.out.tfevents.1725476139.a5c501872057.6105.1 +3 -0
train.log +48 -0
train_results.json +7 -7
trainer_state.json +126 -126

README.md CHANGED Viewed

@@ -3,9 +3,10 @@ library_name: transformers
 license: apache-2.0
 base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
 tags:
 - generated_from_trainer
 datasets:
-- symptemist-8-ner
 metrics:
 - precision
 - recall
@@ -18,24 +19,24 @@ model-index:
       name: Token Classification
       type: token-classification
     dataset:
-      name: symptemist-8-ner
-      type: symptemist-8-ner
       config: SympTEMIST NER
       split: validation
       args: SympTEMIST NER
     metrics:
     - name: Precision
       type: precision
-      value: 0.6711271230056614
     - name: Recall
       type: recall
-      value: 0.7137383689107827
     - name: F1
       type: f1
-      value: 0.6917771883289126
     - name: Accuracy
       type: accuracy
-      value: 0.9491963168532838
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -43,13 +44,13 @@ should probably proofread and complete it, then remove this comment. -->
 # output
-This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the symptemist-8-ner dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3125
-- Precision: 0.6711
-- Recall: 0.7137
-- F1: 0.6918
-- Accuracy: 0.9492
 ## Model description

 license: apache-2.0
 base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
 tags:
+- token-classification
 - generated_from_trainer
 datasets:
+- Rodrigo1771/symptemist-8-ner
 metrics:
 - precision
 - recall
       name: Token Classification
       type: token-classification
     dataset:
+      name: Rodrigo1771/symptemist-8-ner
+      type: Rodrigo1771/symptemist-8-ner
       config: SympTEMIST NER
       split: validation
       args: SympTEMIST NER
     metrics:
     - name: Precision
       type: precision
+      value: 0.6832101372756072
     - name: Recall
       type: recall
+      value: 0.7082649151614668
     - name: F1
       type: f1
+      value: 0.6955119591507659
     - name: Accuracy
       type: accuracy
+      value: 0.9498058968847252
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 # output
+This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-8-ner dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.3003
+- Precision: 0.6832
+- Recall: 0.7083
+- F1: 0.6955
+- Accuracy: 0.9498
 ## Model description

all_results.json CHANGED Viewed

@@ -1,26 +1,26 @@
 {
-    "epoch": 10.0,
-    "eval_accuracy": 0.9496936058263018,
-    "eval_f1": 0.6985413290113451,
-    "eval_loss": 0.3088673949241638,
-    "eval_precision": 0.6896,
-    "eval_recall": 0.7077175697865353,
-    "eval_runtime": 5.5622,
     "eval_samples": 2519,
-    "eval_samples_per_second": 452.882,
-    "eval_steps_per_second": 56.633,
-    "predict_accuracy": 0.9466344311112421,
-    "predict_f1": 0.6937328822297406,
-    "predict_loss": 0.33691754937171936,
-    "predict_precision": 0.694516129032258,
-    "predict_recall": 0.6929514000643707,
-    "predict_runtime": 8.8756,
-    "predict_samples_per_second": 455.971,
-    "predict_steps_per_second": 57.01,
-    "total_flos": 7718163558521760.0,
-    "train_loss": 0.03527186407196906,
-    "train_runtime": 706.0488,
-    "train_samples": 15848,
-    "train_samples_per_second": 224.46,
-    "train_steps_per_second": 3.513
 }

 {
+    "epoch": 9.976133651551313,
+    "eval_accuracy": 0.9498058968847252,
+    "eval_f1": 0.6955119591507659,
+    "eval_loss": 0.300260066986084,
+    "eval_precision": 0.6832101372756072,
+    "eval_recall": 0.7082649151614668,
+    "eval_runtime": 5.3532,
     "eval_samples": 2519,
+    "eval_samples_per_second": 470.558,
+    "eval_steps_per_second": 58.843,
+    "predict_accuracy": 0.9465066682391328,
+    "predict_f1": 0.6871925091255355,
+    "predict_loss": 0.32003945112228394,
+    "predict_precision": 0.6778334376956794,
+    "predict_recall": 0.6968136466044416,
+    "predict_runtime": 9.1273,
+    "predict_samples_per_second": 443.395,
+    "predict_steps_per_second": 55.438,
+    "total_flos": 6479980841102670.0,
+    "train_loss": 0.03990328233493002,
+    "train_runtime": 605.7066,
+    "train_samples": 13389,
+    "train_samples_per_second": 221.048,
+    "train_steps_per_second": 3.451
 }

eval_results.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
-    "epoch": 10.0,
-    "eval_accuracy": 0.9496936058263018,
-    "eval_f1": 0.6985413290113451,
-    "eval_loss": 0.3088673949241638,
-    "eval_precision": 0.6896,
-    "eval_recall": 0.7077175697865353,
-    "eval_runtime": 5.5622,
     "eval_samples": 2519,
-    "eval_samples_per_second": 452.882,
-    "eval_steps_per_second": 56.633
 }

 {
+    "epoch": 9.976133651551313,
+    "eval_accuracy": 0.9498058968847252,
+    "eval_f1": 0.6955119591507659,
+    "eval_loss": 0.300260066986084,
+    "eval_precision": 0.6832101372756072,
+    "eval_recall": 0.7082649151614668,
+    "eval_runtime": 5.3532,
     "eval_samples": 2519,
+    "eval_samples_per_second": 470.558,
+    "eval_steps_per_second": 58.843
 }

predict_results.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
-    "predict_accuracy": 0.9466344311112421,
-    "predict_f1": 0.6937328822297406,
-    "predict_loss": 0.33691754937171936,
-    "predict_precision": 0.694516129032258,
-    "predict_recall": 0.6929514000643707,
-    "predict_runtime": 8.8756,
-    "predict_samples_per_second": 455.971,
-    "predict_steps_per_second": 57.01
 }

 {
+    "predict_accuracy": 0.9465066682391328,
+    "predict_f1": 0.6871925091255355,
+    "predict_loss": 0.32003945112228394,
+    "predict_precision": 0.6778334376956794,
+    "predict_recall": 0.6968136466044416,
+    "predict_runtime": 9.1273,
+    "predict_samples_per_second": 443.395,
+    "predict_steps_per_second": 55.438
 }

predictions.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff

tb/events.out.tfevents.1725476139.a5c501872057.6105.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e7c46d59ae0e93e011e758737f2c949894e6db42b7b574263d9a4123ea40093f
+size 560

train.log CHANGED Viewed

@@ -860,3 +860,51 @@ Training completed. Do not forget to share your model on huggingface.co/models =
 {'eval_loss': 0.31253090500831604, 'eval_precision': 0.6711271230056614, 'eval_recall': 0.7137383689107827, 'eval_f1': 0.6917771883289126, 'eval_accuracy': 0.9491963168532838, 'eval_runtime': 5.6829, 'eval_samples_per_second': 443.26, 'eval_steps_per_second': 55.429, 'epoch': 9.98}
 {'train_runtime': 605.7066, 'train_samples_per_second': 221.048, 'train_steps_per_second': 3.451, 'train_loss': 0.03990328233493002, 'epoch': 9.98}
  0%|          | 0/315 [00:00<?, ?it/s]
  3%|▎         | 8/315 [00:00<00:03, 79.67it/s]
  5%|▌         | 16/315 [00:00<00:03, 77.77it/s]
  8%|▊         | 25/315 [00:00<00:03, 79.52it/s]
 11%|█         | 34/315 [00:00<00:03, 81.49it/s]
 14%|█▎        | 43/315 [00:00<00:03, 83.27it/s]
 17%|█▋        | 52/315 [00:00<00:03, 81.72it/s]
 19%|█▉        | 61/315 [00:00<00:03, 79.31it/s]
 22%|██▏       | 69/315 [00:00<00:03, 78.47it/s]
 25%|██▍       | 78/315 [00:00<00:02, 79.72it/s]
 27%|██▋       | 86/315 [00:01<00:02, 78.27it/s]
 30%|███       | 95/315 [00:01<00:02, 79.80it/s]
 33%|███▎      | 103/315 [00:01<00:02, 78.32it/s]
 36%|███▌      | 112/315 [00:01<00:02, 80.29it/s]
 38%|███▊      | 121/315 [00:01<00:02, 79.55it/s]
 41%|████      | 129/315 [00:01<00:02, 78.75it/s]
 43%|████▎     | 137/315 [00:01<00:02, 78.82it/s]
 46%|████▋     | 146/315 [00:01<00:02, 79.39it/s]
 49%|████▉     | 155/315 [00:01<00:01, 81.02it/s]
 52%|█████▏    | 164/315 [00:02<00:01, 80.30it/s]
 55%|█████▍    | 173/315 [00:02<00:01, 80.93it/s]
 58%|█████▊    | 182/315 [00:02<00:01, 80.02it/s]
 61%|██████    | 191/315 [00:02<00:01, 80.51it/s]
 63%|██████▎   | 200/315 [00:02<00:01, 79.29it/s]
 66%|██████▌   | 208/315 [00:02<00:01, 78.49it/s]
 69%|██████▉   | 217/315 [00:02<00:01, 80.52it/s]
 72%|███████▏  | 226/315 [00:02<00:01, 82.31it/s]
 75%|███████▍  | 235/315 [00:02<00:00, 83.74it/s]
 77%|███████▋  | 244/315 [00:03<00:00, 81.82it/s]
 80%|████████  | 253/315 [00:03<00:00, 81.65it/s]
 83%|████████▎ | 262/315 [00:03<00:00, 82.45it/s]
 86%|████████▌ | 271/315 [00:03<00:00, 81.81it/s]
 89%|████████▉ | 280/315 [00:03<00:00, 82.55it/s]
 92%|█████████▏| 289/315 [00:03<00:00, 80.62it/s]
 95%|█████████▍| 298/315 [00:03<00:00, 80.79it/s]
 97%|█████████▋| 307/315 [00:03<00:00, 81.16it/s]
  0%|          | 0/506 [00:00<?, ?it/s]
  2%|▏         | 10/506 [00:00<00:05, 94.71it/s]
  4%|▍         | 20/506 [00:00<00:05, 81.40it/s]
  6%|▌         | 29/506 [00:00<00:05, 82.81it/s]
  8%|▊         | 38/506 [00:00<00:05, 81.92it/s]
  9%|▉         | 47/506 [00:00<00:05, 82.66it/s]
 11%|█         | 56/506 [00:00<00:05, 83.27it/s]
 13%|█▎        | 65/506 [00:00<00:05, 82.44it/s]
 15%|█▍        | 74/506 [00:00<00:05, 83.07it/s]
 16%|█▋        | 83/506 [00:01<00:05, 78.19it/s]
 18%|█▊        | 91/506 [00:01<00:05, 78.64it/s]
 20%|█▉        | 99/506 [00:01<00:05, 78.19it/s]
 21%|██▏       | 108/506 [00:01<00:05, 78.67it/s]
 23%|██▎       | 117/506 [00:01<00:04, 80.48it/s]
 25%|██▍       | 126/506 [00:01<00:04, 78.13it/s]
 26%|██▋       | 134/506 [00:01<00:05, 72.17it/s]
 28%|██▊       | 142/506 [00:01<00:04, 74.13it/s]
 30%|██▉       | 151/506 [00:01<00:04, 76.31it/s]
 31%|███▏      | 159/506 [00:02<00:04, 73.31it/s]
 33%|███▎      | 167/506 [00:02<00:04, 73.99it/s]
 35%|███▍      | 176/506 [00:02<00:04, 75.97it/s]
 37%|███▋      | 185/506 [00:02<00:04, 78.12it/s]
 38%|███▊      | 194/506 [00:02<00:03, 78.82it/s]
 40%|████      | 203/506 [00:02<00:03, 80.17it/s]
 42%|████▏     | 212/506 [00:02<00:03, 79.80it/s]
 44%|████▎     | 221/506 [00:02<00:03, 79.76it/s]
 45%|████▌     | 229/506 [00:02<00:03, 78.81it/s]
 47%|████▋     | 237/506 [00:03<00:03, 78.04it/s]
 49%|████▊     | 246/506 [00:03<00:03, 79.77it/s]
 50%|█████     | 254/506 [00:03<00:03, 78.96it/s]
 52%|█████▏    | 263/506 [00:03<00:03, 79.11it/s]
 54%|█████▎    | 271/506 [00:03<00:03, 75.85it/s]
 55%|█████▌    | 279/506 [00:03<00:03, 74.36it/s]
 57%|█████▋    | 287/506 [00:03<00:03, 72.60it/s]
 58%|█████▊    | 295/506 [00:03<00:02, 71.46it/s]
 60%|█████▉    | 303/506 [00:03<00:02, 70.59it/s]
 61%|██████▏   | 311/506 [00:04<00:02, 70.00it/s]
 63%|██████▎   | 320/506 [00:04<00:02, 73.65it/s]
 65%|██████▌   | 329/506 [00:04<00:02, 76.95it/s]
 67%|██████▋   | 338/506 [00:04<00:02, 78.75it/s]
 69%|██████▊   | 347/506 [00:04<00:01, 80.66it/s]
 70%|███████   | 356/506 [00:04<00:01, 82.02it/s]
 72%|███████▏  | 365/506 [00:04<00:01, 81.74it/s]
 74%|███████▍  | 374/506 [00:04<00:01, 79.69it/s]
 75%|███████▌  | 382/506 [00:04<00:01, 79.52it/s]
 77%|███████▋  | 390/506 [00:05<00:01, 77.40it/s]
 79%|███████▊  | 398/506 [00:05<00:01, 71.06it/s]
 80%|████████  | 407/506 [00:05<00:01, 73.71it/s]
 82%|████████▏ | 415/506 [00:05<00:01, 74.33it/s]
 84%|████████▎ | 423/506 [00:05<00:01, 75.66it/s]
 85%|████████▌ | 432/506 [00:05<00:00, 77.90it/s]
 87%|████████▋ | 440/506 [00:05<00:00, 78.07it/s]
 89%|████████▊ | 448/506 [00:05<00:00, 78.35it/s]
 90%|█████████ | 457/506 [00:05<00:00, 79.22it/s]
 92%|█████████▏| 466/506 [00:05<00:00, 80.30it/s]
 94%|█████████▍| 475/506 [00:06<00:00, 81.34it/s]
 96%|█████████▌| 484/506 [00:06<00:00, 77.78it/s]
 97%|█████████▋| 492/506 [00:06<00:00, 74.43it/s]
 99%|█████████▉| 500/506 [00:06<00:00, 72.82it/s]

 {'eval_loss': 0.31253090500831604, 'eval_precision': 0.6711271230056614, 'eval_recall': 0.7137383689107827, 'eval_f1': 0.6917771883289126, 'eval_accuracy': 0.9491963168532838, 'eval_runtime': 5.6829, 'eval_samples_per_second': 443.26, 'eval_steps_per_second': 55.429, 'epoch': 9.98}
 {'train_runtime': 605.7066, 'train_samples_per_second': 221.048, 'train_steps_per_second': 3.451, 'train_loss': 0.03990328233493002, 'epoch': 9.98}
+***** train metrics *****
+  epoch                    =     9.9761
+  total_flos               =  6034952GF
+  train_loss               =     0.0399
+  train_runtime            = 0:10:05.70
+  train_samples            =      13389
+  train_samples_per_second =    221.048
+  train_steps_per_second   =      3.451
+09/04/2024 18:55:34 - INFO - __main__ -   *** Evaluate ***
+[INFO|trainer.py:811] 2024-09-04 18:55:34,018 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
+[INFO|trainer.py:3819] 2024-09-04 18:55:34,020 >>
+***** Running Evaluation *****
+[INFO|trainer.py:3821] 2024-09-04 18:55:34,020 >>   Num examples = 2519
+[INFO|trainer.py:3824] 2024-09-04 18:55:34,020 >>   Batch size = 8
  0%|          | 0/315 [00:00<?, ?it/s]
  3%|▎         | 8/315 [00:00<00:03, 79.67it/s]
  5%|▌         | 16/315 [00:00<00:03, 77.77it/s]
  8%|▊         | 25/315 [00:00<00:03, 79.52it/s]
 11%|█         | 34/315 [00:00<00:03, 81.49it/s]
 14%|█▎        | 43/315 [00:00<00:03, 83.27it/s]
 17%|█▋        | 52/315 [00:00<00:03, 81.72it/s]
 19%|█▉        | 61/315 [00:00<00:03, 79.31it/s]
 22%|██▏       | 69/315 [00:00<00:03, 78.47it/s]
 25%|██▍       | 78/315 [00:00<00:02, 79.72it/s]
 27%|██▋       | 86/315 [00:01<00:02, 78.27it/s]
 30%|███       | 95/315 [00:01<00:02, 79.80it/s]
 33%|███▎      | 103/315 [00:01<00:02, 78.32it/s]
 36%|███▌      | 112/315 [00:01<00:02, 80.29it/s]
 38%|███▊      | 121/315 [00:01<00:02, 79.55it/s]
 41%|████      | 129/315 [00:01<00:02, 78.75it/s]
 43%|████▎     | 137/315 [00:01<00:02, 78.82it/s]
 46%|████▋     | 146/315 [00:01<00:02, 79.39it/s]
 49%|████▉     | 155/315 [00:01<00:01, 81.02it/s]
 52%|█████▏    | 164/315 [00:02<00:01, 80.30it/s]
 55%|█████▍    | 173/315 [00:02<00:01, 80.93it/s]
 58%|█████▊    | 182/315 [00:02<00:01, 80.02it/s]
 61%|██████    | 191/315 [00:02<00:01, 80.51it/s]
 63%|██████▎   | 200/315 [00:02<00:01, 79.29it/s]
 66%|██████▌   | 208/315 [00:02<00:01, 78.49it/s]
 69%|██████▉   | 217/315 [00:02<00:01, 80.52it/s]
 72%|███████▏  | 226/315 [00:02<00:01, 82.31it/s]
 75%|███████▍  | 235/315 [00:02<00:00, 83.74it/s]
 77%|███████▋  | 244/315 [00:03<00:00, 81.82it/s]
 80%|████████  | 253/315 [00:03<00:00, 81.65it/s]
 83%|████████▎ | 262/315 [00:03<00:00, 82.45it/s]
 86%|████████▌ | 271/315 [00:03<00:00, 81.81it/s]
 89%|████████▉ | 280/315 [00:03<00:00, 82.55it/s]
 92%|█████████▏| 289/315 [00:03<00:00, 80.62it/s]
 95%|█████████▍| 298/315 [00:03<00:00, 80.79it/s]
 97%|█████████▋| 307/315 [00:03<00:00, 81.16it/s]
+***** eval metrics *****
+  epoch                   =     9.9761
+  eval_accuracy           =     0.9498
+  eval_f1                 =     0.6955
+  eval_loss               =     0.3003
+  eval_precision          =     0.6832
+  eval_recall             =     0.7083
+  eval_runtime            = 0:00:05.35
+  eval_samples            =       2519
+  eval_samples_per_second =    470.558
+  eval_steps_per_second   =     58.843
+09/04/2024 18:55:39 - INFO - __main__ -   *** Predict ***
+[INFO|trainer.py:811] 2024-09-04 18:55:39,376 >> The following columns in the test set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`,  you can safely ignore this message.
+[INFO|trainer.py:3819] 2024-09-04 18:55:39,378 >>
+***** Running Prediction *****
+[INFO|trainer.py:3821] 2024-09-04 18:55:39,378 >>   Num examples = 4047
+[INFO|trainer.py:3824] 2024-09-04 18:55:39,378 >>   Batch size = 8
  0%|          | 0/506 [00:00<?, ?it/s]
  2%|▏         | 10/506 [00:00<00:05, 94.71it/s]
  4%|▍         | 20/506 [00:00<00:05, 81.40it/s]
  6%|▌         | 29/506 [00:00<00:05, 82.81it/s]
  8%|▊         | 38/506 [00:00<00:05, 81.92it/s]
  9%|▉         | 47/506 [00:00<00:05, 82.66it/s]
 11%|█         | 56/506 [00:00<00:05, 83.27it/s]
 13%|█▎        | 65/506 [00:00<00:05, 82.44it/s]
 15%|█▍        | 74/506 [00:00<00:05, 83.07it/s]
 16%|█▋        | 83/506 [00:01<00:05, 78.19it/s]
 18%|█▊        | 91/506 [00:01<00:05, 78.64it/s]
 20%|█▉        | 99/506 [00:01<00:05, 78.19it/s]
 21%|██▏       | 108/506 [00:01<00:05, 78.67it/s]
 23%|██▎       | 117/506 [00:01<00:04, 80.48it/s]
 25%|██▍       | 126/506 [00:01<00:04, 78.13it/s]
 26%|██▋       | 134/506 [00:01<00:05, 72.17it/s]
 28%|██▊       | 142/506 [00:01<00:04, 74.13it/s]
 30%|██▉       | 151/506 [00:01<00:04, 76.31it/s]
 31%|███▏      | 159/506 [00:02<00:04, 73.31it/s]
 33%|███▎      | 167/506 [00:02<00:04, 73.99it/s]
 35%|███▍      | 176/506 [00:02<00:04, 75.97it/s]
 37%|███▋      | 185/506 [00:02<00:04, 78.12it/s]
 38%|███▊      | 194/506 [00:02<00:03, 78.82it/s]
 40%|████      | 203/506 [00:02<00:03, 80.17it/s]
 42%|████▏     | 212/506 [00:02<00:03, 79.80it/s]
 44%|████▎     | 221/506 [00:02<00:03, 79.76it/s]
 45%|████▌     | 229/506 [00:02<00:03, 78.81it/s]
 47%|████▋     | 237/506 [00:03<00:03, 78.04it/s]
 49%|████▊     | 246/506 [00:03<00:03, 79.77it/s]
 50%|█████     | 254/506 [00:03<00:03, 78.96it/s]
 52%|█████▏    | 263/506 [00:03<00:03, 79.11it/s]
 54%|█████▎    | 271/506 [00:03<00:03, 75.85it/s]
 55%|█████▌    | 279/506 [00:03<00:03, 74.36it/s]
 57%|█████▋    | 287/506 [00:03<00:03, 72.60it/s]
 58%|█████▊    | 295/506 [00:03<00:02, 71.46it/s]
 60%|█████▉    | 303/506 [00:03<00:02, 70.59it/s]
 61%|██████▏   | 311/506 [00:04<00:02, 70.00it/s]
 63%|██████▎   | 320/506 [00:04<00:02, 73.65it/s]
 65%|██████▌   | 329/506 [00:04<00:02, 76.95it/s]
 67%|██████▋   | 338/506 [00:04<00:02, 78.75it/s]
 69%|██████▊   | 347/506 [00:04<00:01, 80.66it/s]
 70%|███████   | 356/506 [00:04<00:01, 82.02it/s]
 72%|███████▏  | 365/506 [00:04<00:01, 81.74it/s]
 74%|███████▍  | 374/506 [00:04<00:01, 79.69it/s]
 75%|███████▌  | 382/506 [00:04<00:01, 79.52it/s]
 77%|███████▋  | 390/506 [00:05<00:01, 77.40it/s]
 79%|███████▊  | 398/506 [00:05<00:01, 71.06it/s]
 80%|████████  | 407/506 [00:05<00:01, 73.71it/s]
 82%|████████▏ | 415/506 [00:05<00:01, 74.33it/s]
 84%|████████▎ | 423/506 [00:05<00:01, 75.66it/s]
 85%|████████▌ | 432/506 [00:05<00:00, 77.90it/s]
 87%|████████▋ | 440/506 [00:05<00:00, 78.07it/s]
 89%|████████▊ | 448/506 [00:05<00:00, 78.35it/s]
 90%|█████████ | 457/506 [00:05<00:00, 79.22it/s]
 92%|█████████▏| 466/506 [00:05<00:00, 80.30it/s]
 94%|█████████▍| 475/506 [00:06<00:00, 81.34it/s]
 96%|█████████▌| 484/506 [00:06<00:00, 77.78it/s]
 97%|█████████▋| 492/506 [00:06<00:00, 74.43it/s]
 99%|█████████▉| 500/506 [00:06<00:00, 72.82it/s]
+[INFO|trainer.py:3503] 2024-09-04 18:55:48,667 >> Saving model checkpoint to /content/dissertation/scripts/ner/output
+[INFO|configuration_utils.py:472] 2024-09-04 18:55:48,669 >> Configuration saved in /content/dissertation/scripts/ner/output/config.json
+[INFO|modeling_utils.py:2799] 2024-09-04 18:55:50,042 >> Model weights saved in /content/dissertation/scripts/ner/output/model.safetensors
+[INFO|tokenization_utils_base.py:2684] 2024-09-04 18:55:50,043 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
+[INFO|tokenization_utils_base.py:2693] 2024-09-04 18:55:50,043 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
+***** predict metrics *****
+  predict_accuracy           =     0.9465
+  predict_f1                 =     0.6872
+  predict_loss               =       0.32
+  predict_precision          =     0.6778
+  predict_recall             =     0.6968
+  predict_runtime            = 0:00:09.12
+  predict_samples_per_second =    443.395
+  predict_steps_per_second   =     55.438

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 10.0,
-    "total_flos": 7718163558521760.0,
-    "train_loss": 0.03527186407196906,
-    "train_runtime": 706.0488,
-    "train_samples": 15848,
-    "train_samples_per_second": 224.46,
-    "train_steps_per_second": 3.513
 }

 {
+    "epoch": 9.976133651551313,
+    "total_flos": 6479980841102670.0,
+    "train_loss": 0.03990328233493002,
+    "train_runtime": 605.7066,
+    "train_samples": 13389,
+    "train_samples_per_second": 221.048,
+    "train_steps_per_second": 3.451
 }

trainer_state.json CHANGED Viewed

@@ -1,173 +1,173 @@
 {
-  "best_metric": 0.6985413290113451,
-  "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1736",
-  "epoch": 10.0,
   "eval_steps": 500,
-  "global_step": 2480,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.0,
-      "eval_accuracy": 0.9477686162533286,
-      "eval_f1": 0.63506625891947,
-      "eval_loss": 0.16486208140850067,
-      "eval_precision": 0.5941821649976157,
-      "eval_recall": 0.6819923371647509,
-      "eval_runtime": 5.5136,
-      "eval_samples_per_second": 456.871,
-      "eval_steps_per_second": 57.132,
-      "step": 248
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.9476402836151304,
-      "eval_f1": 0.6630581867388362,
-      "eval_loss": 0.18148483335971832,
-      "eval_precision": 0.6557815845824411,
-      "eval_recall": 0.6704980842911877,
-      "eval_runtime": 5.4181,
-      "eval_samples_per_second": 464.925,
-      "eval_steps_per_second": 58.139,
-      "step": 496
     },
     {
-      "epoch": 2.0161290322580645,
-      "grad_norm": 0.6250831484794617,
-      "learning_rate": 3.991935483870968e-05,
-      "loss": 0.134,
       "step": 500
     },
     {
-      "epoch": 3.0,
-      "eval_accuracy": 0.9491802752735089,
-      "eval_f1": 0.6810897435897436,
-      "eval_loss": 0.2111387550830841,
-      "eval_precision": 0.6651017214397497,
       "eval_recall": 0.6978653530377669,
-      "eval_runtime": 5.4844,
-      "eval_samples_per_second": 459.302,
-      "eval_steps_per_second": 57.435,
-      "step": 744
     },
     {
       "epoch": 4.0,
-      "eval_accuracy": 0.9488434020982386,
-      "eval_f1": 0.6900026518164943,
-      "eval_loss": 0.25230270624160767,
-      "eval_precision": 0.6692386831275721,
-      "eval_recall": 0.7120963327859879,
-      "eval_runtime": 5.4481,
-      "eval_samples_per_second": 462.36,
-      "eval_steps_per_second": 57.818,
-      "step": 992
     },
     {
-      "epoch": 4.032258064516129,
-      "grad_norm": 0.7998089790344238,
-      "learning_rate": 2.9838709677419357e-05,
-      "loss": 0.026,
       "step": 1000
     },
     {
-      "epoch": 5.0,
-      "eval_accuracy": 0.9490840257948603,
-      "eval_f1": 0.6847083552285864,
-      "eval_loss": 0.27709877490997314,
-      "eval_precision": 0.6584133400707428,
-      "eval_recall": 0.7131910235358512,
-      "eval_runtime": 5.6532,
-      "eval_samples_per_second": 445.585,
-      "eval_steps_per_second": 55.72,
-      "step": 1240
     },
     {
       "epoch": 6.0,
-      "eval_accuracy": 0.9486348615611665,
-      "eval_f1": 0.6907651715039579,
-      "eval_loss": 0.2968369126319885,
-      "eval_precision": 0.6668364747834946,
-      "eval_recall": 0.7164750957854407,
-      "eval_runtime": 5.4549,
-      "eval_samples_per_second": 461.787,
-      "eval_steps_per_second": 57.746,
-      "step": 1488
     },
     {
-      "epoch": 6.048387096774194,
-      "grad_norm": 0.15673314034938812,
-      "learning_rate": 1.975806451612903e-05,
-      "loss": 0.0084,
-      "step": 1500
     },
     {
-      "epoch": 7.0,
-      "eval_accuracy": 0.9496936058263018,
-      "eval_f1": 0.6985413290113451,
-      "eval_loss": 0.3088673949241638,
-      "eval_precision": 0.6896,
-      "eval_recall": 0.7077175697865353,
-      "eval_runtime": 5.5771,
-      "eval_samples_per_second": 451.669,
-      "eval_steps_per_second": 56.481,
-      "step": 1736
     },
     {
       "epoch": 8.0,
-      "eval_accuracy": 0.9498861047835991,
-      "eval_f1": 0.6946236559139785,
-      "eval_loss": 0.31877079606056213,
-      "eval_precision": 0.6825145272054939,
-      "eval_recall": 0.7071702244116037,
-      "eval_runtime": 5.3015,
-      "eval_samples_per_second": 475.15,
-      "eval_steps_per_second": 59.417,
-      "step": 1984
     },
     {
-      "epoch": 8.064516129032258,
-      "grad_norm": 0.22283445298671722,
-      "learning_rate": 9.67741935483871e-06,
-      "loss": 0.0042,
-      "step": 2000
     },
     {
-      "epoch": 9.0,
-      "eval_accuracy": 0.9494529821296801,
-      "eval_f1": 0.6979722518676629,
-      "eval_loss": 0.3295721411705017,
-      "eval_precision": 0.6808953669963561,
-      "eval_recall": 0.715927750410509,
-      "eval_runtime": 5.4512,
-      "eval_samples_per_second": 462.102,
-      "eval_steps_per_second": 57.786,
-      "step": 2232
     },
     {
-      "epoch": 10.0,
-      "eval_accuracy": 0.9498861047835991,
-      "eval_f1": 0.6985058697972252,
-      "eval_loss": 0.33284899592399597,
-      "eval_precision": 0.6814159292035398,
-      "eval_recall": 0.7164750957854407,
-      "eval_runtime": 5.5975,
-      "eval_samples_per_second": 450.025,
-      "eval_steps_per_second": 56.276,
-      "step": 2480
     },
     {
-      "epoch": 10.0,
-      "step": 2480,
-      "total_flos": 7718163558521760.0,
-      "train_loss": 0.03527186407196906,
-      "train_runtime": 706.0488,
-      "train_samples_per_second": 224.46,
-      "train_steps_per_second": 3.513
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2480,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
@@ -183,7 +183,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 7718163558521760.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.6955119591507659,
+  "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1676",
+  "epoch": 9.976133651551313,
   "eval_steps": 500,
+  "global_step": 2090,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.9976133651551312,
+      "eval_accuracy": 0.9457473772017069,
+      "eval_f1": 0.6225502672435734,
+      "eval_loss": 0.15105590224266052,
+      "eval_precision": 0.5818268315889629,
+      "eval_recall": 0.6694033935413246,
+      "eval_runtime": 5.6572,
+      "eval_samples_per_second": 445.271,
+      "eval_steps_per_second": 55.681,
+      "step": 209
     },
     {
       "epoch": 2.0,
+      "eval_accuracy": 0.9464852898713465,
+      "eval_f1": 0.653414882772681,
+      "eval_loss": 0.1794203370809555,
+      "eval_precision": 0.6113495469718646,
+      "eval_recall": 0.7016967706622879,
+      "eval_runtime": 5.5563,
+      "eval_samples_per_second": 453.36,
+      "eval_steps_per_second": 56.692,
+      "step": 419
     },
     {
+      "epoch": 2.386634844868735,
+      "grad_norm": 1.321442723274231,
+      "learning_rate": 3.8038277511961725e-05,
+      "loss": 0.1282,
       "step": 500
     },
     {
+      "epoch": 2.9976133651551313,
+      "eval_accuracy": 0.9470146620039142,
+      "eval_f1": 0.6769312450225644,
+      "eval_loss": 0.21023894846439362,
+      "eval_precision": 0.6572164948453608,
       "eval_recall": 0.6978653530377669,
+      "eval_runtime": 5.4361,
+      "eval_samples_per_second": 463.385,
+      "eval_steps_per_second": 57.946,
+      "step": 628
     },
     {
       "epoch": 4.0,
+      "eval_accuracy": 0.9489075684173377,
+      "eval_f1": 0.6789039638201649,
+      "eval_loss": 0.23289668560028076,
+      "eval_precision": 0.660455486542443,
+      "eval_recall": 0.6984126984126984,
+      "eval_runtime": 5.4098,
+      "eval_samples_per_second": 465.634,
+      "eval_steps_per_second": 58.227,
+      "step": 838
     },
     {
+      "epoch": 4.77326968973747,
+      "grad_norm": 0.4721558690071106,
+      "learning_rate": 2.6076555023923443e-05,
+      "loss": 0.0255,
       "step": 1000
     },
     {
+      "epoch": 4.997613365155131,
+      "eval_accuracy": 0.9490679842150855,
+      "eval_f1": 0.6805555555555556,
+      "eval_loss": 0.2591019570827484,
+      "eval_precision": 0.6645800730307773,
+      "eval_recall": 0.6973180076628352,
+      "eval_runtime": 5.2875,
+      "eval_samples_per_second": 476.407,
+      "eval_steps_per_second": 59.575,
+      "step": 1047
     },
     {
       "epoch": 6.0,
+      "eval_accuracy": 0.9491481921139594,
+      "eval_f1": 0.6778487752928647,
+      "eval_loss": 0.27368706464767456,
+      "eval_precision": 0.6599274235355106,
+      "eval_recall": 0.6967706622879036,
+      "eval_runtime": 5.2916,
+      "eval_samples_per_second": 476.037,
+      "eval_steps_per_second": 59.528,
+      "step": 1257
     },
     {
+      "epoch": 6.997613365155131,
+      "eval_accuracy": 0.949244441592608,
+      "eval_f1": 0.6884462151394423,
+      "eval_loss": 0.28756192326545715,
+      "eval_precision": 0.6687306501547987,
+      "eval_recall": 0.7093596059113301,
+      "eval_runtime": 5.2943,
+      "eval_samples_per_second": 475.796,
+      "eval_steps_per_second": 59.498,
+      "step": 1466
     },
     {
+      "epoch": 7.159904534606206,
+      "grad_norm": 0.6170095205307007,
+      "learning_rate": 1.4114832535885167e-05,
+      "loss": 0.0085,
+      "step": 1500
     },
     {
       "epoch": 8.0,
+      "eval_accuracy": 0.9498058968847252,
+      "eval_f1": 0.6955119591507659,
+      "eval_loss": 0.300260066986084,
+      "eval_precision": 0.6832101372756072,
+      "eval_recall": 0.7082649151614668,
+      "eval_runtime": 5.5232,
+      "eval_samples_per_second": 456.079,
+      "eval_steps_per_second": 57.032,
+      "step": 1676
     },
     {
+      "epoch": 8.99761336515513,
+      "eval_accuracy": 0.9491802752735089,
+      "eval_f1": 0.6925720327782183,
+      "eval_loss": 0.30722716450691223,
+      "eval_precision": 0.6697341513292433,
+      "eval_recall": 0.7170224411603722,
+      "eval_runtime": 5.6022,
+      "eval_samples_per_second": 449.646,
+      "eval_steps_per_second": 56.228,
+      "step": 1885
     },
     {
+      "epoch": 9.54653937947494,
+      "grad_norm": 0.3939690589904785,
+      "learning_rate": 2.15311004784689e-06,
+      "loss": 0.004,
+      "step": 2000
     },
     {
+      "epoch": 9.976133651551313,
+      "eval_accuracy": 0.9491963168532838,
+      "eval_f1": 0.6917771883289126,
+      "eval_loss": 0.31253090500831604,
+      "eval_precision": 0.6711271230056614,
+      "eval_recall": 0.7137383689107827,
+      "eval_runtime": 5.6829,
+      "eval_samples_per_second": 443.26,
+      "eval_steps_per_second": 55.429,
+      "step": 2090
     },
     {
+      "epoch": 9.976133651551313,
+      "step": 2090,
+      "total_flos": 6479980841102670.0,
+      "train_loss": 0.03990328233493002,
+      "train_runtime": 605.7066,
+      "train_samples_per_second": 221.048,
+      "train_steps_per_second": 3.451
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2090,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 10,
   "save_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 6479980841102670.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null