Rodrigo1771 committed (verified)
Commit 08d4796 · 1 Parent(s): 14df036

Training in progress, epoch 1
README.md ADDED
@@ -0,0 +1,103 @@
+ ---
+ library_name: transformers
+ license: apache-2.0
+ base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
+ tags:
+ - token-classification
+ - generated_from_trainer
+ datasets:
+ - Rodrigo1771/symptemist-fasttext-8-ner
+ metrics:
+ - precision
+ - recall
+ - f1
+ - accuracy
+ model-index:
+ - name: output
+   results:
+   - task:
+       name: Token Classification
+       type: token-classification
+     dataset:
+       name: Rodrigo1771/symptemist-fasttext-8-ner
+       type: Rodrigo1771/symptemist-fasttext-8-ner
+       config: SympTEMIST NER
+       split: validation
+       args: SympTEMIST NER
+     metrics:
+     - name: Precision
+       type: precision
+       value: 0.6764102564102564
+     - name: Recall
+       type: recall
+       value: 0.7219485495347564
+     - name: F1
+       type: f1
+       value: 0.6984379136881121
+     - name: Accuracy
+       type: accuracy
+       value: 0.9500465205813469
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # output
+
+ This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-8-ner dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.3073
+ - Precision: 0.6764
+ - Recall: 0.7219
+ - F1: 0.6984
+ - Accuracy: 0.9500
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 32
+ - eval_batch_size: 8
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - num_epochs: 10.0
+
+ ### Training results
+
+ | Training Loss | Epoch  | Step | Validation Loss | Precision | Recall | F1     | Accuracy |
+ |:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
+ | No log        | 0.9975 | 203  | 0.1501          | 0.5960    | 0.6338 | 0.6143 | 0.9468   |
+ | No log        | 2.0    | 407  | 0.1761          | 0.6529    | 0.6940 | 0.6729 | 0.9492   |
+ | 0.1312        | 2.9975 | 610  | 0.1995          | 0.6322    | 0.7170 | 0.6720 | 0.9470   |
+ | 0.1312        | 4.0    | 814  | 0.2182          | 0.6446    | 0.7137 | 0.6774 | 0.9483   |
+ | 0.0248        | 4.9975 | 1017 | 0.2461          | 0.6251    | 0.7219 | 0.6701 | 0.9449   |
+ | 0.0248        | 6.0    | 1221 | 0.2695          | 0.6410    | 0.7302 | 0.6827 | 0.9469   |
+ | 0.0248        | 6.9975 | 1424 | 0.2829          | 0.6529    | 0.7340 | 0.6911 | 0.9470   |
+ | 0.0081        | 8.0    | 1628 | 0.2982          | 0.6711    | 0.7181 | 0.6938 | 0.9494   |
+ | 0.0081        | 8.9975 | 1831 | 0.3073          | 0.6764    | 0.7219 | 0.6984 | 0.9500   |
+ | 0.0038        | 9.9754 | 2030 | 0.3079          | 0.6713    | 0.7165 | 0.6931 | 0.9500   |
+
+ ### Framework versions
+
+ - Transformers 4.44.2
+ - Pytorch 2.4.0+cu121
+ - Datasets 2.21.0
+ - Tokenizers 0.19.1
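
A minimal usage sketch for the resulting checkpoint. The Hub repo id below is an assumption (the Trainer only records the local name `output`); substitute the repository this commit actually lives under:

```python
from transformers import pipeline

# Hypothetical repo id -- substitute the actual Hub repository.
ner = pipeline(
    "token-classification",
    model="Rodrigo1771/output",
    aggregation_strategy="simple",  # merges B-SINTOMA/I-SINTOMA pieces into spans
)

# Spanish clinical text; the model tags symptom mentions (SINTOMA).
print(ner("El paciente refiere cefalea intensa y náuseas desde ayer."))
```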
all_results.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "epoch": 9.975429975429975,
+   "eval_accuracy": 0.9500465205813469,
+   "eval_f1": 0.6984379136881121,
+   "eval_loss": 0.30729904770851135,
+   "eval_precision": 0.6764102564102564,
+   "eval_recall": 0.7219485495347564,
+   "eval_runtime": 6.0921,
+   "eval_samples": 2519,
+   "eval_samples_per_second": 413.484,
+   "eval_steps_per_second": 51.706,
+   "predict_accuracy": 0.9466933985906772,
+   "predict_f1": 0.6951548848292296,
+   "predict_loss": 0.3347860872745514,
+   "predict_precision": 0.6863237139272271,
+   "predict_recall": 0.704216285806244,
+   "predict_runtime": 9.749,
+   "predict_samples_per_second": 415.118,
+   "predict_steps_per_second": 51.903,
+   "total_flos": 6404835399317064.0,
+   "train_loss": 0.04138289297302368,
+   "train_runtime": 1065.756,
+   "train_samples": 13013,
+   "train_samples_per_second": 122.101,
+   "train_steps_per_second": 1.905
+ }
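
These summary numbers are internally consistent; a quick sanity check using only values from this file:

```python
# F1 is the harmonic mean of the reported precision and recall.
p, r = 0.6764102564102564, 0.7219485495347564
print(2 * p * r / (p + r))   # ~0.698438, matches eval_f1

# Throughput is samples divided by runtime (up to rounding of eval_runtime).
print(2519 / 6.0921)         # ~413.49, matches eval_samples_per_second (413.484)
```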
config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
+   "architectures": [
+     "RobertaForTokenClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "bos_token_id": 0,
+   "classifier_dropout": null,
+   "eos_token_id": 2,
+   "finetuning_task": "ner",
+   "gradient_checkpointing": false,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "O",
+     "1": "B-SINTOMA",
+     "2": "I-SINTOMA"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "B-SINTOMA": 1,
+     "I-SINTOMA": 2,
+     "O": 0
+   },
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 514,
+   "model_type": "roberta",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "position_embedding_type": "absolute",
+   "torch_dtype": "float32",
+   "transformers_version": "4.44.2",
+   "type_vocab_size": 1,
+   "use_cache": true,
+   "vocab_size": 50262
+ }
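
The label set is a plain BIO scheme with a single entity type, SINTOMA (Spanish for "symptom"). A short inspection sketch (repo id assumed, as above):

```python
from transformers import AutoConfig

# Hypothetical repo id -- substitute the actual Hub repository.
config = AutoConfig.from_pretrained("Rodrigo1771/output")
print(config.id2label)    # {0: 'O', 1: 'B-SINTOMA', 2: 'I-SINTOMA'}
print(config.num_labels)  # 3
```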
eval_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "epoch": 9.975429975429975,
+   "eval_accuracy": 0.9500465205813469,
+   "eval_f1": 0.6984379136881121,
+   "eval_loss": 0.30729904770851135,
+   "eval_precision": 0.6764102564102564,
+   "eval_recall": 0.7219485495347564,
+   "eval_runtime": 6.0921,
+   "eval_samples": 2519,
+   "eval_samples_per_second": 413.484,
+   "eval_steps_per_second": 51.706
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fd56006954dc777a98f3c5e0587cc614d34216bcb27350118db301e7a844faa9
+ size 496244100
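
This is a Git LFS pointer, not the weights themselves; the actual safetensors file is roughly 496 MB. A fetch sketch (repo id assumed, as above):

```python
from huggingface_hub import hf_hub_download

# Hypothetical repo id -- substitute the actual Hub repository.
path = hf_hub_download(repo_id="Rodrigo1771/output", filename="model.safetensors")
print(path)  # local cache path of the ~496 MB weights file
```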
predict_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "predict_accuracy": 0.9466933985906772,
+   "predict_f1": 0.6951548848292296,
+   "predict_loss": 0.3347860872745514,
+   "predict_precision": 0.6863237139272271,
+   "predict_recall": 0.704216285806244,
+   "predict_runtime": 9.749,
+   "predict_samples_per_second": 415.118,
+   "predict_steps_per_second": 51.903
+ }
predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "cls_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "sep_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:593e737686a00ae0f64a94f2ef02389ad7dff30c0ba6a6f2b1c65ac31e873867
+ size 11302
tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05ecdc6d00855fb66deb25a7b5be160aa0ebb2ebe07a43beb7d88fb0430fb141
+ size 560
tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:543df15001da008ba822f9c1ebf4f77259f803cbf1c5758f2da70bdbf003d86f
+ size 11091
tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b842d84c89f0d88706e31e98b113fae6b45879220115930147db648f848a8c24
+ size 560
tb/events.out.tfevents.1725884095.0a1c9bec2a53.15221.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9529d9c4be592245c933a5892ef8b71c8be99c2c71381d022d5f07c90bd6362
+ size 5645
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
+ {
+   "add_prefix_space": true,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "3": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "50261": {
+       "content": "<mask>",
+       "lstrip": true,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": true,
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "max_len": 512,
+   "model_max_length": 512,
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "tokenizer_class": "RobertaTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
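
The tokenizer is RoBERTa-style byte-level BPE with `add_prefix_space` enabled (needed for pre-split NER input) and a 512-token limit. A loading sketch (repo id assumed, as above):

```python
from transformers import AutoTokenizer

# Hypothetical repo id -- substitute the actual Hub repository.
tok = AutoTokenizer.from_pretrained("Rodrigo1771/output", add_prefix_space=True)

# NER input arrives as pre-split words, hence is_split_into_words=True.
enc = tok(["cefalea", "intensa"], is_split_into_words=True)
print(tok.convert_ids_to_tokens(enc["input_ids"]))  # starts with <s>, ends with </s>
print(tok.model_max_length)                          # 512
```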
train.log ADDED
@@ -0,0 +1,357 @@
1
+ 2024-09-09 12:14:35.494661: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-09-09 12:14:35.513016: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-09-09 12:14:35.535014: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-09-09 12:14:35.541769: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-09-09 12:14:35.557993: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-09-09 12:14:36.793402: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 09/09/2024 12:14:38 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 09/09/2024 12:14:38 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ eval_use_gather_object=False,
47
+ evaluation_strategy=epoch,
48
+ fp16=False,
49
+ fp16_backend=auto,
50
+ fp16_full_eval=False,
51
+ fp16_opt_level=O1,
52
+ fsdp=[],
53
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
54
+ fsdp_min_num_params=0,
55
+ fsdp_transformer_layer_cls_to_wrap=None,
56
+ full_determinism=False,
57
+ gradient_accumulation_steps=2,
58
+ gradient_checkpointing=False,
59
+ gradient_checkpointing_kwargs=None,
60
+ greater_is_better=True,
61
+ group_by_length=False,
62
+ half_precision_backend=auto,
63
+ hub_always_push=False,
64
+ hub_model_id=None,
65
+ hub_private_repo=False,
66
+ hub_strategy=every_save,
67
+ hub_token=<HUB_TOKEN>,
68
+ ignore_data_skip=False,
69
+ include_inputs_for_metrics=False,
70
+ include_num_input_tokens_seen=False,
71
+ include_tokens_per_second=False,
72
+ jit_mode_eval=False,
73
+ label_names=None,
74
+ label_smoothing_factor=0.0,
75
+ learning_rate=5e-05,
76
+ length_column_name=length,
77
+ load_best_model_at_end=True,
78
+ local_rank=0,
79
+ log_level=passive,
80
+ log_level_replica=warning,
81
+ log_on_each_node=True,
82
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
83
+ logging_first_step=False,
84
+ logging_nan_inf_filter=True,
85
+ logging_steps=500,
86
+ logging_strategy=steps,
87
+ lr_scheduler_kwargs={},
88
+ lr_scheduler_type=linear,
89
+ max_grad_norm=1.0,
90
+ max_steps=-1,
91
+ metric_for_best_model=f1,
92
+ mp_parameters=,
93
+ neftune_noise_alpha=None,
94
+ no_cuda=False,
95
+ num_train_epochs=10.0,
96
+ optim=adamw_torch,
97
+ optim_args=None,
98
+ optim_target_modules=None,
99
+ output_dir=/content/dissertation/scripts/ner/output,
100
+ overwrite_output_dir=True,
101
+ past_index=-1,
102
+ per_device_eval_batch_size=8,
103
+ per_device_train_batch_size=32,
104
+ prediction_loss_only=False,
105
+ push_to_hub=True,
106
+ push_to_hub_model_id=None,
107
+ push_to_hub_organization=None,
108
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
109
+ ray_scope=last,
110
+ remove_unused_columns=True,
111
+ report_to=['tensorboard'],
112
+ restore_callback_states_from_checkpoint=False,
113
+ resume_from_checkpoint=None,
114
+ run_name=/content/dissertation/scripts/ner/output,
115
+ save_on_each_node=False,
116
+ save_only_model=False,
117
+ save_safetensors=True,
118
+ save_steps=500,
119
+ save_strategy=epoch,
120
+ save_total_limit=None,
121
+ seed=42,
122
+ skip_memory_metrics=True,
123
+ split_batches=None,
124
+ tf32=None,
125
+ torch_compile=False,
126
+ torch_compile_backend=None,
127
+ torch_compile_mode=None,
128
+ torch_empty_cache_steps=None,
129
+ torchdynamo=None,
130
+ tpu_metrics_debug=False,
131
+ tpu_num_cores=None,
132
+ use_cpu=False,
133
+ use_ipex=False,
134
+ use_legacy_prediction_loop=False,
135
+ use_mps_device=False,
136
+ warmup_ratio=0.0,
137
+ warmup_steps=0,
138
+ weight_decay=0.0,
139
+ )
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ [INFO|configuration_utils.py:733] 2024-09-09 12:14:50,533 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
148
+ [INFO|configuration_utils.py:800] 2024-09-09 12:14:50,537 >> Model config RobertaConfig {
149
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
150
+ "architectures": [
151
+ "RobertaForMaskedLM"
152
+ ],
153
+ "attention_probs_dropout_prob": 0.1,
154
+ "bos_token_id": 0,
155
+ "classifier_dropout": null,
156
+ "eos_token_id": 2,
157
+ "finetuning_task": "ner",
158
+ "gradient_checkpointing": false,
159
+ "hidden_act": "gelu",
160
+ "hidden_dropout_prob": 0.1,
161
+ "hidden_size": 768,
162
+ "id2label": {
163
+ "0": "O",
164
+ "1": "B-SINTOMA",
165
+ "2": "I-SINTOMA"
166
+ },
167
+ "initializer_range": 0.02,
168
+ "intermediate_size": 3072,
169
+ "label2id": {
170
+ "B-SINTOMA": 1,
171
+ "I-SINTOMA": 2,
172
+ "O": 0
173
+ },
174
+ "layer_norm_eps": 1e-05,
175
+ "max_position_embeddings": 514,
176
+ "model_type": "roberta",
177
+ "num_attention_heads": 12,
178
+ "num_hidden_layers": 12,
179
+ "pad_token_id": 1,
180
+ "position_embedding_type": "absolute",
181
+ "transformers_version": "4.44.2",
182
+ "type_vocab_size": 1,
183
+ "use_cache": true,
184
+ "vocab_size": 50262
185
+ }
186
+
187
+ [INFO|configuration_utils.py:733] 2024-09-09 12:14:50,787 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
188
+ [INFO|configuration_utils.py:800] 2024-09-09 12:14:50,788 >> Model config RobertaConfig {
189
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
190
+ "architectures": [
191
+ "RobertaForMaskedLM"
192
+ ],
193
+ "attention_probs_dropout_prob": 0.1,
194
+ "bos_token_id": 0,
195
+ "classifier_dropout": null,
196
+ "eos_token_id": 2,
197
+ "gradient_checkpointing": false,
198
+ "hidden_act": "gelu",
199
+ "hidden_dropout_prob": 0.1,
200
+ "hidden_size": 768,
201
+ "initializer_range": 0.02,
202
+ "intermediate_size": 3072,
203
+ "layer_norm_eps": 1e-05,
204
+ "max_position_embeddings": 514,
205
+ "model_type": "roberta",
206
+ "num_attention_heads": 12,
207
+ "num_hidden_layers": 12,
208
+ "pad_token_id": 1,
209
+ "position_embedding_type": "absolute",
210
+ "transformers_version": "4.44.2",
211
+ "type_vocab_size": 1,
212
+ "use_cache": true,
213
+ "vocab_size": 50262
214
+ }
215
+
216
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,800 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
217
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
218
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file tokenizer.json from cache at None
219
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file added_tokens.json from cache at None
220
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
221
+ [INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
222
+ [INFO|configuration_utils.py:733] 2024-09-09 12:14:50,801 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
223
+ [INFO|configuration_utils.py:800] 2024-09-09 12:14:50,802 >> Model config RobertaConfig {
224
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
225
+ "architectures": [
226
+ "RobertaForMaskedLM"
227
+ ],
228
+ "attention_probs_dropout_prob": 0.1,
229
+ "bos_token_id": 0,
230
+ "classifier_dropout": null,
231
+ "eos_token_id": 2,
232
+ "gradient_checkpointing": false,
233
+ "hidden_act": "gelu",
234
+ "hidden_dropout_prob": 0.1,
235
+ "hidden_size": 768,
236
+ "initializer_range": 0.02,
237
+ "intermediate_size": 3072,
238
+ "layer_norm_eps": 1e-05,
239
+ "max_position_embeddings": 514,
240
+ "model_type": "roberta",
241
+ "num_attention_heads": 12,
242
+ "num_hidden_layers": 12,
243
+ "pad_token_id": 1,
244
+ "position_embedding_type": "absolute",
245
+ "transformers_version": "4.44.2",
246
+ "type_vocab_size": 1,
247
+ "use_cache": true,
248
+ "vocab_size": 50262
249
+ }
250
+
251
+ /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
252
+ warnings.warn(
253
+ [INFO|configuration_utils.py:733] 2024-09-09 12:14:50,882 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
254
+ [INFO|configuration_utils.py:800] 2024-09-09 12:14:50,883 >> Model config RobertaConfig {
255
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
256
+ "architectures": [
257
+ "RobertaForMaskedLM"
258
+ ],
259
+ "attention_probs_dropout_prob": 0.1,
260
+ "bos_token_id": 0,
261
+ "classifier_dropout": null,
262
+ "eos_token_id": 2,
263
+ "gradient_checkpointing": false,
264
+ "hidden_act": "gelu",
265
+ "hidden_dropout_prob": 0.1,
266
+ "hidden_size": 768,
267
+ "initializer_range": 0.02,
268
+ "intermediate_size": 3072,
269
+ "layer_norm_eps": 1e-05,
270
+ "max_position_embeddings": 514,
271
+ "model_type": "roberta",
272
+ "num_attention_heads": 12,
273
+ "num_hidden_layers": 12,
274
+ "pad_token_id": 1,
275
+ "position_embedding_type": "absolute",
276
+ "transformers_version": "4.44.2",
277
+ "type_vocab_size": 1,
278
+ "use_cache": true,
279
+ "vocab_size": 50262
280
+ }
281
+
282
+ [INFO|modeling_utils.py:3678] 2024-09-09 12:14:51,213 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
283
+ [INFO|modeling_utils.py:4497] 2024-09-09 12:14:51,293 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
284
+ - This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
285
+ - This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
286
+ [WARNING|modeling_utils.py:4509] 2024-09-09 12:14:51,293 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
287
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
288
+
289
+
290
+
291
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
292
+ metric = load_metric("seqeval", trust_remote_code=True)
293
+ [INFO|trainer.py:811] 2024-09-09 12:14:55,082 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
294
+ [INFO|trainer.py:2134] 2024-09-09 12:14:55,636 >> ***** Running training *****
295
+ [INFO|trainer.py:2135] 2024-09-09 12:14:55,636 >> Num examples = 10,936
296
+ [INFO|trainer.py:2136] 2024-09-09 12:14:55,636 >> Num Epochs = 10
297
+ [INFO|trainer.py:2137] 2024-09-09 12:14:55,636 >> Instantaneous batch size per device = 32
298
+ [INFO|trainer.py:2140] 2024-09-09 12:14:55,636 >> Total train batch size (w. parallel, distributed & accumulation) = 64
299
+ [INFO|trainer.py:2141] 2024-09-09 12:14:55,636 >> Gradient Accumulation steps = 2
300
+ [INFO|trainer.py:2142] 2024-09-09 12:14:55,636 >> Total optimization steps = 1,710
301
+ [INFO|trainer.py:2143] 2024-09-09 12:14:55,637 >> Number of trainable parameters = 124,055,043
302
+
303
  0%| | 0/1710 [00:00<?, ?it/s]
304
  0%| | 1/1710 [00:01<32:09, 1.13s/it]
305
  0%| | 2/1710 [00:01<19:10, 1.48it/s]
306
  0%| | 3/1710 [00:01<16:10, 1.76it/s]
307
  0%| | 4/1710 [00:02<14:04, 2.02it/s]
308
  0%| | 5/1710 [00:02<13:32, 2.10it/s]
309
  0%| | 6/1710 [00:03<14:07, 2.01it/s]
310
  0%| | 7/1710 [00:03<12:53, 2.20it/s]
311
  0%| | 8/1710 [00:04<12:12, 2.32it/s]
312
  1%| | 9/1710 [00:04<12:30, 2.27it/s]
313
  1%| | 10/1710 [00:04<11:41, 2.42it/s]
314
  1%| | 11/1710 [00:05<12:06, 2.34it/s]
315
  1%| | 12/1710 [00:05<13:56, 2.03it/s]
316
  1%| | 13/1710 [00:06<14:13, 1.99it/s]
317
  1%| | 14/1710 [00:06<14:10, 2.00it/s]
318
  1%| | 15/1710 [00:07<14:07, 2.00it/s]
319
  1%| | 16/1710 [00:08<15:00, 1.88it/s]
320
  1%| | 17/1710 [00:08<13:46, 2.05it/s]
321
  1%| | 18/1710 [00:08<13:39, 2.06it/s]
322
  1%| | 19/1710 [00:09<12:48, 2.20it/s]
323
  1%| | 20/1710 [00:09<12:31, 2.25it/s]
324
  1%| | 21/1710 [00:10<12:28, 2.26it/s]
325
  1%|▏ | 22/1710 [00:10<14:21, 1.96it/s]
326
  1%|▏ | 23/1710 [00:11<14:32, 1.93it/s]
327
  1%|▏ | 24/1710 [00:11<13:26, 2.09it/s]
328
  1%|▏ | 25/1710 [00:12<14:27, 1.94it/s]
329
  2%|▏ | 26/1710 [00:12<13:07, 2.14it/s]
330
  2%|▏ | 27/1710 [00:13<14:22, 1.95it/s]
331
  2%|▏ | 28/1710 [00:13<13:51, 2.02it/s]
332
  2%|▏ | 29/1710 [00:14<14:49, 1.89it/s]
333
  2%|▏ | 30/1710 [00:14<13:40, 2.05it/s]
334
  2%|▏ | 31/1710 [00:15<13:01, 2.15it/s]
335
  2%|▏ | 32/1710 [00:15<14:15, 1.96it/s]
336
  2%|▏ | 33/1710 [00:16<18:22, 1.52it/s]
337
  2%|▏ | 34/1710 [00:17<18:55, 1.48it/s]
338
  2%|▏ | 35/1710 [00:17<16:24, 1.70it/s]
339
  2%|▏ | 36/1710 [00:18<14:21, 1.94it/s]
340
  2%|▏ | 37/1710 [00:19<17:15, 1.62it/s]
341
  2%|▏ | 38/1710 [00:19<15:50, 1.76it/s]
342
  2%|▏ | 39/1710 [00:19<14:13, 1.96it/s]
343
  2%|▏ | 40/1710 [00:20<14:20, 1.94it/s]
344
  2%|▏ | 41/1710 [00:20<13:25, 2.07it/s]
345
  2%|▏ | 42/1710 [00:21<14:16, 1.95it/s]
346
  3%|▎ | 43/1710 [00:21<14:07, 1.97it/s]
347
  3%|▎ | 44/1710 [00:22<13:33, 2.05it/s]
348
  3%|▎ | 45/1710 [00:22<12:56, 2.14it/s]
349
  3%|▎ | 46/1710 [00:23<12:49, 2.16it/s]
350
  3%|▎ | 47/1710 [00:23<12:51, 2.16it/s]
351
  3%|▎ | 48/1710 [00:24<12:06, 2.29it/s]
352
  3%|▎ | 49/1710 [00:24<15:02, 1.84it/s]
353
  3%|▎ | 50/1710 [00:25<14:06, 1.96it/s]
354
  3%|▎ | 51/1710 [00:25<13:11, 2.10it/s]
355
  3%|▎ | 52/1710 [00:26<12:45, 2.17it/s]
356
  3%|▎ | 53/1710 [00:26<12:22, 2.23it/s]
357
  3%|▎ | 54/1710 [00:27<12:33, 2.20it/s]
358
  3%|▎ | 55/1710 [00:27<15:05, 1.83it/s]
359
  3%|▎ | 56/1710 [00:28<13:50, 1.99it/s]
360
  3%|▎ | 57/1710 [00:28<13:48, 2.00it/s]
361
  3%|▎ | 58/1710 [00:29<12:17, 2.24it/s]
362
  3%|▎ | 59/1710 [00:29<11:30, 2.39it/s]
363
  4%|▎ | 60/1710 [00:29<11:26, 2.40it/s]
364
  4%|▎ | 61/1710 [00:30<12:02, 2.28it/s]
365
  4%|▎ | 62/1710 [00:31<14:35, 1.88it/s]
366
  4%|▎ | 63/1710 [00:31<13:25, 2.05it/s]
367
  4%|▎ | 64/1710 [00:31<12:02, 2.28it/s]
368
  4%|▍ | 65/1710 [00:32<12:17, 2.23it/s]
369
  4%|▍ | 66/1710 [00:32<11:20, 2.42it/s]
370
  4%|▍ | 67/1710 [00:33<12:26, 2.20it/s]
371
  4%|▍ | 68/1710 [00:33<12:17, 2.23it/s]
372
  4%|▍ | 69/1710 [00:33<12:09, 2.25it/s]
373
  4%|▍ | 70/1710 [00:34<12:14, 2.23it/s]
374
  4%|▍ | 71/1710 [00:34<11:59, 2.28it/s]
375
  4%|▍ | 72/1710 [00:35<12:12, 2.24it/s]
376
  4%|▍ | 73/1710 [00:35<11:23, 2.40it/s]
377
  4%|▍ | 74/1710 [00:36<11:55, 2.29it/s]
378
  4%|▍ | 75/1710 [00:36<11:33, 2.36it/s]
379
  4%|▍ | 76/1710 [00:37<13:03, 2.09it/s]
380
  5%|▍ | 77/1710 [00:37<13:12, 2.06it/s]
381
  5%|▍ | 78/1710 [00:38<14:10, 1.92it/s]
382
  5%|▍ | 79/1710 [00:38<14:31, 1.87it/s]
383
  5%|▍ | 80/1710 [00:39<13:57, 1.95it/s]
384
  5%|▍ | 81/1710 [00:39<13:52, 1.96it/s]
385
  5%|▍ | 82/1710 [00:40<12:48, 2.12it/s]
386
  5%|▍ | 83/1710 [00:40<14:18, 1.90it/s]
387
  5%|▍ | 84/1710 [00:41<13:36, 1.99it/s]
388
  5%|▍ | 85/1710 [00:41<12:53, 2.10it/s]
389
  5%|▌ | 86/1710 [00:42<12:27, 2.17it/s]
390
  5%|▌ | 87/1710 [00:42<12:30, 2.16it/s]
391
  5%|▌ | 88/1710 [00:43<12:09, 2.22it/s]
392
  5%|▌ | 89/1710 [00:43<12:54, 2.09it/s]
393
  5%|▌ | 90/1710 [00:43<12:16, 2.20it/s]
394
  5%|▌ | 91/1710 [00:44<12:47, 2.11it/s]
395
  5%|▌ | 92/1710 [00:44<12:29, 2.16it/s]
396
  5%|▌ | 93/1710 [00:45<12:22, 2.18it/s]
397
  5%|▌ | 94/1710 [00:45<12:03, 2.23it/s]
398
  6%|▌ | 95/1710 [00:46<12:00, 2.24it/s]
399
  6%|▌ | 96/1710 [00:46<13:09, 2.05it/s]
400
  6%|▌ | 97/1710 [00:47<12:05, 2.22it/s]
401
  6%|▌ | 98/1710 [00:47<11:19, 2.37it/s]
402
  6%|▌ | 99/1710 [00:47<10:59, 2.44it/s]
403
  6%|▌ | 100/1710 [00:48<11:51, 2.26it/s]
404
  6%|▌ | 101/1710 [00:48<11:43, 2.29it/s]
405
  6%|▌ | 102/1710 [00:49<11:46, 2.28it/s]
406
  6%|▌ | 103/1710 [00:49<11:30, 2.33it/s]
407
  6%|▌ | 104/1710 [00:50<12:02, 2.22it/s]
408
  6%|▌ | 105/1710 [00:50<11:09, 2.40it/s]
409
  6%|▌ | 106/1710 [00:50<11:00, 2.43it/s]
410
  6%|▋ | 107/1710 [00:51<11:12, 2.38it/s]
411
  6%|▋ | 108/1710 [00:51<10:58, 2.43it/s]
412
  6%|▋ | 109/1710 [00:52<10:58, 2.43it/s]
413
  6%|▋ | 110/1710 [00:52<11:53, 2.24it/s]
414
  6%|▋ | 111/1710 [00:53<11:47, 2.26it/s]
415
  7%|▋ | 112/1710 [00:53<11:39, 2.29it/s]
416
  7%|▋ | 113/1710 [00:53<11:16, 2.36it/s]
417
  7%|▋ | 114/1710 [00:54<11:12, 2.37it/s]
418
  7%|▋ | 115/1710 [00:54<10:35, 2.51it/s]
419
  7%|▋ | 116/1710 [00:55<11:13, 2.37it/s]
420
  7%|▋ | 117/1710 [00:55<11:17, 2.35it/s]
421
  7%|▋ | 118/1710 [00:56<14:58, 1.77it/s]
422
  7%|▋ | 119/1710 [00:56<14:07, 1.88it/s]
423
  7%|▋ | 120/1710 [00:57<13:45, 1.93it/s]
424
  7%|▋ | 121/1710 [00:57<12:26, 2.13it/s]
425
  7%|▋ | 122/1710 [00:58<11:56, 2.22it/s]
426
  7%|▋ | 123/1710 [00:58<11:05, 2.39it/s]
427
  7%|▋ | 124/1710 [00:58<10:58, 2.41it/s]
428
  7%|▋ | 125/1710 [00:59<10:03, 2.63it/s]
429
  7%|▋ | 126/1710 [00:59<10:30, 2.51it/s]
430
  7%|▋ | 127/1710 [01:00<10:41, 2.47it/s]
431
  7%|▋ | 128/1710 [01:00<10:48, 2.44it/s]
432
  8%|▊ | 129/1710 [01:00<10:51, 2.42it/s]
433
  8%|▊ | 130/1710 [01:01<11:48, 2.23it/s]
434
  8%|▊ | 131/1710 [01:01<11:11, 2.35it/s]
435
  8%|▊ | 132/1710 [01:02<11:31, 2.28it/s]
436
  8%|▊ | 133/1710 [01:02<11:40, 2.25it/s]
437
  8%|▊ | 134/1710 [01:03<10:50, 2.42it/s]
438
  8%|▊ | 135/1710 [01:03<10:17, 2.55it/s]
439
  8%|▊ | 136/1710 [01:03<10:39, 2.46it/s]
440
  8%|▊ | 137/1710 [01:04<11:22, 2.30it/s]
441
  8%|▊ | 138/1710 [01:04<11:21, 2.31it/s]
442
  8%|▊ | 139/1710 [01:05<11:55, 2.20it/s]
443
  8%|▊ | 140/1710 [01:06<14:01, 1.87it/s]
444
  8%|▊ | 141/1710 [01:06<12:55, 2.02it/s]
445
  8%|▊ | 142/1710 [01:07<13:39, 1.91it/s]
446
  8%|▊ | 143/1710 [01:07<12:42, 2.06it/s]
447
  8%|▊ | 144/1710 [01:07<11:24, 2.29it/s]
448
  8%|▊ | 145/1710 [01:08<11:51, 2.20it/s]
449
  9%|▊ | 146/1710 [01:08<12:38, 2.06it/s]
450
  9%|▊ | 147/1710 [01:09<12:53, 2.02it/s]
451
  9%|▊ | 148/1710 [01:09<11:37, 2.24it/s]
452
  9%|▊ | 149/1710 [01:10<13:00, 2.00it/s]
453
  9%|▉ | 150/1710 [01:10<13:03, 1.99it/s]
454
  9%|▉ | 151/1710 [01:11<12:32, 2.07it/s]
455
  9%|▉ | 152/1710 [01:11<12:23, 2.10it/s]
456
  9%|▉ | 153/1710 [01:12<11:41, 2.22it/s]
457
  9%|▉ | 154/1710 [01:12<11:02, 2.35it/s]
458
  9%|▉ | 155/1710 [01:12<10:28, 2.48it/s]
459
  9%|▉ | 156/1710 [01:13<11:00, 2.35it/s]
460
  9%|▉ | 157/1710 [01:13<10:35, 2.44it/s]
461
  9%|▉ | 158/1710 [01:14<10:03, 2.57it/s]
462
  9%|▉ | 159/1710 [01:14<10:55, 2.37it/s]
463
  9%|▉ | 160/1710 [01:14<11:03, 2.34it/s]
464
  9%|▉ | 161/1710 [01:15<10:27, 2.47it/s]
465
  9%|▉ | 162/1710 [01:15<10:53, 2.37it/s]
466
  10%|▉ | 163/1710 [01:16<11:08, 2.31it/s]
467
  10%|▉ | 164/1710 [01:16<10:45, 2.39it/s]
468
  10%|▉ | 165/1710 [01:17<11:08, 2.31it/s]
469
  10%|▉ | 166/1710 [01:17<12:47, 2.01it/s]
470
  10%|▉ | 167/1710 [01:18<11:48, 2.18it/s]
471
  10%|▉ | 168/1710 [01:18<11:28, 2.24it/s]
472
  10%|▉ | 169/1710 [01:19<12:21, 2.08it/s]
473
  10%|▉ | 170/1710 [01:19<11:20, 2.26it/s]
474
  10%|█ | 171/1710 [01:19<11:08, 2.30it/s][INFO|trainer.py:811] 2024-09-09 12:16:15,508 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
475
+ [INFO|trainer.py:3819] 2024-09-09 12:16:15,510 >>
476
+ ***** Running Evaluation *****
477
+ [INFO|trainer.py:3821] 2024-09-09 12:16:15,510 >> Num examples = 2519
478
+ [INFO|trainer.py:3824] 2024-09-09 12:16:15,510 >> Batch size = 8
479
+
480
+
481
  0%| | 0/315 [00:00<?, ?it/s]
482
+
483
  3%|▎ | 8/315 [00:00<00:04, 74.15it/s]
484
+
485
  5%|▌ | 16/315 [00:00<00:04, 72.87it/s]
486
+
487
  8%|▊ | 24/315 [00:00<00:03, 74.59it/s]
488
+
489
  10%|█ | 32/315 [00:00<00:04, 70.63it/s]
490
+
491
  13%|█▎ | 40/315 [00:00<00:03, 71.80it/s]
492
+
493
  15%|█▌ | 48/315 [00:00<00:03, 72.49it/s]
494
+
495
  18%|█▊ | 56/315 [00:00<00:03, 72.18it/s]
496
+
497
  20%|██ | 64/315 [00:00<00:03, 70.11it/s]
498
+
499
  23%|██▎ | 72/315 [00:00<00:03, 72.26it/s]
500
+
501
  25%|██▌ | 80/315 [00:01<00:03, 68.95it/s]
502
+
503
  28%|██▊ | 87/315 [00:01<00:03, 67.94it/s]
504
+
505
  30%|███ | 95/315 [00:01<00:03, 69.32it/s]
506
+
507
  32%|███▏ | 102/315 [00:01<00:03, 65.97it/s]
508
+
509
  35%|███▍ | 110/315 [00:01<00:02, 68.77it/s]
510
+
511
  37%|███▋ | 118/315 [00:01<00:02, 70.24it/s]
512
+
513
  40%|████ | 126/315 [00:01<00:02, 67.00it/s]
514
+
515
  43%|████▎ | 134/315 [00:01<00:02, 67.51it/s]
516
+
517
  45%|████▍ | 141/315 [00:02<00:02, 68.03it/s]
518
+
519
  47%|████▋ | 149/315 [00:02<00:02, 70.61it/s]
520
+
521
  50%|████▉ | 157/315 [00:02<00:02, 72.86it/s]
522
+
523
  52%|█████▏ | 165/315 [00:02<00:02, 71.30it/s]
524
+
525
  55%|█████▍ | 173/315 [00:02<00:02, 70.07it/s]
526
+
527
  57%|█████▋ | 181/315 [00:02<00:01, 67.81it/s]
528
+
529
  60%|██████ | 189/315 [00:02<00:01, 68.05it/s]
530
+
531
  62%|██████▏ | 196/315 [00:02<00:01, 67.00it/s]
532
+
533
  64%|██████▍ | 203/315 [00:02<00:01, 64.45it/s]
534
+
535
  67%|██████▋ | 210/315 [00:03<00:01, 65.01it/s]
536
+
537
  69%|██████▉ | 218/315 [00:03<00:01, 68.74it/s]
538
+
539
  72%|███████▏ | 226/315 [00:03<00:01, 71.36it/s]
540
+
541
  75%|███████▍ | 235/315 [00:03<00:01, 74.60it/s]
542
+
543
  77%|███████▋ | 243/315 [00:03<00:01, 70.81it/s]
544
+
545
  80%|███████▉ | 251/315 [00:03<00:00, 70.96it/s]
546
+
547
  82%|████████▏ | 259/315 [00:03<00:00, 68.92it/s]
548
+
549
  85%|████████▍ | 267/315 [00:03<00:00, 70.16it/s]
550
+
551
  88%|████████▊ | 276/315 [00:03<00:00, 73.45it/s]
552
+
553
  90%|█████████ | 284/315 [00:04<00:00, 73.52it/s]
554
+
555
  93%|█████████▎| 292/315 [00:04<00:00, 71.60it/s]
556
+
557
  95%|█████████▌| 300/315 [00:04<00:00, 71.31it/s]
558
+
559
  98%|█████████▊| 308/315 [00:04<00:00, 71.30it/s]
560
 
561
+
562
 
563
  10%|█ | 171/1710 [01:25<11:08, 2.30it/s]
564
+
565
+
566
  [INFO|trainer.py:3503] 2024-09-09 12:16:21,499 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-171
567
+ [INFO|configuration_utils.py:472] 2024-09-09 12:16:21,501 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-171/config.json
568
+ [INFO|modeling_utils.py:2799] 2024-09-09 12:16:22,527 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-171/model.safetensors
569
+ [INFO|tokenization_utils_base.py:2684] 2024-09-09 12:16:22,528 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-171/tokenizer_config.json
570
+ [INFO|tokenization_utils_base.py:2693] 2024-09-09 12:16:22,529 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-171/special_tokens_map.json
571
+ [INFO|tokenization_utils_base.py:2684] 2024-09-09 12:16:25,565 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
572
+ [INFO|tokenization_utils_base.py:2693] 2024-09-09 12:16:25,565 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
573
+
574
 10%|█         | 172/1710 [01:30<1:27:43, 3.42s/it]
 ...
 16%|█▌        | 277/1710 [02:18<10:22, 2.30it/s]
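Each progress line reports step count, elapsed time, projected time remaining, and instantaneous rate; under tqdm's usual convention, the remaining-time estimate is simply the remaining step count divided by the current rate. A quick arithmetic check against the last line above:

```python
# Sanity-check the ETA on the final progress line above: at step 277 of
# 1710, running at 2.30 it/s, the remaining time should be
# (1710 - 277) / 2.30 seconds.
steps_done, steps_total, rate = 277, 1710, 2.30
remaining_s = (steps_total - steps_done) / rate
minutes, seconds = divmod(int(remaining_s), 60)
print(f"{minutes}:{seconds:02d}")  # ~10:23, matching the reported 10:22
```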
train_results.json ADDED
@@ -0,0 +1,9 @@
+ {
+ "epoch": 9.975429975429975,
+ "total_flos": 6404835399317064.0,
+ "train_loss": 0.04138289297302368,
+ "train_runtime": 1065.756,
+ "train_samples": 13013,
+ "train_samples_per_second": 122.101,
+ "train_steps_per_second": 1.905
+ }
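These summary fields appear internally consistent: the Trainer seems to derive throughput from the configured epoch count and the total optimizer-step count divided by the wall-clock runtime. A small check, reusing the values above (the step total of 2030 comes from trainer_state.json below):

```python
# Reproduce the throughput fields of train_results.json from its other
# fields, assuming the Trainer uses the configured epoch count (10) rather
# than the fractional final epoch.
train_samples = 13013
num_epochs = 10
total_steps = 2030
train_runtime = 1065.756  # seconds

print(round(train_samples * num_epochs / train_runtime, 3))  # 122.101
print(round(total_steps / train_runtime, 3))                 # 1.905
```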
trainer_state.json ADDED
@@ -0,0 +1,190 @@
+ {
+ "best_metric": 0.6984379136881121,
+ "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1831",
+ "epoch": 9.975429975429975,
+ "eval_steps": 500,
+ "global_step": 2030,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.9975429975429976,
+ "eval_accuracy": 0.9467740383072925,
+ "eval_f1": 0.6143236074270556,
+ "eval_loss": 0.15010379254817963,
+ "eval_precision": 0.5959855892949047,
+ "eval_recall": 0.6338259441707718,
+ "eval_runtime": 5.907,
+ "eval_samples_per_second": 426.445,
+ "eval_steps_per_second": 53.327,
+ "step": 203
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.949244441592608,
+ "eval_f1": 0.6728575218890952,
+ "eval_loss": 0.17612887918949127,
+ "eval_precision": 0.6529351184346035,
+ "eval_recall": 0.6940339354132458,
+ "eval_runtime": 5.8933,
+ "eval_samples_per_second": 427.436,
+ "eval_steps_per_second": 53.451,
+ "step": 407
+ },
+ {
+ "epoch": 2.457002457002457,
+ "grad_norm": 0.6181371212005615,
+ "learning_rate": 3.768472906403941e-05,
+ "loss": 0.1312,
+ "step": 500
+ },
+ {
+ "epoch": 2.9975429975429977,
+ "eval_accuracy": 0.9469665372645898,
+ "eval_f1": 0.671967171069505,
+ "eval_loss": 0.1995203047990799,
+ "eval_precision": 0.6322393822393823,
+ "eval_recall": 0.7170224411603722,
+ "eval_runtime": 5.8448,
+ "eval_samples_per_second": 430.983,
+ "eval_steps_per_second": 53.894,
+ "step": 610
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.9482979883858963,
+ "eval_f1": 0.6774025974025973,
+ "eval_loss": 0.21822449564933777,
+ "eval_precision": 0.6445872466633712,
+ "eval_recall": 0.7137383689107827,
+ "eval_runtime": 5.872,
+ "eval_samples_per_second": 428.988,
+ "eval_steps_per_second": 53.645,
+ "step": 814
+ },
+ {
+ "epoch": 4.914004914004914,
+ "grad_norm": 0.7616795301437378,
+ "learning_rate": 2.5369458128078822e-05,
+ "loss": 0.0248,
+ "step": 1000
+ },
+ {
+ "epoch": 4.997542997542998,
+ "eval_accuracy": 0.9448650903140942,
+ "eval_f1": 0.6700533401066802,
+ "eval_loss": 0.24612903594970703,
+ "eval_precision": 0.6251184834123222,
+ "eval_recall": 0.7219485495347564,
+ "eval_runtime": 5.8462,
+ "eval_samples_per_second": 430.877,
+ "eval_steps_per_second": 53.881,
+ "step": 1017
+ },
+ {
+ "epoch": 6.0,
+ "eval_accuracy": 0.9469023709454907,
+ "eval_f1": 0.6827021494370521,
+ "eval_loss": 0.26953065395355225,
+ "eval_precision": 0.6410379625180201,
+ "eval_recall": 0.7301587301587301,
+ "eval_runtime": 5.9067,
+ "eval_samples_per_second": 426.468,
+ "eval_steps_per_second": 53.33,
+ "step": 1221
+ },
+ {
+ "epoch": 6.997542997542998,
+ "eval_accuracy": 0.9469986204241394,
+ "eval_f1": 0.6910590054109765,
+ "eval_loss": 0.2829184830188751,
+ "eval_precision": 0.6528724440116845,
+ "eval_recall": 0.7339901477832512,
+ "eval_runtime": 5.8572,
+ "eval_samples_per_second": 430.069,
+ "eval_steps_per_second": 53.78,
+ "step": 1424
+ },
+ {
+ "epoch": 7.371007371007371,
+ "grad_norm": 0.2855200171470642,
+ "learning_rate": 1.3054187192118228e-05,
+ "loss": 0.0081,
+ "step": 1500
+ },
+ {
+ "epoch": 8.0,
+ "eval_accuracy": 0.9494048573903558,
+ "eval_f1": 0.6938127974616606,
+ "eval_loss": 0.29823970794677734,
+ "eval_precision": 0.6710997442455243,
+ "eval_recall": 0.7181171319102354,
+ "eval_runtime": 5.8929,
+ "eval_samples_per_second": 427.463,
+ "eval_steps_per_second": 53.454,
+ "step": 1628
+ },
+ {
+ "epoch": 8.997542997542997,
+ "eval_accuracy": 0.9500465205813469,
+ "eval_f1": 0.6984379136881121,
+ "eval_loss": 0.30729904770851135,
+ "eval_precision": 0.6764102564102564,
+ "eval_recall": 0.7219485495347564,
+ "eval_runtime": 5.8665,
+ "eval_samples_per_second": 429.386,
+ "eval_steps_per_second": 53.695,
+ "step": 1831
+ },
+ {
+ "epoch": 9.828009828009828,
+ "grad_norm": 0.6682894825935364,
+ "learning_rate": 7.389162561576355e-07,
+ "loss": 0.0038,
+ "step": 2000
+ },
+ {
+ "epoch": 9.975429975429975,
+ "eval_accuracy": 0.9500465205813469,
+ "eval_f1": 0.6931427058512046,
+ "eval_loss": 0.3079104423522949,
+ "eval_precision": 0.6712820512820513,
+ "eval_recall": 0.7164750957854407,
+ "eval_runtime": 5.9033,
+ "eval_samples_per_second": 426.708,
+ "eval_steps_per_second": 53.36,
+ "step": 2030
+ },
+ {
+ "epoch": 9.975429975429975,
+ "step": 2030,
+ "total_flos": 6404835399317064.0,
+ "train_loss": 0.04138289297302368,
+ "train_runtime": 1065.756,
+ "train_samples_per_second": 122.101,
+ "train_steps_per_second": 1.905
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 2030,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 10,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 6404835399317064.0,
+ "train_batch_size": 32,
+ "trial_name": null,
+ "trial_params": null
+ }
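trainer_state.json is plain JSON, so the best checkpoint and its metric can be pulled out programmatically. A minimal sketch (it assumes the file has been downloaded to the working directory; "best_metric" here corresponds to the eval F1 logged at step 1831):

```python
import json

# Read the state saved by transformers.Trainer and report the best checkpoint.
with open("trainer_state.json") as f:
    state = json.load(f)

print(state["best_model_checkpoint"])  # .../checkpoint-1831
print(state["best_metric"])            # 0.6984...

# Cross-check against the per-epoch eval records in log_history.
evals = [r for r in state["log_history"] if "eval_f1" in r]
best = max(evals, key=lambda r: r["eval_f1"])
print(best["step"], best["eval_f1"])   # 1831 0.6984...
```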
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
+ size 5240
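This is a Git LFS pointer, not the binary itself: the repo stores only the object's SHA-256 and byte size, and the actual training_args.bin is fetched from LFS storage. A small sketch for verifying a downloaded copy against the pointer above:

```python
import hashlib
import os

# Verify a downloaded training_args.bin against the LFS pointer fields above
# (assumes the real LFS object has already been pulled to this path).
EXPECTED_OID = "13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57"
EXPECTED_SIZE = 5240  # bytes

path = "training_args.bin"
assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()
assert digest == EXPECTED_OID, "sha256 mismatch"
print("training_args.bin matches the LFS pointer")
```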
vocab.json ADDED
The diff for this file is too large to render. See raw diff