Training in progress, epoch 1
Browse files- README.md +103 -0
- all_results.json +26 -0
- config.json +39 -0
- eval_results.json +12 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- predict_results.json +10 -0
- predictions.txt +0 -0
- special_tokens_map.json +51 -0
- tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0 +3 -0
- tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1 +3 -0
- tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0 +3 -0
- tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1 +3 -0
- tb/events.out.tfevents.1725884095.0a1c9bec2a53.15221.0 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- train.log +357 -0
- train_results.json +9 -0
- trainer_state.json +190 -0
- training_args.bin +3 -0
- vocab.json +0 -0
README.md
ADDED
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
license: apache-2.0
|
4 |
+
base_model: PlanTL-GOB-ES/bsc-bio-ehr-es
|
5 |
+
tags:
|
6 |
+
- token-classification
|
7 |
+
- generated_from_trainer
|
8 |
+
datasets:
|
9 |
+
- Rodrigo1771/symptemist-fasttext-8-ner
|
10 |
+
metrics:
|
11 |
+
- precision
|
12 |
+
- recall
|
13 |
+
- f1
|
14 |
+
- accuracy
|
15 |
+
model-index:
|
16 |
+
- name: output
|
17 |
+
results:
|
18 |
+
- task:
|
19 |
+
name: Token Classification
|
20 |
+
type: token-classification
|
21 |
+
dataset:
|
22 |
+
name: Rodrigo1771/symptemist-fasttext-8-ner
|
23 |
+
type: Rodrigo1771/symptemist-fasttext-8-ner
|
24 |
+
config: SympTEMIST NER
|
25 |
+
split: validation
|
26 |
+
args: SympTEMIST NER
|
27 |
+
metrics:
|
28 |
+
- name: Precision
|
29 |
+
type: precision
|
30 |
+
value: 0.6764102564102564
|
31 |
+
- name: Recall
|
32 |
+
type: recall
|
33 |
+
value: 0.7219485495347564
|
34 |
+
- name: F1
|
35 |
+
type: f1
|
36 |
+
value: 0.6984379136881121
|
37 |
+
- name: Accuracy
|
38 |
+
type: accuracy
|
39 |
+
value: 0.9500465205813469
|
40 |
+
---
|
41 |
+
|
42 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
43 |
+
should probably proofread and complete it, then remove this comment. -->
|
44 |
+
|
45 |
+
# output
|
46 |
+
|
47 |
+
This model is a fine-tuned version of [PlanTL-GOB-ES/bsc-bio-ehr-es](https://huggingface.co/PlanTL-GOB-ES/bsc-bio-ehr-es) on the Rodrigo1771/symptemist-fasttext-8-ner dataset.
|
48 |
+
It achieves the following results on the evaluation set:
|
49 |
+
- Loss: 0.3073
|
50 |
+
- Precision: 0.6764
|
51 |
+
- Recall: 0.7219
|
52 |
+
- F1: 0.6984
|
53 |
+
- Accuracy: 0.9500
|
54 |
+
|
55 |
+
## Model description
|
56 |
+
|
57 |
+
More information needed
|
58 |
+
|
59 |
+
## Intended uses & limitations
|
60 |
+
|
61 |
+
More information needed
|
62 |
+
|
63 |
+
## Training and evaluation data
|
64 |
+
|
65 |
+
More information needed
|
66 |
+
|
67 |
+
## Training procedure
|
68 |
+
|
69 |
+
### Training hyperparameters
|
70 |
+
|
71 |
+
The following hyperparameters were used during training:
|
72 |
+
- learning_rate: 5e-05
|
73 |
+
- train_batch_size: 32
|
74 |
+
- eval_batch_size: 8
|
75 |
+
- seed: 42
|
76 |
+
- gradient_accumulation_steps: 2
|
77 |
+
- total_train_batch_size: 64
|
78 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
79 |
+
- lr_scheduler_type: linear
|
80 |
+
- num_epochs: 10.0
|
81 |
+
|
82 |
+
### Training results
|
83 |
+
|
84 |
+
| Training Loss | Epoch | Step | Validation Loss | Precision | Recall | F1 | Accuracy |
|
85 |
+
|:-------------:|:------:|:----:|:---------------:|:---------:|:------:|:------:|:--------:|
|
86 |
+
| No log | 0.9975 | 203 | 0.1501 | 0.5960 | 0.6338 | 0.6143 | 0.9468 |
|
87 |
+
| No log | 2.0 | 407 | 0.1761 | 0.6529 | 0.6940 | 0.6729 | 0.9492 |
|
88 |
+
| 0.1312 | 2.9975 | 610 | 0.1995 | 0.6322 | 0.7170 | 0.6720 | 0.9470 |
|
89 |
+
| 0.1312 | 4.0 | 814 | 0.2182 | 0.6446 | 0.7137 | 0.6774 | 0.9483 |
|
90 |
+
| 0.0248 | 4.9975 | 1017 | 0.2461 | 0.6251 | 0.7219 | 0.6701 | 0.9449 |
|
91 |
+
| 0.0248 | 6.0 | 1221 | 0.2695 | 0.6410 | 0.7302 | 0.6827 | 0.9469 |
|
92 |
+
| 0.0248 | 6.9975 | 1424 | 0.2829 | 0.6529 | 0.7340 | 0.6911 | 0.9470 |
|
93 |
+
| 0.0081 | 8.0 | 1628 | 0.2982 | 0.6711 | 0.7181 | 0.6938 | 0.9494 |
|
94 |
+
| 0.0081 | 8.9975 | 1831 | 0.3073 | 0.6764 | 0.7219 | 0.6984 | 0.9500 |
|
95 |
+
| 0.0038 | 9.9754 | 2030 | 0.3079 | 0.6713 | 0.7165 | 0.6931 | 0.9500 |
|
96 |
+
|
97 |
+
|
98 |
+
### Framework versions
|
99 |
+
|
100 |
+
- Transformers 4.44.2
|
101 |
+
- Pytorch 2.4.0+cu121
|
102 |
+
- Datasets 2.21.0
|
103 |
+
- Tokenizers 0.19.1
|
all_results.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706,
|
12 |
+
"predict_accuracy": 0.9466933985906772,
|
13 |
+
"predict_f1": 0.6951548848292296,
|
14 |
+
"predict_loss": 0.3347860872745514,
|
15 |
+
"predict_precision": 0.6863237139272271,
|
16 |
+
"predict_recall": 0.704216285806244,
|
17 |
+
"predict_runtime": 9.749,
|
18 |
+
"predict_samples_per_second": 415.118,
|
19 |
+
"predict_steps_per_second": 51.903,
|
20 |
+
"total_flos": 6404835399317064.0,
|
21 |
+
"train_loss": 0.04138289297302368,
|
22 |
+
"train_runtime": 1065.756,
|
23 |
+
"train_samples": 13013,
|
24 |
+
"train_samples_per_second": 122.101,
|
25 |
+
"train_steps_per_second": 1.905
|
26 |
+
}
|
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"finetuning_task": "ner",
|
11 |
+
"gradient_checkpointing": false,
|
12 |
+
"hidden_act": "gelu",
|
13 |
+
"hidden_dropout_prob": 0.1,
|
14 |
+
"hidden_size": 768,
|
15 |
+
"id2label": {
|
16 |
+
"0": "O",
|
17 |
+
"1": "B-SINTOMA",
|
18 |
+
"2": "I-SINTOMA"
|
19 |
+
},
|
20 |
+
"initializer_range": 0.02,
|
21 |
+
"intermediate_size": 3072,
|
22 |
+
"label2id": {
|
23 |
+
"B-SINTOMA": 1,
|
24 |
+
"I-SINTOMA": 2,
|
25 |
+
"O": 0
|
26 |
+
},
|
27 |
+
"layer_norm_eps": 1e-05,
|
28 |
+
"max_position_embeddings": 514,
|
29 |
+
"model_type": "roberta",
|
30 |
+
"num_attention_heads": 12,
|
31 |
+
"num_hidden_layers": 12,
|
32 |
+
"pad_token_id": 1,
|
33 |
+
"position_embedding_type": "absolute",
|
34 |
+
"torch_dtype": "float32",
|
35 |
+
"transformers_version": "4.44.2",
|
36 |
+
"type_vocab_size": 1,
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 50262
|
39 |
+
}
|
eval_results.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"eval_accuracy": 0.9500465205813469,
|
4 |
+
"eval_f1": 0.6984379136881121,
|
5 |
+
"eval_loss": 0.30729904770851135,
|
6 |
+
"eval_precision": 0.6764102564102564,
|
7 |
+
"eval_recall": 0.7219485495347564,
|
8 |
+
"eval_runtime": 6.0921,
|
9 |
+
"eval_samples": 2519,
|
10 |
+
"eval_samples_per_second": 413.484,
|
11 |
+
"eval_steps_per_second": 51.706
|
12 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd56006954dc777a98f3c5e0587cc614d34216bcb27350118db301e7a844faa9
|
3 |
+
size 496244100
|
predict_results.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"predict_accuracy": 0.9466933985906772,
|
3 |
+
"predict_f1": 0.6951548848292296,
|
4 |
+
"predict_loss": 0.3347860872745514,
|
5 |
+
"predict_precision": 0.6863237139272271,
|
6 |
+
"predict_recall": 0.704216285806244,
|
7 |
+
"predict_runtime": 9.749,
|
8 |
+
"predict_samples_per_second": 415.118,
|
9 |
+
"predict_steps_per_second": 51.903
|
10 |
+
}
|
predictions.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": true,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tb/events.out.tfevents.1725881335.0a1c9bec2a53.3232.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:593e737686a00ae0f64a94f2ef02389ad7dff30c0ba6a6f2b1c65ac31e873867
|
3 |
+
size 11302
|
tb/events.out.tfevents.1725882696.0a1c9bec2a53.3232.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:05ecdc6d00855fb66deb25a7b5be160aa0ebb2ebe07a43beb7d88fb0430fb141
|
3 |
+
size 560
|
tb/events.out.tfevents.1725882852.0a1c9bec2a53.9893.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:543df15001da008ba822f9c1ebf4f77259f803cbf1c5758f2da70bdbf003d86f
|
3 |
+
size 11091
|
tb/events.out.tfevents.1725883955.0a1c9bec2a53.9893.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b842d84c89f0d88706e31e98b113fae6b45879220115930147db648f848a8c24
|
3 |
+
size 560
|
tb/events.out.tfevents.1725884095.0a1c9bec2a53.15221.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9529d9c4be592245c933a5892ef8b71c8be99c2c71381d022d5f07c90bd6362
|
3 |
+
size 5645
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": true,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50261": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": true,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"max_len": 512,
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "<pad>",
|
54 |
+
"sep_token": "</s>",
|
55 |
+
"tokenizer_class": "RobertaTokenizer",
|
56 |
+
"trim_offsets": true,
|
57 |
+
"unk_token": "<unk>"
|
58 |
+
}
|
train.log
ADDED
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0 |
0%| | 0/1710 [00:00<?, ?it/s]
|
1 |
0%| | 1/1710 [00:01<32:09, 1.13s/it]
|
2 |
0%| | 2/1710 [00:01<19:10, 1.48it/s]
|
3 |
0%| | 3/1710 [00:01<16:10, 1.76it/s]
|
4 |
0%| | 4/1710 [00:02<14:04, 2.02it/s]
|
5 |
0%| | 5/1710 [00:02<13:32, 2.10it/s]
|
6 |
0%| | 6/1710 [00:03<14:07, 2.01it/s]
|
7 |
0%| | 7/1710 [00:03<12:53, 2.20it/s]
|
8 |
0%| | 8/1710 [00:04<12:12, 2.32it/s]
|
9 |
1%| | 9/1710 [00:04<12:30, 2.27it/s]
|
10 |
1%| | 10/1710 [00:04<11:41, 2.42it/s]
|
11 |
1%| | 11/1710 [00:05<12:06, 2.34it/s]
|
12 |
1%| | 12/1710 [00:05<13:56, 2.03it/s]
|
13 |
1%| | 13/1710 [00:06<14:13, 1.99it/s]
|
14 |
1%| | 14/1710 [00:06<14:10, 2.00it/s]
|
15 |
1%| | 15/1710 [00:07<14:07, 2.00it/s]
|
16 |
1%| | 16/1710 [00:08<15:00, 1.88it/s]
|
17 |
1%| | 17/1710 [00:08<13:46, 2.05it/s]
|
18 |
1%| | 18/1710 [00:08<13:39, 2.06it/s]
|
19 |
1%| | 19/1710 [00:09<12:48, 2.20it/s]
|
20 |
1%| | 20/1710 [00:09<12:31, 2.25it/s]
|
21 |
1%| | 21/1710 [00:10<12:28, 2.26it/s]
|
22 |
1%|▏ | 22/1710 [00:10<14:21, 1.96it/s]
|
23 |
1%|▏ | 23/1710 [00:11<14:32, 1.93it/s]
|
24 |
1%|▏ | 24/1710 [00:11<13:26, 2.09it/s]
|
25 |
1%|▏ | 25/1710 [00:12<14:27, 1.94it/s]
|
26 |
2%|▏ | 26/1710 [00:12<13:07, 2.14it/s]
|
27 |
2%|▏ | 27/1710 [00:13<14:22, 1.95it/s]
|
28 |
2%|▏ | 28/1710 [00:13<13:51, 2.02it/s]
|
29 |
2%|▏ | 29/1710 [00:14<14:49, 1.89it/s]
|
30 |
2%|▏ | 30/1710 [00:14<13:40, 2.05it/s]
|
31 |
2%|▏ | 31/1710 [00:15<13:01, 2.15it/s]
|
32 |
2%|▏ | 32/1710 [00:15<14:15, 1.96it/s]
|
33 |
2%|▏ | 33/1710 [00:16<18:22, 1.52it/s]
|
34 |
2%|▏ | 34/1710 [00:17<18:55, 1.48it/s]
|
35 |
2%|▏ | 35/1710 [00:17<16:24, 1.70it/s]
|
36 |
2%|▏ | 36/1710 [00:18<14:21, 1.94it/s]
|
37 |
2%|▏ | 37/1710 [00:19<17:15, 1.62it/s]
|
38 |
2%|▏ | 38/1710 [00:19<15:50, 1.76it/s]
|
39 |
2%|▏ | 39/1710 [00:19<14:13, 1.96it/s]
|
40 |
2%|▏ | 40/1710 [00:20<14:20, 1.94it/s]
|
41 |
2%|▏ | 41/1710 [00:20<13:25, 2.07it/s]
|
42 |
2%|▏ | 42/1710 [00:21<14:16, 1.95it/s]
|
43 |
3%|▎ | 43/1710 [00:21<14:07, 1.97it/s]
|
44 |
3%|▎ | 44/1710 [00:22<13:33, 2.05it/s]
|
45 |
3%|▎ | 45/1710 [00:22<12:56, 2.14it/s]
|
46 |
3%|▎ | 46/1710 [00:23<12:49, 2.16it/s]
|
47 |
3%|▎ | 47/1710 [00:23<12:51, 2.16it/s]
|
48 |
3%|▎ | 48/1710 [00:24<12:06, 2.29it/s]
|
49 |
3%|▎ | 49/1710 [00:24<15:02, 1.84it/s]
|
50 |
3%|▎ | 50/1710 [00:25<14:06, 1.96it/s]
|
51 |
3%|▎ | 51/1710 [00:25<13:11, 2.10it/s]
|
52 |
3%|▎ | 52/1710 [00:26<12:45, 2.17it/s]
|
53 |
3%|▎ | 53/1710 [00:26<12:22, 2.23it/s]
|
54 |
3%|▎ | 54/1710 [00:27<12:33, 2.20it/s]
|
55 |
3%|▎ | 55/1710 [00:27<15:05, 1.83it/s]
|
56 |
3%|▎ | 56/1710 [00:28<13:50, 1.99it/s]
|
57 |
3%|▎ | 57/1710 [00:28<13:48, 2.00it/s]
|
58 |
3%|▎ | 58/1710 [00:29<12:17, 2.24it/s]
|
59 |
3%|▎ | 59/1710 [00:29<11:30, 2.39it/s]
|
60 |
4%|▎ | 60/1710 [00:29<11:26, 2.40it/s]
|
61 |
4%|▎ | 61/1710 [00:30<12:02, 2.28it/s]
|
62 |
4%|▎ | 62/1710 [00:31<14:35, 1.88it/s]
|
63 |
4%|▎ | 63/1710 [00:31<13:25, 2.05it/s]
|
64 |
4%|▎ | 64/1710 [00:31<12:02, 2.28it/s]
|
65 |
4%|▍ | 65/1710 [00:32<12:17, 2.23it/s]
|
66 |
4%|▍ | 66/1710 [00:32<11:20, 2.42it/s]
|
67 |
4%|▍ | 67/1710 [00:33<12:26, 2.20it/s]
|
68 |
4%|▍ | 68/1710 [00:33<12:17, 2.23it/s]
|
69 |
4%|▍ | 69/1710 [00:33<12:09, 2.25it/s]
|
70 |
4%|▍ | 70/1710 [00:34<12:14, 2.23it/s]
|
71 |
4%|▍ | 71/1710 [00:34<11:59, 2.28it/s]
|
72 |
4%|▍ | 72/1710 [00:35<12:12, 2.24it/s]
|
73 |
4%|▍ | 73/1710 [00:35<11:23, 2.40it/s]
|
74 |
4%|▍ | 74/1710 [00:36<11:55, 2.29it/s]
|
75 |
4%|▍ | 75/1710 [00:36<11:33, 2.36it/s]
|
76 |
4%|▍ | 76/1710 [00:37<13:03, 2.09it/s]
|
77 |
5%|▍ | 77/1710 [00:37<13:12, 2.06it/s]
|
78 |
5%|▍ | 78/1710 [00:38<14:10, 1.92it/s]
|
79 |
5%|▍ | 79/1710 [00:38<14:31, 1.87it/s]
|
80 |
5%|▍ | 80/1710 [00:39<13:57, 1.95it/s]
|
81 |
5%|▍ | 81/1710 [00:39<13:52, 1.96it/s]
|
82 |
5%|▍ | 82/1710 [00:40<12:48, 2.12it/s]
|
83 |
5%|▍ | 83/1710 [00:40<14:18, 1.90it/s]
|
84 |
5%|▍ | 84/1710 [00:41<13:36, 1.99it/s]
|
85 |
5%|▍ | 85/1710 [00:41<12:53, 2.10it/s]
|
86 |
5%|▌ | 86/1710 [00:42<12:27, 2.17it/s]
|
87 |
5%|▌ | 87/1710 [00:42<12:30, 2.16it/s]
|
88 |
5%|▌ | 88/1710 [00:43<12:09, 2.22it/s]
|
89 |
5%|▌ | 89/1710 [00:43<12:54, 2.09it/s]
|
90 |
5%|▌ | 90/1710 [00:43<12:16, 2.20it/s]
|
91 |
5%|▌ | 91/1710 [00:44<12:47, 2.11it/s]
|
92 |
5%|▌ | 92/1710 [00:44<12:29, 2.16it/s]
|
93 |
5%|▌ | 93/1710 [00:45<12:22, 2.18it/s]
|
94 |
5%|▌ | 94/1710 [00:45<12:03, 2.23it/s]
|
95 |
6%|▌ | 95/1710 [00:46<12:00, 2.24it/s]
|
96 |
6%|▌ | 96/1710 [00:46<13:09, 2.05it/s]
|
97 |
6%|▌ | 97/1710 [00:47<12:05, 2.22it/s]
|
98 |
6%|▌ | 98/1710 [00:47<11:19, 2.37it/s]
|
99 |
6%|▌ | 99/1710 [00:47<10:59, 2.44it/s]
|
100 |
6%|▌ | 100/1710 [00:48<11:51, 2.26it/s]
|
101 |
6%|▌ | 101/1710 [00:48<11:43, 2.29it/s]
|
102 |
6%|▌ | 102/1710 [00:49<11:46, 2.28it/s]
|
103 |
6%|▌ | 103/1710 [00:49<11:30, 2.33it/s]
|
104 |
6%|▌ | 104/1710 [00:50<12:02, 2.22it/s]
|
105 |
6%|▌ | 105/1710 [00:50<11:09, 2.40it/s]
|
106 |
6%|▌ | 106/1710 [00:50<11:00, 2.43it/s]
|
107 |
6%|▋ | 107/1710 [00:51<11:12, 2.38it/s]
|
108 |
6%|▋ | 108/1710 [00:51<10:58, 2.43it/s]
|
109 |
6%|▋ | 109/1710 [00:52<10:58, 2.43it/s]
|
110 |
6%|▋ | 110/1710 [00:52<11:53, 2.24it/s]
|
111 |
6%|▋ | 111/1710 [00:53<11:47, 2.26it/s]
|
112 |
7%|▋ | 112/1710 [00:53<11:39, 2.29it/s]
|
113 |
7%|▋ | 113/1710 [00:53<11:16, 2.36it/s]
|
114 |
7%|▋ | 114/1710 [00:54<11:12, 2.37it/s]
|
115 |
7%|▋ | 115/1710 [00:54<10:35, 2.51it/s]
|
116 |
7%|▋ | 116/1710 [00:55<11:13, 2.37it/s]
|
117 |
7%|▋ | 117/1710 [00:55<11:17, 2.35it/s]
|
118 |
7%|▋ | 118/1710 [00:56<14:58, 1.77it/s]
|
119 |
7%|▋ | 119/1710 [00:56<14:07, 1.88it/s]
|
120 |
7%|▋ | 120/1710 [00:57<13:45, 1.93it/s]
|
121 |
7%|▋ | 121/1710 [00:57<12:26, 2.13it/s]
|
122 |
7%|▋ | 122/1710 [00:58<11:56, 2.22it/s]
|
123 |
7%|▋ | 123/1710 [00:58<11:05, 2.39it/s]
|
124 |
7%|▋ | 124/1710 [00:58<10:58, 2.41it/s]
|
125 |
7%|▋ | 125/1710 [00:59<10:03, 2.63it/s]
|
126 |
7%|▋ | 126/1710 [00:59<10:30, 2.51it/s]
|
127 |
7%|▋ | 127/1710 [01:00<10:41, 2.47it/s]
|
128 |
7%|▋ | 128/1710 [01:00<10:48, 2.44it/s]
|
129 |
8%|▊ | 129/1710 [01:00<10:51, 2.42it/s]
|
130 |
8%|▊ | 130/1710 [01:01<11:48, 2.23it/s]
|
131 |
8%|▊ | 131/1710 [01:01<11:11, 2.35it/s]
|
132 |
8%|▊ | 132/1710 [01:02<11:31, 2.28it/s]
|
133 |
8%|▊ | 133/1710 [01:02<11:40, 2.25it/s]
|
134 |
8%|▊ | 134/1710 [01:03<10:50, 2.42it/s]
|
135 |
8%|▊ | 135/1710 [01:03<10:17, 2.55it/s]
|
136 |
8%|▊ | 136/1710 [01:03<10:39, 2.46it/s]
|
137 |
8%|▊ | 137/1710 [01:04<11:22, 2.30it/s]
|
138 |
8%|▊ | 138/1710 [01:04<11:21, 2.31it/s]
|
139 |
8%|▊ | 139/1710 [01:05<11:55, 2.20it/s]
|
140 |
8%|▊ | 140/1710 [01:06<14:01, 1.87it/s]
|
141 |
8%|▊ | 141/1710 [01:06<12:55, 2.02it/s]
|
142 |
8%|▊ | 142/1710 [01:07<13:39, 1.91it/s]
|
143 |
8%|▊ | 143/1710 [01:07<12:42, 2.06it/s]
|
144 |
8%|▊ | 144/1710 [01:07<11:24, 2.29it/s]
|
145 |
8%|▊ | 145/1710 [01:08<11:51, 2.20it/s]
|
146 |
9%|▊ | 146/1710 [01:08<12:38, 2.06it/s]
|
147 |
9%|▊ | 147/1710 [01:09<12:53, 2.02it/s]
|
148 |
9%|▊ | 148/1710 [01:09<11:37, 2.24it/s]
|
149 |
9%|▊ | 149/1710 [01:10<13:00, 2.00it/s]
|
150 |
9%|▉ | 150/1710 [01:10<13:03, 1.99it/s]
|
151 |
9%|▉ | 151/1710 [01:11<12:32, 2.07it/s]
|
152 |
9%|▉ | 152/1710 [01:11<12:23, 2.10it/s]
|
153 |
9%|▉ | 153/1710 [01:12<11:41, 2.22it/s]
|
154 |
9%|▉ | 154/1710 [01:12<11:02, 2.35it/s]
|
155 |
9%|▉ | 155/1710 [01:12<10:28, 2.48it/s]
|
156 |
9%|▉ | 156/1710 [01:13<11:00, 2.35it/s]
|
157 |
9%|▉ | 157/1710 [01:13<10:35, 2.44it/s]
|
158 |
9%|▉ | 158/1710 [01:14<10:03, 2.57it/s]
|
159 |
9%|▉ | 159/1710 [01:14<10:55, 2.37it/s]
|
160 |
9%|▉ | 160/1710 [01:14<11:03, 2.34it/s]
|
161 |
9%|▉ | 161/1710 [01:15<10:27, 2.47it/s]
|
162 |
9%|▉ | 162/1710 [01:15<10:53, 2.37it/s]
|
163 |
10%|▉ | 163/1710 [01:16<11:08, 2.31it/s]
|
164 |
10%|▉ | 164/1710 [01:16<10:45, 2.39it/s]
|
165 |
10%|▉ | 165/1710 [01:17<11:08, 2.31it/s]
|
166 |
10%|▉ | 166/1710 [01:17<12:47, 2.01it/s]
|
167 |
10%|▉ | 167/1710 [01:18<11:48, 2.18it/s]
|
168 |
10%|▉ | 168/1710 [01:18<11:28, 2.24it/s]
|
169 |
10%|▉ | 169/1710 [01:19<12:21, 2.08it/s]
|
170 |
10%|▉ | 170/1710 [01:19<11:20, 2.26it/s]
|
171 |
10%|█ | 171/1710 [01:19<11:08, 2.30it/s][INFO|trainer.py:811] 2024-09-09 12:16:15,508 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
0%| | 0/315 [00:00<?, ?it/s][A
|
|
|
173 |
3%|▎ | 8/315 [00:00<00:04, 74.15it/s][A
|
|
|
174 |
5%|▌ | 16/315 [00:00<00:04, 72.87it/s][A
|
|
|
175 |
8%|▊ | 24/315 [00:00<00:03, 74.59it/s][A
|
|
|
176 |
10%|█ | 32/315 [00:00<00:04, 70.63it/s][A
|
|
|
177 |
13%|█▎ | 40/315 [00:00<00:03, 71.80it/s][A
|
|
|
178 |
15%|█▌ | 48/315 [00:00<00:03, 72.49it/s][A
|
|
|
179 |
18%|█▊ | 56/315 [00:00<00:03, 72.18it/s][A
|
|
|
180 |
20%|██ | 64/315 [00:00<00:03, 70.11it/s][A
|
|
|
181 |
23%|██▎ | 72/315 [00:00<00:03, 72.26it/s][A
|
|
|
182 |
25%|██▌ | 80/315 [00:01<00:03, 68.95it/s][A
|
|
|
183 |
28%|██▊ | 87/315 [00:01<00:03, 67.94it/s][A
|
|
|
184 |
30%|███ | 95/315 [00:01<00:03, 69.32it/s][A
|
|
|
185 |
32%|███▏ | 102/315 [00:01<00:03, 65.97it/s][A
|
|
|
186 |
35%|███▍ | 110/315 [00:01<00:02, 68.77it/s][A
|
|
|
187 |
37%|███▋ | 118/315 [00:01<00:02, 70.24it/s][A
|
|
|
188 |
40%|████ | 126/315 [00:01<00:02, 67.00it/s][A
|
|
|
189 |
43%|████▎ | 134/315 [00:01<00:02, 67.51it/s][A
|
|
|
190 |
45%|████▍ | 141/315 [00:02<00:02, 68.03it/s][A
|
|
|
191 |
47%|████▋ | 149/315 [00:02<00:02, 70.61it/s][A
|
|
|
192 |
50%|████▉ | 157/315 [00:02<00:02, 72.86it/s][A
|
|
|
193 |
52%|█████▏ | 165/315 [00:02<00:02, 71.30it/s][A
|
|
|
194 |
55%|█████▍ | 173/315 [00:02<00:02, 70.07it/s][A
|
|
|
195 |
57%|█████▋ | 181/315 [00:02<00:01, 67.81it/s][A
|
|
|
196 |
60%|██████ | 189/315 [00:02<00:01, 68.05it/s][A
|
|
|
197 |
62%|██████▏ | 196/315 [00:02<00:01, 67.00it/s][A
|
|
|
198 |
64%|██████▍ | 203/315 [00:02<00:01, 64.45it/s][A
|
|
|
199 |
67%|██████▋ | 210/315 [00:03<00:01, 65.01it/s][A
|
|
|
200 |
69%|██████▉ | 218/315 [00:03<00:01, 68.74it/s][A
|
|
|
201 |
72%|███████▏ | 226/315 [00:03<00:01, 71.36it/s][A
|
|
|
202 |
75%|███████▍ | 235/315 [00:03<00:01, 74.60it/s][A
|
|
|
203 |
77%|███████▋ | 243/315 [00:03<00:01, 70.81it/s][A
|
|
|
204 |
80%|███████▉ | 251/315 [00:03<00:00, 70.96it/s][A
|
|
|
205 |
82%|████████▏ | 259/315 [00:03<00:00, 68.92it/s][A
|
|
|
206 |
85%|████████▍ | 267/315 [00:03<00:00, 70.16it/s][A
|
|
|
207 |
88%|████████▊ | 276/315 [00:03<00:00, 73.45it/s][A
|
|
|
208 |
90%|█████████ | 284/315 [00:04<00:00, 73.52it/s][A
|
|
|
209 |
93%|█████████▎| 292/315 [00:04<00:00, 71.60it/s][A
|
|
|
210 |
95%|█████████▌| 300/315 [00:04<00:00, 71.31it/s][A
|
|
|
211 |
98%|█████████▊| 308/315 [00:04<00:00, 71.30it/s][A
|
212 |
|
|
|
213 |
|
214 |
10%|█ | 171/1710 [01:25<11:08, 2.30it/s]
|
|
|
|
|
215 |
[A[INFO|trainer.py:3503] 2024-09-09 12:16:21,499 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-171
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
10%|█ | 172/1710 [01:30<1:27:43, 3.42s/it]
|
217 |
10%|█ | 173/1710 [01:30<1:05:49, 2.57s/it]
|
218 |
10%|█ | 174/1710 [01:31<49:01, 1.92s/it]
|
219 |
10%|█ | 175/1710 [01:31<37:24, 1.46s/it]
|
220 |
10%|█ | 176/1710 [01:32<29:43, 1.16s/it]
|
221 |
10%|█ | 177/1710 [01:32<23:13, 1.10it/s]
|
222 |
10%|█ | 178/1710 [01:32<19:22, 1.32it/s]
|
223 |
10%|█ | 179/1710 [01:33<16:39, 1.53it/s]
|
224 |
11%|█ | 180/1710 [01:33<14:52, 1.71it/s]
|
225 |
11%|█ | 181/1710 [01:34<13:53, 1.84it/s]
|
226 |
11%|█ | 182/1710 [01:34<13:02, 1.95it/s]
|
227 |
11%|█ | 183/1710 [01:34<12:03, 2.11it/s]
|
228 |
11%|█ | 184/1710 [01:35<11:05, 2.29it/s]
|
229 |
11%|█ | 185/1710 [01:35<11:05, 2.29it/s]
|
230 |
11%|█ | 186/1710 [01:36<10:52, 2.33it/s]
|
231 |
11%|█ | 187/1710 [01:36<10:16, 2.47it/s]
|
232 |
11%|█ | 188/1710 [01:37<11:20, 2.24it/s]
|
233 |
11%|█ | 189/1710 [01:37<10:06, 2.51it/s]
|
234 |
11%|█ | 190/1710 [01:37<10:00, 2.53it/s]
|
235 |
11%|█ | 191/1710 [01:38<10:40, 2.37it/s]
|
236 |
11%|█ | 192/1710 [01:38<13:43, 1.84it/s]
|
237 |
11%|█▏ | 193/1710 [01:39<13:01, 1.94it/s]
|
238 |
11%|█▏ | 194/1710 [01:40<14:45, 1.71it/s]
|
239 |
11%|█▏ | 195/1710 [01:40<13:18, 1.90it/s]
|
240 |
11%|█▏ | 196/1710 [01:41<12:48, 1.97it/s]
|
241 |
12%|█▏ | 197/1710 [01:41<11:50, 2.13it/s]
|
242 |
12%|█▏ | 198/1710 [01:41<12:19, 2.05it/s]
|
243 |
12%|█▏ | 199/1710 [01:42<11:26, 2.20it/s]
|
244 |
12%|█▏ | 200/1710 [01:42<11:07, 2.26it/s]
|
245 |
12%|█▏ | 201/1710 [01:43<11:23, 2.21it/s]
|
246 |
12%|█▏ | 202/1710 [01:43<11:44, 2.14it/s]
|
247 |
12%|█▏ | 203/1710 [01:44<11:12, 2.24it/s]
|
248 |
12%|█▏ | 204/1710 [01:44<11:04, 2.27it/s]
|
249 |
12%|█▏ | 205/1710 [01:44<10:26, 2.40it/s]
|
250 |
12%|█▏ | 206/1710 [01:45<10:31, 2.38it/s]
|
251 |
12%|█▏ | 207/1710 [01:45<10:37, 2.36it/s]
|
252 |
12%|█▏ | 208/1710 [01:46<10:40, 2.35it/s]
|
253 |
12%|█▏ | 209/1710 [01:46<10:26, 2.40it/s]
|
254 |
12%|█▏ | 210/1710 [01:47<10:21, 2.41it/s]
|
255 |
12%|█▏ | 211/1710 [01:47<13:31, 1.85it/s]
|
256 |
12%|█▏ | 212/1710 [01:48<12:25, 2.01it/s]
|
257 |
12%|█▏ | 213/1710 [01:48<11:29, 2.17it/s]
|
258 |
13%|█▎ | 214/1710 [01:48<10:48, 2.31it/s]
|
259 |
13%|█▎ | 215/1710 [01:49<11:08, 2.24it/s]
|
260 |
13%|█▎ | 216/1710 [01:49<11:34, 2.15it/s]
|
261 |
13%|█▎ | 217/1710 [01:50<10:57, 2.27it/s]
|
262 |
13%|█▎ | 218/1710 [01:50<11:25, 2.18it/s]
|
263 |
13%|█▎ | 219/1710 [01:51<10:37, 2.34it/s]
|
264 |
13%|█▎ | 220/1710 [01:51<10:59, 2.26it/s]
|
265 |
13%|█▎ | 221/1710 [01:52<11:41, 2.12it/s]
|
266 |
13%|█▎ | 222/1710 [01:52<11:35, 2.14it/s]
|
267 |
13%|█▎ | 223/1710 [01:53<10:35, 2.34it/s]
|
268 |
13%|█▎ | 224/1710 [01:53<10:22, 2.39it/s]
|
269 |
13%|█▎ | 225/1710 [01:53<11:22, 2.18it/s]
|
270 |
13%|█▎ | 226/1710 [01:54<10:48, 2.29it/s]
|
271 |
13%|█▎ | 227/1710 [01:55<15:06, 1.64it/s]
|
272 |
13%|█▎ | 228/1710 [01:55<15:03, 1.64it/s]
|
273 |
13%|█▎ | 229/1710 [01:56<13:49, 1.79it/s]
|
274 |
13%|█▎ | 230/1710 [01:56<13:40, 1.80it/s]
|
275 |
14%|█▎ | 231/1710 [01:57<12:47, 1.93it/s]
|
276 |
14%|█▎ | 232/1710 [01:57<11:18, 2.18it/s]
|
277 |
14%|█▎ | 233/1710 [01:58<11:45, 2.09it/s]
|
278 |
14%|█▎ | 234/1710 [01:58<11:04, 2.22it/s]
|
279 |
14%|█▎ | 235/1710 [01:58<10:23, 2.37it/s]
|
280 |
14%|█▍ | 236/1710 [01:59<10:08, 2.42it/s]
|
281 |
14%|█▍ | 237/1710 [01:59<11:05, 2.21it/s]
|
282 |
14%|█▍ | 238/1710 [02:00<13:19, 1.84it/s]
|
283 |
14%|█▍ | 239/1710 [02:01<12:42, 1.93it/s]
|
284 |
14%|█▍ | 240/1710 [02:01<11:38, 2.10it/s]
|
285 |
14%|█▍ | 241/1710 [02:01<10:37, 2.30it/s]
|
286 |
14%|█▍ | 242/1710 [02:02<10:17, 2.38it/s]
|
287 |
14%|█▍ | 243/1710 [02:02<10:50, 2.25it/s]
|
288 |
14%|█▍ | 244/1710 [02:03<10:29, 2.33it/s]
|
289 |
14%|█▍ | 245/1710 [02:03<10:30, 2.32it/s]
|
290 |
14%|█▍ | 246/1710 [02:04<11:29, 2.12it/s]
|
291 |
14%|█▍ | 247/1710 [02:04<11:58, 2.04it/s]
|
292 |
15%|█▍ | 248/1710 [02:05<11:34, 2.10it/s]
|
293 |
15%|█▍ | 249/1710 [02:05<10:23, 2.34it/s]
|
294 |
15%|█▍ | 250/1710 [02:05<10:26, 2.33it/s]
|
295 |
15%|█▍ | 251/1710 [02:06<10:35, 2.30it/s]
|
296 |
15%|█▍ | 252/1710 [02:06<10:23, 2.34it/s]
|
297 |
15%|█▍ | 253/1710 [02:07<10:17, 2.36it/s]
|
298 |
15%|█▍ | 254/1710 [02:07<12:00, 2.02it/s]
|
299 |
15%|█▍ | 255/1710 [02:08<11:09, 2.17it/s]
|
300 |
15%|█▍ | 256/1710 [02:08<11:16, 2.15it/s]
|
301 |
15%|█▌ | 257/1710 [02:09<11:21, 2.13it/s]
|
302 |
15%|█▌ | 258/1710 [02:09<10:36, 2.28it/s]
|
303 |
15%|█▌ | 259/1710 [02:09<10:12, 2.37it/s]
|
304 |
15%|█▌ | 260/1710 [02:10<10:50, 2.23it/s]
|
305 |
15%|█▌ | 261/1710 [02:10<11:02, 2.19it/s]
|
306 |
15%|█▌ | 262/1710 [02:11<11:02, 2.18it/s]
|
307 |
15%|█▌ | 263/1710 [02:11<10:14, 2.35it/s]
|
308 |
15%|█▌ | 264/1710 [02:12<11:19, 2.13it/s]
|
309 |
15%|█▌ | 265/1710 [02:12<13:03, 1.84it/s]
|
310 |
16%|█▌ | 266/1710 [02:13<11:58, 2.01it/s]
|
311 |
16%|█▌ | 267/1710 [02:13<11:15, 2.14it/s]
|
312 |
16%|█▌ | 268/1710 [02:14<11:35, 2.07it/s]
|
313 |
16%|█▌ | 269/1710 [02:14<11:31, 2.08it/s]
|
314 |
16%|█▌ | 270/1710 [02:15<11:24, 2.11it/s]
|
315 |
16%|█▌ | 271/1710 [02:15<11:16, 2.13it/s]
|
316 |
16%|█▌ | 272/1710 [02:16<11:15, 2.13it/s]
|
317 |
16%|█▌ | 273/1710 [02:16<11:04, 2.16it/s]
|
318 |
16%|█▌ | 274/1710 [02:17<12:48, 1.87it/s]
|
319 |
16%|█▌ | 275/1710 [02:17<11:30, 2.08it/s]
|
320 |
16%|█▌ | 276/1710 [02:17<10:30, 2.28it/s]
|
321 |
16%|█▌ | 277/1710 [02:18<10:22, 2.30it/s]
|
|
|
1 |
+
2024-09-09 12:14:35.494661: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
2 |
+
2024-09-09 12:14:35.513016: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
3 |
+
2024-09-09 12:14:35.535014: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
4 |
+
2024-09-09 12:14:35.541769: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
5 |
+
2024-09-09 12:14:35.557993: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
6 |
+
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
7 |
+
2024-09-09 12:14:36.793402: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
8 |
+
/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
9 |
+
warnings.warn(
|
10 |
+
09/09/2024 12:14:38 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
|
11 |
+
09/09/2024 12:14:38 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
|
12 |
+
_n_gpu=1,
|
13 |
+
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
|
14 |
+
adafactor=False,
|
15 |
+
adam_beta1=0.9,
|
16 |
+
adam_beta2=0.999,
|
17 |
+
adam_epsilon=1e-08,
|
18 |
+
auto_find_batch_size=False,
|
19 |
+
batch_eval_metrics=False,
|
20 |
+
bf16=False,
|
21 |
+
bf16_full_eval=False,
|
22 |
+
data_seed=None,
|
23 |
+
dataloader_drop_last=False,
|
24 |
+
dataloader_num_workers=0,
|
25 |
+
dataloader_persistent_workers=False,
|
26 |
+
dataloader_pin_memory=True,
|
27 |
+
dataloader_prefetch_factor=None,
|
28 |
+
ddp_backend=None,
|
29 |
+
ddp_broadcast_buffers=None,
|
30 |
+
ddp_bucket_cap_mb=None,
|
31 |
+
ddp_find_unused_parameters=None,
|
32 |
+
ddp_timeout=1800,
|
33 |
+
debug=[],
|
34 |
+
deepspeed=None,
|
35 |
+
disable_tqdm=False,
|
36 |
+
dispatch_batches=None,
|
37 |
+
do_eval=True,
|
38 |
+
do_predict=True,
|
39 |
+
do_train=True,
|
40 |
+
eval_accumulation_steps=None,
|
41 |
+
eval_delay=0,
|
42 |
+
eval_do_concat_batches=True,
|
43 |
+
eval_on_start=False,
|
44 |
+
eval_steps=None,
|
45 |
+
eval_strategy=epoch,
|
46 |
+
eval_use_gather_object=False,
|
47 |
+
evaluation_strategy=epoch,
|
48 |
+
fp16=False,
|
49 |
+
fp16_backend=auto,
|
50 |
+
fp16_full_eval=False,
|
51 |
+
fp16_opt_level=O1,
|
52 |
+
fsdp=[],
|
53 |
+
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
|
54 |
+
fsdp_min_num_params=0,
|
55 |
+
fsdp_transformer_layer_cls_to_wrap=None,
|
56 |
+
full_determinism=False,
|
57 |
+
gradient_accumulation_steps=2,
|
58 |
+
gradient_checkpointing=False,
|
59 |
+
gradient_checkpointing_kwargs=None,
|
60 |
+
greater_is_better=True,
|
61 |
+
group_by_length=False,
|
62 |
+
half_precision_backend=auto,
|
63 |
+
hub_always_push=False,
|
64 |
+
hub_model_id=None,
|
65 |
+
hub_private_repo=False,
|
66 |
+
hub_strategy=every_save,
|
67 |
+
hub_token=<HUB_TOKEN>,
|
68 |
+
ignore_data_skip=False,
|
69 |
+
include_inputs_for_metrics=False,
|
70 |
+
include_num_input_tokens_seen=False,
|
71 |
+
include_tokens_per_second=False,
|
72 |
+
jit_mode_eval=False,
|
73 |
+
label_names=None,
|
74 |
+
label_smoothing_factor=0.0,
|
75 |
+
learning_rate=5e-05,
|
76 |
+
length_column_name=length,
|
77 |
+
load_best_model_at_end=True,
|
78 |
+
local_rank=0,
|
79 |
+
log_level=passive,
|
80 |
+
log_level_replica=warning,
|
81 |
+
log_on_each_node=True,
|
82 |
+
logging_dir=/content/dissertation/scripts/ner/output/tb,
|
83 |
+
logging_first_step=False,
|
84 |
+
logging_nan_inf_filter=True,
|
85 |
+
logging_steps=500,
|
86 |
+
logging_strategy=steps,
|
87 |
+
lr_scheduler_kwargs={},
|
88 |
+
lr_scheduler_type=linear,
|
89 |
+
max_grad_norm=1.0,
|
90 |
+
max_steps=-1,
|
91 |
+
metric_for_best_model=f1,
|
92 |
+
mp_parameters=,
|
93 |
+
neftune_noise_alpha=None,
|
94 |
+
no_cuda=False,
|
95 |
+
num_train_epochs=10.0,
|
96 |
+
optim=adamw_torch,
|
97 |
+
optim_args=None,
|
98 |
+
optim_target_modules=None,
|
99 |
+
output_dir=/content/dissertation/scripts/ner/output,
|
100 |
+
overwrite_output_dir=True,
|
101 |
+
past_index=-1,
|
102 |
+
per_device_eval_batch_size=8,
|
103 |
+
per_device_train_batch_size=32,
|
104 |
+
prediction_loss_only=False,
|
105 |
+
push_to_hub=True,
|
106 |
+
push_to_hub_model_id=None,
|
107 |
+
push_to_hub_organization=None,
|
108 |
+
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
109 |
+
ray_scope=last,
|
110 |
+
remove_unused_columns=True,
|
111 |
+
report_to=['tensorboard'],
|
112 |
+
restore_callback_states_from_checkpoint=False,
|
113 |
+
resume_from_checkpoint=None,
|
114 |
+
run_name=/content/dissertation/scripts/ner/output,
|
115 |
+
save_on_each_node=False,
|
116 |
+
save_only_model=False,
|
117 |
+
save_safetensors=True,
|
118 |
+
save_steps=500,
|
119 |
+
save_strategy=epoch,
|
120 |
+
save_total_limit=None,
|
121 |
+
seed=42,
|
122 |
+
skip_memory_metrics=True,
|
123 |
+
split_batches=None,
|
124 |
+
tf32=None,
|
125 |
+
torch_compile=False,
|
126 |
+
torch_compile_backend=None,
|
127 |
+
torch_compile_mode=None,
|
128 |
+
torch_empty_cache_steps=None,
|
129 |
+
torchdynamo=None,
|
130 |
+
tpu_metrics_debug=False,
|
131 |
+
tpu_num_cores=None,
|
132 |
+
use_cpu=False,
|
133 |
+
use_ipex=False,
|
134 |
+
use_legacy_prediction_loop=False,
|
135 |
+
use_mps_device=False,
|
136 |
+
warmup_ratio=0.0,
|
137 |
+
warmup_steps=0,
|
138 |
+
weight_decay=0.0,
|
139 |
+
)
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
[INFO|configuration_utils.py:733] 2024-09-09 12:14:50,533 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
148 |
+
[INFO|configuration_utils.py:800] 2024-09-09 12:14:50,537 >> Model config RobertaConfig {
|
149 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
150 |
+
"architectures": [
|
151 |
+
"RobertaForMaskedLM"
|
152 |
+
],
|
153 |
+
"attention_probs_dropout_prob": 0.1,
|
154 |
+
"bos_token_id": 0,
|
155 |
+
"classifier_dropout": null,
|
156 |
+
"eos_token_id": 2,
|
157 |
+
"finetuning_task": "ner",
|
158 |
+
"gradient_checkpointing": false,
|
159 |
+
"hidden_act": "gelu",
|
160 |
+
"hidden_dropout_prob": 0.1,
|
161 |
+
"hidden_size": 768,
|
162 |
+
"id2label": {
|
163 |
+
"0": "O",
|
164 |
+
"1": "B-SINTOMA",
|
165 |
+
"2": "I-SINTOMA"
|
166 |
+
},
|
167 |
+
"initializer_range": 0.02,
|
168 |
+
"intermediate_size": 3072,
|
169 |
+
"label2id": {
|
170 |
+
"B-SINTOMA": 1,
|
171 |
+
"I-SINTOMA": 2,
|
172 |
+
"O": 0
|
173 |
+
},
|
174 |
+
"layer_norm_eps": 1e-05,
|
175 |
+
"max_position_embeddings": 514,
|
176 |
+
"model_type": "roberta",
|
177 |
+
"num_attention_heads": 12,
|
178 |
+
"num_hidden_layers": 12,
|
179 |
+
"pad_token_id": 1,
|
180 |
+
"position_embedding_type": "absolute",
|
181 |
+
"transformers_version": "4.44.2",
|
182 |
+
"type_vocab_size": 1,
|
183 |
+
"use_cache": true,
|
184 |
+
"vocab_size": 50262
|
185 |
+
}
|
186 |
+
|
187 |
+
[INFO|configuration_utils.py:733] 2024-09-09 12:14:50,787 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
188 |
+
[INFO|configuration_utils.py:800] 2024-09-09 12:14:50,788 >> Model config RobertaConfig {
|
189 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
190 |
+
"architectures": [
|
191 |
+
"RobertaForMaskedLM"
|
192 |
+
],
|
193 |
+
"attention_probs_dropout_prob": 0.1,
|
194 |
+
"bos_token_id": 0,
|
195 |
+
"classifier_dropout": null,
|
196 |
+
"eos_token_id": 2,
|
197 |
+
"gradient_checkpointing": false,
|
198 |
+
"hidden_act": "gelu",
|
199 |
+
"hidden_dropout_prob": 0.1,
|
200 |
+
"hidden_size": 768,
|
201 |
+
"initializer_range": 0.02,
|
202 |
+
"intermediate_size": 3072,
|
203 |
+
"layer_norm_eps": 1e-05,
|
204 |
+
"max_position_embeddings": 514,
|
205 |
+
"model_type": "roberta",
|
206 |
+
"num_attention_heads": 12,
|
207 |
+
"num_hidden_layers": 12,
|
208 |
+
"pad_token_id": 1,
|
209 |
+
"position_embedding_type": "absolute",
|
210 |
+
"transformers_version": "4.44.2",
|
211 |
+
"type_vocab_size": 1,
|
212 |
+
"use_cache": true,
|
213 |
+
"vocab_size": 50262
|
214 |
+
}
|
215 |
+
|
216 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,800 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
|
217 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
|
218 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file tokenizer.json from cache at None
|
219 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file added_tokens.json from cache at None
|
220 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
|
221 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-09 12:14:50,801 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
|
222 |
+
[INFO|configuration_utils.py:733] 2024-09-09 12:14:50,801 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
223 |
+
[INFO|configuration_utils.py:800] 2024-09-09 12:14:50,802 >> Model config RobertaConfig {
|
224 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
225 |
+
"architectures": [
|
226 |
+
"RobertaForMaskedLM"
|
227 |
+
],
|
228 |
+
"attention_probs_dropout_prob": 0.1,
|
229 |
+
"bos_token_id": 0,
|
230 |
+
"classifier_dropout": null,
|
231 |
+
"eos_token_id": 2,
|
232 |
+
"gradient_checkpointing": false,
|
233 |
+
"hidden_act": "gelu",
|
234 |
+
"hidden_dropout_prob": 0.1,
|
235 |
+
"hidden_size": 768,
|
236 |
+
"initializer_range": 0.02,
|
237 |
+
"intermediate_size": 3072,
|
238 |
+
"layer_norm_eps": 1e-05,
|
239 |
+
"max_position_embeddings": 514,
|
240 |
+
"model_type": "roberta",
|
241 |
+
"num_attention_heads": 12,
|
242 |
+
"num_hidden_layers": 12,
|
243 |
+
"pad_token_id": 1,
|
244 |
+
"position_embedding_type": "absolute",
|
245 |
+
"transformers_version": "4.44.2",
|
246 |
+
"type_vocab_size": 1,
|
247 |
+
"use_cache": true,
|
248 |
+
"vocab_size": 50262
|
249 |
+
}
|
250 |
+
|
251 |
+
/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
|
252 |
+
warnings.warn(
|
253 |
+
[INFO|configuration_utils.py:733] 2024-09-09 12:14:50,882 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
254 |
+
[INFO|configuration_utils.py:800] 2024-09-09 12:14:50,883 >> Model config RobertaConfig {
|
255 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
256 |
+
"architectures": [
|
257 |
+
"RobertaForMaskedLM"
|
258 |
+
],
|
259 |
+
"attention_probs_dropout_prob": 0.1,
|
260 |
+
"bos_token_id": 0,
|
261 |
+
"classifier_dropout": null,
|
262 |
+
"eos_token_id": 2,
|
263 |
+
"gradient_checkpointing": false,
|
264 |
+
"hidden_act": "gelu",
|
265 |
+
"hidden_dropout_prob": 0.1,
|
266 |
+
"hidden_size": 768,
|
267 |
+
"initializer_range": 0.02,
|
268 |
+
"intermediate_size": 3072,
|
269 |
+
"layer_norm_eps": 1e-05,
|
270 |
+
"max_position_embeddings": 514,
|
271 |
+
"model_type": "roberta",
|
272 |
+
"num_attention_heads": 12,
|
273 |
+
"num_hidden_layers": 12,
|
274 |
+
"pad_token_id": 1,
|
275 |
+
"position_embedding_type": "absolute",
|
276 |
+
"transformers_version": "4.44.2",
|
277 |
+
"type_vocab_size": 1,
|
278 |
+
"use_cache": true,
|
279 |
+
"vocab_size": 50262
|
280 |
+
}
|
281 |
+
|
282 |
+
[INFO|modeling_utils.py:3678] 2024-09-09 12:14:51,213 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
|
283 |
+
[INFO|modeling_utils.py:4497] 2024-09-09 12:14:51,293 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
|
284 |
+
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
|
285 |
+
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
|
286 |
+
[WARNING|modeling_utils.py:4509] 2024-09-09 12:14:51,293 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
|
287 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
/content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library 🤗 Evaluate: https://huggingface.co/docs/evaluate
|
292 |
+
metric = load_metric("seqeval", trust_remote_code=True)
|
293 |
+
[INFO|trainer.py:811] 2024-09-09 12:14:55,082 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
294 |
+
[INFO|trainer.py:2134] 2024-09-09 12:14:55,636 >> ***** Running training *****
|
295 |
+
[INFO|trainer.py:2135] 2024-09-09 12:14:55,636 >> Num examples = 10,936
|
296 |
+
[INFO|trainer.py:2136] 2024-09-09 12:14:55,636 >> Num Epochs = 10
|
297 |
+
[INFO|trainer.py:2137] 2024-09-09 12:14:55,636 >> Instantaneous batch size per device = 32
|
298 |
+
[INFO|trainer.py:2140] 2024-09-09 12:14:55,636 >> Total train batch size (w. parallel, distributed & accumulation) = 64
|
299 |
+
[INFO|trainer.py:2141] 2024-09-09 12:14:55,636 >> Gradient Accumulation steps = 2
|
300 |
+
[INFO|trainer.py:2142] 2024-09-09 12:14:55,636 >> Total optimization steps = 1,710
|
301 |
+
[INFO|trainer.py:2143] 2024-09-09 12:14:55,637 >> Number of trainable parameters = 124,055,043
|
302 |
+
|
303 |
0%| | 0/1710 [00:00<?, ?it/s]
|
304 |
0%| | 1/1710 [00:01<32:09, 1.13s/it]
|
305 |
0%| | 2/1710 [00:01<19:10, 1.48it/s]
|
306 |
0%| | 3/1710 [00:01<16:10, 1.76it/s]
|
307 |
0%| | 4/1710 [00:02<14:04, 2.02it/s]
|
308 |
0%| | 5/1710 [00:02<13:32, 2.10it/s]
|
309 |
0%| | 6/1710 [00:03<14:07, 2.01it/s]
|
310 |
0%| | 7/1710 [00:03<12:53, 2.20it/s]
|
311 |
0%| | 8/1710 [00:04<12:12, 2.32it/s]
|
312 |
1%| | 9/1710 [00:04<12:30, 2.27it/s]
|
313 |
1%| | 10/1710 [00:04<11:41, 2.42it/s]
|
314 |
1%| | 11/1710 [00:05<12:06, 2.34it/s]
|
315 |
1%| | 12/1710 [00:05<13:56, 2.03it/s]
|
316 |
1%| | 13/1710 [00:06<14:13, 1.99it/s]
|
317 |
1%| | 14/1710 [00:06<14:10, 2.00it/s]
|
318 |
1%| | 15/1710 [00:07<14:07, 2.00it/s]
|
319 |
1%| | 16/1710 [00:08<15:00, 1.88it/s]
|
320 |
1%| | 17/1710 [00:08<13:46, 2.05it/s]
|
321 |
1%| | 18/1710 [00:08<13:39, 2.06it/s]
|
322 |
1%| | 19/1710 [00:09<12:48, 2.20it/s]
|
323 |
1%| | 20/1710 [00:09<12:31, 2.25it/s]
|
324 |
1%| | 21/1710 [00:10<12:28, 2.26it/s]
|
325 |
1%|▏ | 22/1710 [00:10<14:21, 1.96it/s]
|
326 |
1%|▏ | 23/1710 [00:11<14:32, 1.93it/s]
|
327 |
1%|▏ | 24/1710 [00:11<13:26, 2.09it/s]
|
328 |
1%|▏ | 25/1710 [00:12<14:27, 1.94it/s]
|
329 |
2%|▏ | 26/1710 [00:12<13:07, 2.14it/s]
|
330 |
2%|▏ | 27/1710 [00:13<14:22, 1.95it/s]
|
331 |
2%|▏ | 28/1710 [00:13<13:51, 2.02it/s]
|
332 |
2%|▏ | 29/1710 [00:14<14:49, 1.89it/s]
|
333 |
2%|▏ | 30/1710 [00:14<13:40, 2.05it/s]
|
334 |
2%|▏ | 31/1710 [00:15<13:01, 2.15it/s]
|
335 |
2%|▏ | 32/1710 [00:15<14:15, 1.96it/s]
|
336 |
2%|▏ | 33/1710 [00:16<18:22, 1.52it/s]
|
337 |
2%|▏ | 34/1710 [00:17<18:55, 1.48it/s]
|
338 |
2%|▏ | 35/1710 [00:17<16:24, 1.70it/s]
|
339 |
2%|▏ | 36/1710 [00:18<14:21, 1.94it/s]
|
340 |
2%|▏ | 37/1710 [00:19<17:15, 1.62it/s]
|
341 |
2%|▏ | 38/1710 [00:19<15:50, 1.76it/s]
|
342 |
2%|▏ | 39/1710 [00:19<14:13, 1.96it/s]
|
343 |
2%|▏ | 40/1710 [00:20<14:20, 1.94it/s]
|
344 |
2%|▏ | 41/1710 [00:20<13:25, 2.07it/s]
|
345 |
2%|▏ | 42/1710 [00:21<14:16, 1.95it/s]
|
346 |
3%|▎ | 43/1710 [00:21<14:07, 1.97it/s]
|
347 |
3%|▎ | 44/1710 [00:22<13:33, 2.05it/s]
|
348 |
3%|▎ | 45/1710 [00:22<12:56, 2.14it/s]
|
349 |
3%|▎ | 46/1710 [00:23<12:49, 2.16it/s]
|
350 |
3%|▎ | 47/1710 [00:23<12:51, 2.16it/s]
|
351 |
3%|▎ | 48/1710 [00:24<12:06, 2.29it/s]
|
352 |
3%|▎ | 49/1710 [00:24<15:02, 1.84it/s]
|
353 |
3%|▎ | 50/1710 [00:25<14:06, 1.96it/s]
|
354 |
3%|▎ | 51/1710 [00:25<13:11, 2.10it/s]
|
355 |
3%|▎ | 52/1710 [00:26<12:45, 2.17it/s]
|
356 |
3%|▎ | 53/1710 [00:26<12:22, 2.23it/s]
|
357 |
3%|▎ | 54/1710 [00:27<12:33, 2.20it/s]
|
358 |
3%|▎ | 55/1710 [00:27<15:05, 1.83it/s]
|
359 |
3%|▎ | 56/1710 [00:28<13:50, 1.99it/s]
|
360 |
3%|▎ | 57/1710 [00:28<13:48, 2.00it/s]
|
361 |
3%|▎ | 58/1710 [00:29<12:17, 2.24it/s]
|
362 |
3%|▎ | 59/1710 [00:29<11:30, 2.39it/s]
|
363 |
4%|▎ | 60/1710 [00:29<11:26, 2.40it/s]
|
364 |
4%|▎ | 61/1710 [00:30<12:02, 2.28it/s]
|
365 |
4%|▎ | 62/1710 [00:31<14:35, 1.88it/s]
|
366 |
4%|▎ | 63/1710 [00:31<13:25, 2.05it/s]
|
367 |
4%|▎ | 64/1710 [00:31<12:02, 2.28it/s]
|
368 |
4%|▍ | 65/1710 [00:32<12:17, 2.23it/s]
|
369 |
4%|▍ | 66/1710 [00:32<11:20, 2.42it/s]
|
370 |
4%|▍ | 67/1710 [00:33<12:26, 2.20it/s]
|
371 |
4%|▍ | 68/1710 [00:33<12:17, 2.23it/s]
|
372 |
4%|▍ | 69/1710 [00:33<12:09, 2.25it/s]
|
373 |
4%|▍ | 70/1710 [00:34<12:14, 2.23it/s]
|
374 |
4%|▍ | 71/1710 [00:34<11:59, 2.28it/s]
|
375 |
4%|▍ | 72/1710 [00:35<12:12, 2.24it/s]
|
376 |
4%|▍ | 73/1710 [00:35<11:23, 2.40it/s]
|
377 |
4%|▍ | 74/1710 [00:36<11:55, 2.29it/s]
|
378 |
4%|▍ | 75/1710 [00:36<11:33, 2.36it/s]
|
379 |
4%|▍ | 76/1710 [00:37<13:03, 2.09it/s]
|
380 |
5%|▍ | 77/1710 [00:37<13:12, 2.06it/s]
|
381 |
5%|▍ | 78/1710 [00:38<14:10, 1.92it/s]
|
382 |
5%|▍ | 79/1710 [00:38<14:31, 1.87it/s]
|
383 |
5%|▍ | 80/1710 [00:39<13:57, 1.95it/s]
|
384 |
5%|▍ | 81/1710 [00:39<13:52, 1.96it/s]
|
385 |
5%|▍ | 82/1710 [00:40<12:48, 2.12it/s]
|
386 |
5%|▍ | 83/1710 [00:40<14:18, 1.90it/s]
|
387 |
5%|▍ | 84/1710 [00:41<13:36, 1.99it/s]
|
388 |
5%|▍ | 85/1710 [00:41<12:53, 2.10it/s]
|
389 |
5%|▌ | 86/1710 [00:42<12:27, 2.17it/s]
|
390 |
5%|▌ | 87/1710 [00:42<12:30, 2.16it/s]
|
391 |
5%|▌ | 88/1710 [00:43<12:09, 2.22it/s]
|
392 |
5%|▌ | 89/1710 [00:43<12:54, 2.09it/s]
|
393 |
5%|▌ | 90/1710 [00:43<12:16, 2.20it/s]
|
394 |
5%|▌ | 91/1710 [00:44<12:47, 2.11it/s]
|
395 |
5%|▌ | 92/1710 [00:44<12:29, 2.16it/s]
|
396 |
5%|▌ | 93/1710 [00:45<12:22, 2.18it/s]
|
397 |
5%|▌ | 94/1710 [00:45<12:03, 2.23it/s]
|
398 |
6%|▌ | 95/1710 [00:46<12:00, 2.24it/s]
|
399 |
6%|▌ | 96/1710 [00:46<13:09, 2.05it/s]
|
400 |
6%|▌ | 97/1710 [00:47<12:05, 2.22it/s]
|
401 |
6%|▌ | 98/1710 [00:47<11:19, 2.37it/s]
|
402 |
6%|▌ | 99/1710 [00:47<10:59, 2.44it/s]
|
403 |
6%|▌ | 100/1710 [00:48<11:51, 2.26it/s]
|
404 |
6%|▌ | 101/1710 [00:48<11:43, 2.29it/s]
|
405 |
6%|▌ | 102/1710 [00:49<11:46, 2.28it/s]
|
406 |
6%|▌ | 103/1710 [00:49<11:30, 2.33it/s]
|
407 |
6%|▌ | 104/1710 [00:50<12:02, 2.22it/s]
|
408 |
6%|▌ | 105/1710 [00:50<11:09, 2.40it/s]
|
409 |
6%|▌ | 106/1710 [00:50<11:00, 2.43it/s]
|
410 |
6%|▋ | 107/1710 [00:51<11:12, 2.38it/s]
|
411 |
6%|▋ | 108/1710 [00:51<10:58, 2.43it/s]
|
412 |
6%|▋ | 109/1710 [00:52<10:58, 2.43it/s]
|
413 |
6%|▋ | 110/1710 [00:52<11:53, 2.24it/s]
|
414 |
6%|▋ | 111/1710 [00:53<11:47, 2.26it/s]
|
415 |
7%|▋ | 112/1710 [00:53<11:39, 2.29it/s]
|
416 |
7%|▋ | 113/1710 [00:53<11:16, 2.36it/s]
|
417 |
7%|▋ | 114/1710 [00:54<11:12, 2.37it/s]
|
418 |
7%|▋ | 115/1710 [00:54<10:35, 2.51it/s]
|
419 |
7%|▋ | 116/1710 [00:55<11:13, 2.37it/s]
|
420 |
7%|▋ | 117/1710 [00:55<11:17, 2.35it/s]
|
421 |
7%|▋ | 118/1710 [00:56<14:58, 1.77it/s]
|
422 |
7%|▋ | 119/1710 [00:56<14:07, 1.88it/s]
|
423 |
7%|▋ | 120/1710 [00:57<13:45, 1.93it/s]
|
424 |
7%|▋ | 121/1710 [00:57<12:26, 2.13it/s]
|
425 |
7%|▋ | 122/1710 [00:58<11:56, 2.22it/s]
|
426 |
7%|▋ | 123/1710 [00:58<11:05, 2.39it/s]
|
427 |
7%|▋ | 124/1710 [00:58<10:58, 2.41it/s]
|
428 |
7%|▋ | 125/1710 [00:59<10:03, 2.63it/s]
|
429 |
7%|▋ | 126/1710 [00:59<10:30, 2.51it/s]
|
430 |
7%|▋ | 127/1710 [01:00<10:41, 2.47it/s]
|
431 |
7%|▋ | 128/1710 [01:00<10:48, 2.44it/s]
|
432 |
8%|▊ | 129/1710 [01:00<10:51, 2.42it/s]
|
433 |
8%|▊ | 130/1710 [01:01<11:48, 2.23it/s]
|
434 |
8%|▊ | 131/1710 [01:01<11:11, 2.35it/s]
|
435 |
8%|▊ | 132/1710 [01:02<11:31, 2.28it/s]
|
436 |
8%|▊ | 133/1710 [01:02<11:40, 2.25it/s]
|
437 |
8%|▊ | 134/1710 [01:03<10:50, 2.42it/s]
|
438 |
8%|▊ | 135/1710 [01:03<10:17, 2.55it/s]
|
439 |
8%|▊ | 136/1710 [01:03<10:39, 2.46it/s]
|
440 |
8%|▊ | 137/1710 [01:04<11:22, 2.30it/s]
|
441 |
8%|▊ | 138/1710 [01:04<11:21, 2.31it/s]
|
442 |
8%|▊ | 139/1710 [01:05<11:55, 2.20it/s]
|
443 |
8%|▊ | 140/1710 [01:06<14:01, 1.87it/s]
|
444 |
8%|▊ | 141/1710 [01:06<12:55, 2.02it/s]
|
445 |
8%|▊ | 142/1710 [01:07<13:39, 1.91it/s]
|
446 |
8%|▊ | 143/1710 [01:07<12:42, 2.06it/s]
|
447 |
8%|▊ | 144/1710 [01:07<11:24, 2.29it/s]
|
448 |
8%|▊ | 145/1710 [01:08<11:51, 2.20it/s]
|
449 |
9%|▊ | 146/1710 [01:08<12:38, 2.06it/s]
|
450 |
9%|▊ | 147/1710 [01:09<12:53, 2.02it/s]
|
451 |
9%|▊ | 148/1710 [01:09<11:37, 2.24it/s]
|
452 |
9%|▊ | 149/1710 [01:10<13:00, 2.00it/s]
|
453 |
9%|▉ | 150/1710 [01:10<13:03, 1.99it/s]
|
454 |
9%|▉ | 151/1710 [01:11<12:32, 2.07it/s]
|
455 |
9%|▉ | 152/1710 [01:11<12:23, 2.10it/s]
|
456 |
9%|▉ | 153/1710 [01:12<11:41, 2.22it/s]
|
457 |
9%|▉ | 154/1710 [01:12<11:02, 2.35it/s]
|
458 |
9%|▉ | 155/1710 [01:12<10:28, 2.48it/s]
|
459 |
9%|▉ | 156/1710 [01:13<11:00, 2.35it/s]
|
460 |
9%|▉ | 157/1710 [01:13<10:35, 2.44it/s]
|
461 |
9%|▉ | 158/1710 [01:14<10:03, 2.57it/s]
|
462 |
9%|▉ | 159/1710 [01:14<10:55, 2.37it/s]
|
463 |
9%|▉ | 160/1710 [01:14<11:03, 2.34it/s]
|
464 |
9%|▉ | 161/1710 [01:15<10:27, 2.47it/s]
|
465 |
9%|▉ | 162/1710 [01:15<10:53, 2.37it/s]
|
466 |
10%|▉ | 163/1710 [01:16<11:08, 2.31it/s]
|
467 |
10%|▉ | 164/1710 [01:16<10:45, 2.39it/s]
|
468 |
10%|▉ | 165/1710 [01:17<11:08, 2.31it/s]
|
469 |
10%|▉ | 166/1710 [01:17<12:47, 2.01it/s]
|
470 |
10%|▉ | 167/1710 [01:18<11:48, 2.18it/s]
|
471 |
10%|▉ | 168/1710 [01:18<11:28, 2.24it/s]
|
472 |
10%|▉ | 169/1710 [01:19<12:21, 2.08it/s]
|
473 |
10%|▉ | 170/1710 [01:19<11:20, 2.26it/s]
|
474 |
10%|█ | 171/1710 [01:19<11:08, 2.30it/s][INFO|trainer.py:811] 2024-09-09 12:16:15,508 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
475 |
+
[INFO|trainer.py:3819] 2024-09-09 12:16:15,510 >>
|
476 |
+
***** Running Evaluation *****
|
477 |
+
[INFO|trainer.py:3821] 2024-09-09 12:16:15,510 >> Num examples = 2519
|
478 |
+
[INFO|trainer.py:3824] 2024-09-09 12:16:15,510 >> Batch size = 8
|
479 |
+
|
480 |
+
|
481 |
0%| | 0/315 [00:00<?, ?it/s][A
|
482 |
+
|
483 |
3%|▎ | 8/315 [00:00<00:04, 74.15it/s][A
|
484 |
+
|
485 |
5%|▌ | 16/315 [00:00<00:04, 72.87it/s][A
|
486 |
+
|
487 |
8%|▊ | 24/315 [00:00<00:03, 74.59it/s][A
|
488 |
+
|
489 |
10%|█ | 32/315 [00:00<00:04, 70.63it/s][A
|
490 |
+
|
491 |
13%|█▎ | 40/315 [00:00<00:03, 71.80it/s][A
|
492 |
+
|
493 |
15%|█▌ | 48/315 [00:00<00:03, 72.49it/s][A
|
494 |
+
|
495 |
18%|█▊ | 56/315 [00:00<00:03, 72.18it/s][A
|
496 |
+
|
497 |
20%|██ | 64/315 [00:00<00:03, 70.11it/s][A
|
498 |
+
|
499 |
23%|██▎ | 72/315 [00:00<00:03, 72.26it/s][A
|
500 |
+
|
501 |
25%|██▌ | 80/315 [00:01<00:03, 68.95it/s][A
|
502 |
+
|
503 |
28%|██▊ | 87/315 [00:01<00:03, 67.94it/s][A
|
504 |
+
|
505 |
30%|███ | 95/315 [00:01<00:03, 69.32it/s][A
|
506 |
+
|
507 |
32%|███▏ | 102/315 [00:01<00:03, 65.97it/s][A
|
508 |
+
|
509 |
35%|███▍ | 110/315 [00:01<00:02, 68.77it/s][A
|
510 |
+
|
511 |
37%|███▋ | 118/315 [00:01<00:02, 70.24it/s][A
|
512 |
+
|
513 |
40%|████ | 126/315 [00:01<00:02, 67.00it/s][A
|
514 |
+
|
515 |
43%|████▎ | 134/315 [00:01<00:02, 67.51it/s][A
|
516 |
+
|
517 |
45%|████▍ | 141/315 [00:02<00:02, 68.03it/s][A
|
518 |
+
|
519 |
47%|████▋ | 149/315 [00:02<00:02, 70.61it/s][A
|
520 |
+
|
521 |
50%|████▉ | 157/315 [00:02<00:02, 72.86it/s][A
|
522 |
+
|
523 |
52%|█████▏ | 165/315 [00:02<00:02, 71.30it/s][A
|
524 |
+
|
525 |
55%|█████▍ | 173/315 [00:02<00:02, 70.07it/s][A
|
526 |
+
|
527 |
57%|█████▋ | 181/315 [00:02<00:01, 67.81it/s][A
|
528 |
+
|
529 |
60%|██████ | 189/315 [00:02<00:01, 68.05it/s][A
|
530 |
+
|
531 |
62%|██████▏ | 196/315 [00:02<00:01, 67.00it/s][A
|
532 |
+
|
533 |
64%|██████▍ | 203/315 [00:02<00:01, 64.45it/s][A
|
534 |
+
|
535 |
67%|██████▋ | 210/315 [00:03<00:01, 65.01it/s][A
|
536 |
+
|
537 |
69%|██████▉ | 218/315 [00:03<00:01, 68.74it/s][A
|
538 |
+
|
539 |
72%|███████▏ | 226/315 [00:03<00:01, 71.36it/s][A
|
540 |
+
|
541 |
75%|███████▍ | 235/315 [00:03<00:01, 74.60it/s][A
|
542 |
+
|
543 |
77%|███████▋ | 243/315 [00:03<00:01, 70.81it/s][A
|
544 |
+
|
545 |
80%|███████▉ | 251/315 [00:03<00:00, 70.96it/s][A
|
546 |
+
|
547 |
82%|████████▏ | 259/315 [00:03<00:00, 68.92it/s][A
|
548 |
+
|
549 |
85%|████████▍ | 267/315 [00:03<00:00, 70.16it/s][A
|
550 |
+
|
551 |
88%|████████▊ | 276/315 [00:03<00:00, 73.45it/s][A
|
552 |
+
|
553 |
90%|█████████ | 284/315 [00:04<00:00, 73.52it/s][A
|
554 |
+
|
555 |
93%|█████████▎| 292/315 [00:04<00:00, 71.60it/s][A
|
556 |
+
|
557 |
95%|█████████▌| 300/315 [00:04<00:00, 71.31it/s][A
|
558 |
+
|
559 |
98%|█████████▊| 308/315 [00:04<00:00, 71.30it/s][A
|
560 |
|
561 |
+
|
562 |
|
563 |
10%|█ | 171/1710 [01:25<11:08, 2.30it/s]
|
564 |
+
|
565 |
+
|
566 |
[A[INFO|trainer.py:3503] 2024-09-09 12:16:21,499 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-171
|
567 |
+
[INFO|configuration_utils.py:472] 2024-09-09 12:16:21,501 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-171/config.json
|
568 |
+
[INFO|modeling_utils.py:2799] 2024-09-09 12:16:22,527 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-171/model.safetensors
|
569 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 12:16:22,528 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-171/tokenizer_config.json
|
570 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 12:16:22,529 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-171/special_tokens_map.json
|
571 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-09 12:16:25,565 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
572 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-09 12:16:25,565 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
573 |
+
|
574 |
10%|█ | 172/1710 [01:30<1:27:43, 3.42s/it]
|
575 |
10%|█ | 173/1710 [01:30<1:05:49, 2.57s/it]
|
576 |
10%|█ | 174/1710 [01:31<49:01, 1.92s/it]
|
577 |
10%|█ | 175/1710 [01:31<37:24, 1.46s/it]
|
578 |
10%|█ | 176/1710 [01:32<29:43, 1.16s/it]
|
579 |
10%|█ | 177/1710 [01:32<23:13, 1.10it/s]
|
580 |
10%|█ | 178/1710 [01:32<19:22, 1.32it/s]
|
581 |
10%|█ | 179/1710 [01:33<16:39, 1.53it/s]
|
582 |
11%|█ | 180/1710 [01:33<14:52, 1.71it/s]
|
583 |
11%|█ | 181/1710 [01:34<13:53, 1.84it/s]
|
584 |
11%|█ | 182/1710 [01:34<13:02, 1.95it/s]
|
585 |
11%|█ | 183/1710 [01:34<12:03, 2.11it/s]
|
586 |
11%|█ | 184/1710 [01:35<11:05, 2.29it/s]
|
587 |
11%|█ | 185/1710 [01:35<11:05, 2.29it/s]
|
588 |
11%|█ | 186/1710 [01:36<10:52, 2.33it/s]
|
589 |
11%|█ | 187/1710 [01:36<10:16, 2.47it/s]
|
590 |
11%|█ | 188/1710 [01:37<11:20, 2.24it/s]
|
591 |
11%|█ | 189/1710 [01:37<10:06, 2.51it/s]
|
592 |
11%|█ | 190/1710 [01:37<10:00, 2.53it/s]
|
593 |
11%|█ | 191/1710 [01:38<10:40, 2.37it/s]
|
594 |
11%|█ | 192/1710 [01:38<13:43, 1.84it/s]
|
595 |
11%|█▏ | 193/1710 [01:39<13:01, 1.94it/s]
|
596 |
11%|█▏ | 194/1710 [01:40<14:45, 1.71it/s]
|
597 |
11%|█▏ | 195/1710 [01:40<13:18, 1.90it/s]
|
598 |
11%|█▏ | 196/1710 [01:41<12:48, 1.97it/s]
|
599 |
12%|█▏ | 197/1710 [01:41<11:50, 2.13it/s]
|
600 |
12%|█▏ | 198/1710 [01:41<12:19, 2.05it/s]
|
601 |
12%|█▏ | 199/1710 [01:42<11:26, 2.20it/s]
|
602 |
12%|█▏ | 200/1710 [01:42<11:07, 2.26it/s]
|
603 |
12%|█▏ | 201/1710 [01:43<11:23, 2.21it/s]
|
604 |
12%|█▏ | 202/1710 [01:43<11:44, 2.14it/s]
|
605 |
12%|█▏ | 203/1710 [01:44<11:12, 2.24it/s]
|
606 |
12%|█▏ | 204/1710 [01:44<11:04, 2.27it/s]
|
607 |
12%|█▏ | 205/1710 [01:44<10:26, 2.40it/s]
|
608 |
12%|█▏ | 206/1710 [01:45<10:31, 2.38it/s]
|
609 |
12%|█▏ | 207/1710 [01:45<10:37, 2.36it/s]
|
610 |
12%|█▏ | 208/1710 [01:46<10:40, 2.35it/s]
|
611 |
12%|█▏ | 209/1710 [01:46<10:26, 2.40it/s]
|
612 |
12%|█▏ | 210/1710 [01:47<10:21, 2.41it/s]
|
613 |
12%|█▏ | 211/1710 [01:47<13:31, 1.85it/s]
|
614 |
12%|█▏ | 212/1710 [01:48<12:25, 2.01it/s]
|
615 |
12%|█▏ | 213/1710 [01:48<11:29, 2.17it/s]
|
616 |
13%|█▎ | 214/1710 [01:48<10:48, 2.31it/s]
|
617 |
13%|█▎ | 215/1710 [01:49<11:08, 2.24it/s]
|
618 |
13%|█▎ | 216/1710 [01:49<11:34, 2.15it/s]
|
619 |
13%|█▎ | 217/1710 [01:50<10:57, 2.27it/s]
|
620 |
13%|█▎ | 218/1710 [01:50<11:25, 2.18it/s]
|
621 |
13%|█▎ | 219/1710 [01:51<10:37, 2.34it/s]
|
622 |
13%|█▎ | 220/1710 [01:51<10:59, 2.26it/s]
|
623 |
13%|█▎ | 221/1710 [01:52<11:41, 2.12it/s]
|
624 |
13%|█▎ | 222/1710 [01:52<11:35, 2.14it/s]
|
625 |
13%|█▎ | 223/1710 [01:53<10:35, 2.34it/s]
|
626 |
13%|█▎ | 224/1710 [01:53<10:22, 2.39it/s]
|
627 |
13%|█▎ | 225/1710 [01:53<11:22, 2.18it/s]
|
628 |
13%|█▎ | 226/1710 [01:54<10:48, 2.29it/s]
|
629 |
13%|█▎ | 227/1710 [01:55<15:06, 1.64it/s]
|
630 |
13%|█▎ | 228/1710 [01:55<15:03, 1.64it/s]
|
631 |
13%|█▎ | 229/1710 [01:56<13:49, 1.79it/s]
|
632 |
13%|█▎ | 230/1710 [01:56<13:40, 1.80it/s]
|
633 |
14%|█▎ | 231/1710 [01:57<12:47, 1.93it/s]
|
634 |
14%|█▎ | 232/1710 [01:57<11:18, 2.18it/s]
|
635 |
14%|█▎ | 233/1710 [01:58<11:45, 2.09it/s]
|
636 |
14%|█▎ | 234/1710 [01:58<11:04, 2.22it/s]
|
637 |
14%|█▎ | 235/1710 [01:58<10:23, 2.37it/s]
|
638 |
14%|█▍ | 236/1710 [01:59<10:08, 2.42it/s]
|
639 |
14%|█▍ | 237/1710 [01:59<11:05, 2.21it/s]
|
640 |
14%|█▍ | 238/1710 [02:00<13:19, 1.84it/s]
|
641 |
14%|█▍ | 239/1710 [02:01<12:42, 1.93it/s]
|
642 |
14%|█▍ | 240/1710 [02:01<11:38, 2.10it/s]
|
643 |
14%|█▍ | 241/1710 [02:01<10:37, 2.30it/s]
|
644 |
14%|█▍ | 242/1710 [02:02<10:17, 2.38it/s]
|
645 |
14%|█▍ | 243/1710 [02:02<10:50, 2.25it/s]
|
646 |
14%|█▍ | 244/1710 [02:03<10:29, 2.33it/s]
|
647 |
14%|█▍ | 245/1710 [02:03<10:30, 2.32it/s]
|
648 |
14%|█▍ | 246/1710 [02:04<11:29, 2.12it/s]
|
649 |
14%|█▍ | 247/1710 [02:04<11:58, 2.04it/s]
|
650 |
15%|█▍ | 248/1710 [02:05<11:34, 2.10it/s]
|
651 |
15%|█▍ | 249/1710 [02:05<10:23, 2.34it/s]
|
652 |
15%|█▍ | 250/1710 [02:05<10:26, 2.33it/s]
|
653 |
15%|█▍ | 251/1710 [02:06<10:35, 2.30it/s]
|
654 |
15%|█▍ | 252/1710 [02:06<10:23, 2.34it/s]
|
655 |
15%|█▍ | 253/1710 [02:07<10:17, 2.36it/s]
|
656 |
15%|█▍ | 254/1710 [02:07<12:00, 2.02it/s]
|
657 |
15%|█▍ | 255/1710 [02:08<11:09, 2.17it/s]
|
658 |
15%|█▍ | 256/1710 [02:08<11:16, 2.15it/s]
|
659 |
15%|█▌ | 257/1710 [02:09<11:21, 2.13it/s]
|
660 |
15%|█▌ | 258/1710 [02:09<10:36, 2.28it/s]
|
661 |
15%|█▌ | 259/1710 [02:09<10:12, 2.37it/s]
|
662 |
15%|█▌ | 260/1710 [02:10<10:50, 2.23it/s]
|
663 |
15%|█▌ | 261/1710 [02:10<11:02, 2.19it/s]
|
664 |
15%|█▌ | 262/1710 [02:11<11:02, 2.18it/s]
|
665 |
15%|█▌ | 263/1710 [02:11<10:14, 2.35it/s]
|
666 |
15%|█▌ | 264/1710 [02:12<11:19, 2.13it/s]
|
667 |
15%|█▌ | 265/1710 [02:12<13:03, 1.84it/s]
|
668 |
16%|█▌ | 266/1710 [02:13<11:58, 2.01it/s]
|
669 |
16%|█▌ | 267/1710 [02:13<11:15, 2.14it/s]
|
670 |
16%|█▌ | 268/1710 [02:14<11:35, 2.07it/s]
|
671 |
16%|█▌ | 269/1710 [02:14<11:31, 2.08it/s]
|
672 |
16%|█▌ | 270/1710 [02:15<11:24, 2.11it/s]
|
673 |
16%|█▌ | 271/1710 [02:15<11:16, 2.13it/s]
|
674 |
16%|█▌ | 272/1710 [02:16<11:15, 2.13it/s]
|
675 |
16%|█▌ | 273/1710 [02:16<11:04, 2.16it/s]
|
676 |
16%|█▌ | 274/1710 [02:17<12:48, 1.87it/s]
|
677 |
16%|█▌ | 275/1710 [02:17<11:30, 2.08it/s]
|
678 |
16%|█▌ | 276/1710 [02:17<10:30, 2.28it/s]
|
679 |
16%|█▌ | 277/1710 [02:18<10:22, 2.30it/s]
|
train_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 9.975429975429975,
|
3 |
+
"total_flos": 6404835399317064.0,
|
4 |
+
"train_loss": 0.04138289297302368,
|
5 |
+
"train_runtime": 1065.756,
|
6 |
+
"train_samples": 13013,
|
7 |
+
"train_samples_per_second": 122.101,
|
8 |
+
"train_steps_per_second": 1.905
|
9 |
+
}
|
trainer_state.json
ADDED
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.6984379136881121,
|
3 |
+
"best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-1831",
|
4 |
+
"epoch": 9.975429975429975,
|
5 |
+
"eval_steps": 500,
|
6 |
+
"global_step": 2030,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.9975429975429976,
|
13 |
+
"eval_accuracy": 0.9467740383072925,
|
14 |
+
"eval_f1": 0.6143236074270556,
|
15 |
+
"eval_loss": 0.15010379254817963,
|
16 |
+
"eval_precision": 0.5959855892949047,
|
17 |
+
"eval_recall": 0.6338259441707718,
|
18 |
+
"eval_runtime": 5.907,
|
19 |
+
"eval_samples_per_second": 426.445,
|
20 |
+
"eval_steps_per_second": 53.327,
|
21 |
+
"step": 203
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"epoch": 2.0,
|
25 |
+
"eval_accuracy": 0.949244441592608,
|
26 |
+
"eval_f1": 0.6728575218890952,
|
27 |
+
"eval_loss": 0.17612887918949127,
|
28 |
+
"eval_precision": 0.6529351184346035,
|
29 |
+
"eval_recall": 0.6940339354132458,
|
30 |
+
"eval_runtime": 5.8933,
|
31 |
+
"eval_samples_per_second": 427.436,
|
32 |
+
"eval_steps_per_second": 53.451,
|
33 |
+
"step": 407
|
34 |
+
},
|
35 |
+
{
|
36 |
+
"epoch": 2.457002457002457,
|
37 |
+
"grad_norm": 0.6181371212005615,
|
38 |
+
"learning_rate": 3.768472906403941e-05,
|
39 |
+
"loss": 0.1312,
|
40 |
+
"step": 500
|
41 |
+
},
|
42 |
+
{
|
43 |
+
"epoch": 2.9975429975429977,
|
44 |
+
"eval_accuracy": 0.9469665372645898,
|
45 |
+
"eval_f1": 0.671967171069505,
|
46 |
+
"eval_loss": 0.1995203047990799,
|
47 |
+
"eval_precision": 0.6322393822393823,
|
48 |
+
"eval_recall": 0.7170224411603722,
|
49 |
+
"eval_runtime": 5.8448,
|
50 |
+
"eval_samples_per_second": 430.983,
|
51 |
+
"eval_steps_per_second": 53.894,
|
52 |
+
"step": 610
|
53 |
+
},
|
54 |
+
{
|
55 |
+
"epoch": 4.0,
|
56 |
+
"eval_accuracy": 0.9482979883858963,
|
57 |
+
"eval_f1": 0.6774025974025973,
|
58 |
+
"eval_loss": 0.21822449564933777,
|
59 |
+
"eval_precision": 0.6445872466633712,
|
60 |
+
"eval_recall": 0.7137383689107827,
|
61 |
+
"eval_runtime": 5.872,
|
62 |
+
"eval_samples_per_second": 428.988,
|
63 |
+
"eval_steps_per_second": 53.645,
|
64 |
+
"step": 814
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"epoch": 4.914004914004914,
|
68 |
+
"grad_norm": 0.7616795301437378,
|
69 |
+
"learning_rate": 2.5369458128078822e-05,
|
70 |
+
"loss": 0.0248,
|
71 |
+
"step": 1000
|
72 |
+
},
|
73 |
+
{
|
74 |
+
"epoch": 4.997542997542998,
|
75 |
+
"eval_accuracy": 0.9448650903140942,
|
76 |
+
"eval_f1": 0.6700533401066802,
|
77 |
+
"eval_loss": 0.24612903594970703,
|
78 |
+
"eval_precision": 0.6251184834123222,
|
79 |
+
"eval_recall": 0.7219485495347564,
|
80 |
+
"eval_runtime": 5.8462,
|
81 |
+
"eval_samples_per_second": 430.877,
|
82 |
+
"eval_steps_per_second": 53.881,
|
83 |
+
"step": 1017
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 6.0,
|
87 |
+
"eval_accuracy": 0.9469023709454907,
|
88 |
+
"eval_f1": 0.6827021494370521,
|
89 |
+
"eval_loss": 0.26953065395355225,
|
90 |
+
"eval_precision": 0.6410379625180201,
|
91 |
+
"eval_recall": 0.7301587301587301,
|
92 |
+
"eval_runtime": 5.9067,
|
93 |
+
"eval_samples_per_second": 426.468,
|
94 |
+
"eval_steps_per_second": 53.33,
|
95 |
+
"step": 1221
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"epoch": 6.997542997542998,
|
99 |
+
"eval_accuracy": 0.9469986204241394,
|
100 |
+
"eval_f1": 0.6910590054109765,
|
101 |
+
"eval_loss": 0.2829184830188751,
|
102 |
+
"eval_precision": 0.6528724440116845,
|
103 |
+
"eval_recall": 0.7339901477832512,
|
104 |
+
"eval_runtime": 5.8572,
|
105 |
+
"eval_samples_per_second": 430.069,
|
106 |
+
"eval_steps_per_second": 53.78,
|
107 |
+
"step": 1424
|
108 |
+
},
|
109 |
+
{
|
110 |
+
"epoch": 7.371007371007371,
|
111 |
+
"grad_norm": 0.2855200171470642,
|
112 |
+
"learning_rate": 1.3054187192118228e-05,
|
113 |
+
"loss": 0.0081,
|
114 |
+
"step": 1500
|
115 |
+
},
|
116 |
+
{
|
117 |
+
"epoch": 8.0,
|
118 |
+
"eval_accuracy": 0.9494048573903558,
|
119 |
+
"eval_f1": 0.6938127974616606,
|
120 |
+
"eval_loss": 0.29823970794677734,
|
121 |
+
"eval_precision": 0.6710997442455243,
|
122 |
+
"eval_recall": 0.7181171319102354,
|
123 |
+
"eval_runtime": 5.8929,
|
124 |
+
"eval_samples_per_second": 427.463,
|
125 |
+
"eval_steps_per_second": 53.454,
|
126 |
+
"step": 1628
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"epoch": 8.997542997542997,
|
130 |
+
"eval_accuracy": 0.9500465205813469,
|
131 |
+
"eval_f1": 0.6984379136881121,
|
132 |
+
"eval_loss": 0.30729904770851135,
|
133 |
+
"eval_precision": 0.6764102564102564,
|
134 |
+
"eval_recall": 0.7219485495347564,
|
135 |
+
"eval_runtime": 5.8665,
|
136 |
+
"eval_samples_per_second": 429.386,
|
137 |
+
"eval_steps_per_second": 53.695,
|
138 |
+
"step": 1831
|
139 |
+
},
|
140 |
+
{
|
141 |
+
"epoch": 9.828009828009828,
|
142 |
+
"grad_norm": 0.6682894825935364,
|
143 |
+
"learning_rate": 7.389162561576355e-07,
|
144 |
+
"loss": 0.0038,
|
145 |
+
"step": 2000
|
146 |
+
},
|
147 |
+
{
|
148 |
+
"epoch": 9.975429975429975,
|
149 |
+
"eval_accuracy": 0.9500465205813469,
|
150 |
+
"eval_f1": 0.6931427058512046,
|
151 |
+
"eval_loss": 0.3079104423522949,
|
152 |
+
"eval_precision": 0.6712820512820513,
|
153 |
+
"eval_recall": 0.7164750957854407,
|
154 |
+
"eval_runtime": 5.9033,
|
155 |
+
"eval_samples_per_second": 426.708,
|
156 |
+
"eval_steps_per_second": 53.36,
|
157 |
+
"step": 2030
|
158 |
+
},
|
159 |
+
{
|
160 |
+
"epoch": 9.975429975429975,
|
161 |
+
"step": 2030,
|
162 |
+
"total_flos": 6404835399317064.0,
|
163 |
+
"train_loss": 0.04138289297302368,
|
164 |
+
"train_runtime": 1065.756,
|
165 |
+
"train_samples_per_second": 122.101,
|
166 |
+
"train_steps_per_second": 1.905
|
167 |
+
}
|
168 |
+
],
|
169 |
+
"logging_steps": 500,
|
170 |
+
"max_steps": 2030,
|
171 |
+
"num_input_tokens_seen": 0,
|
172 |
+
"num_train_epochs": 10,
|
173 |
+
"save_steps": 500,
|
174 |
+
"stateful_callbacks": {
|
175 |
+
"TrainerControl": {
|
176 |
+
"args": {
|
177 |
+
"should_epoch_stop": false,
|
178 |
+
"should_evaluate": false,
|
179 |
+
"should_log": false,
|
180 |
+
"should_save": true,
|
181 |
+
"should_training_stop": true
|
182 |
+
},
|
183 |
+
"attributes": {}
|
184 |
+
}
|
185 |
+
},
|
186 |
+
"total_flos": 6404835399317064.0,
|
187 |
+
"train_batch_size": 32,
|
188 |
+
"trial_name": null,
|
189 |
+
"trial_params": null
|
190 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
|
3 |
+
size 5240
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|