Rodrigo1771 commited on
Commit
38648a2
Β·
verified Β·
1 Parent(s): 508fe2a

Training in progress, epoch 1

Browse files
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
3
+ "architectures": [
4
+ "RobertaForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "ner",
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "id2label": {
16
+ "0": "O",
17
+ "1": "B-SINTOMA",
18
+ "2": "I-SINTOMA"
19
+ },
20
+ "initializer_range": 0.02,
21
+ "intermediate_size": 3072,
22
+ "label2id": {
23
+ "B-SINTOMA": 1,
24
+ "I-SINTOMA": 2,
25
+ "O": 0
26
+ },
27
+ "layer_norm_eps": 1e-05,
28
+ "max_position_embeddings": 514,
29
+ "model_type": "roberta",
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.44.2",
36
+ "type_vocab_size": 1,
37
+ "use_cache": true,
38
+ "vocab_size": 50262
39
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec4ff3150f8b54e9dc6cbef1e83df920e2d9ac69d5474829b2a565f66fd6b29
3
+ size 496244100
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tb/events.out.tfevents.1725474455.a5c501872057.1590.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:678f600fa33f4f3e68d8868c24bbc8754fe1267151c8964a43ea17d33cabab3d
3
+ size 5645
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50261": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "max_len": 512,
52
+ "model_max_length": 512,
53
+ "pad_token": "<pad>",
54
+ "sep_token": "</s>",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "trim_offsets": true,
57
+ "unk_token": "<unk>"
58
+ }
train.log ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0%| | 0/2480 [00:00<?, ?it/s]
1
  0%| | 1/2480 [00:01<1:07:07, 1.62s/it]
2
  0%| | 2/2480 [00:01<33:03, 1.25it/s]
3
  0%| | 3/2480 [00:02<24:43, 1.67it/s]
4
  0%| | 4/2480 [00:02<19:24, 2.13it/s]
5
  0%| | 5/2480 [00:02<15:23, 2.68it/s]
6
  0%| | 6/2480 [00:02<13:36, 3.03it/s]
7
  0%| | 7/2480 [00:03<11:58, 3.44it/s]
8
  0%| | 8/2480 [00:03<10:57, 3.76it/s]
9
  0%| | 9/2480 [00:03<10:29, 3.93it/s]
10
  0%| | 10/2480 [00:03<11:55, 3.45it/s]
11
  0%| | 11/2480 [00:04<12:31, 3.28it/s]
12
  0%| | 12/2480 [00:04<11:38, 3.53it/s]
13
  1%| | 13/2480 [00:04<11:55, 3.45it/s]
14
  1%| | 14/2480 [00:05<10:50, 3.79it/s]
15
  1%| | 15/2480 [00:05<10:23, 3.95it/s]
16
  1%| | 16/2480 [00:05<10:14, 4.01it/s]
17
  1%| | 17/2480 [00:05<10:07, 4.05it/s]
18
  1%| | 18/2480 [00:05<09:43, 4.22it/s]
19
  1%| | 19/2480 [00:06<10:53, 3.77it/s]
20
  1%| | 20/2480 [00:06<10:34, 3.88it/s]
21
  1%| | 21/2480 [00:06<10:42, 3.83it/s]
22
  1%| | 22/2480 [00:06<10:01, 4.09it/s]
23
  1%| | 23/2480 [00:07<09:20, 4.39it/s]
24
  1%| | 24/2480 [00:07<08:53, 4.60it/s]
25
  1%| | 25/2480 [00:07<08:51, 4.62it/s]
26
  1%| | 26/2480 [00:07<08:54, 4.59it/s]
27
  1%| | 27/2480 [00:08<09:03, 4.51it/s]
28
  1%| | 28/2480 [00:08<09:00, 4.54it/s]
29
  1%| | 29/2480 [00:08<09:09, 4.46it/s]
30
  1%| | 30/2480 [00:08<09:54, 4.12it/s]
31
  1%|▏ | 31/2480 [00:08<09:15, 4.41it/s]
32
  1%|▏ | 32/2480 [00:09<09:01, 4.52it/s]
33
  1%|▏ | 33/2480 [00:09<09:02, 4.51it/s]
34
  1%|▏ | 34/2480 [00:09<09:19, 4.37it/s]
35
  1%|▏ | 35/2480 [00:09<09:42, 4.20it/s]
36
  1%|▏ | 36/2480 [00:10<09:13, 4.42it/s]
37
  1%|▏ | 37/2480 [00:10<12:51, 3.16it/s]
38
  2%|▏ | 38/2480 [00:10<11:34, 3.52it/s]
39
  2%|▏ | 39/2480 [00:11<11:25, 3.56it/s]
40
  2%|▏ | 40/2480 [00:11<10:18, 3.95it/s]
41
  2%|▏ | 41/2480 [00:11<09:34, 4.25it/s]
42
  2%|▏ | 42/2480 [00:11<09:16, 4.38it/s]
43
  2%|▏ | 43/2480 [00:11<08:59, 4.52it/s]
44
  2%|▏ | 44/2480 [00:12<10:12, 3.98it/s]
45
  2%|▏ | 45/2480 [00:12<09:52, 4.11it/s]
46
  2%|▏ | 46/2480 [00:12<11:42, 3.46it/s]
47
  2%|▏ | 47/2480 [00:13<10:40, 3.80it/s]
48
  2%|▏ | 48/2480 [00:13<10:11, 3.98it/s]
49
  2%|▏ | 49/2480 [00:13<11:17, 3.59it/s]
50
  2%|▏ | 50/2480 [00:13<11:50, 3.42it/s]
51
  2%|▏ | 51/2480 [00:14<11:00, 3.68it/s]
52
  2%|▏ | 52/2480 [00:14<10:50, 3.73it/s]
53
  2%|▏ | 53/2480 [00:14<10:01, 4.04it/s]
54
  2%|▏ | 54/2480 [00:15<12:09, 3.33it/s]
55
  2%|▏ | 55/2480 [00:15<12:02, 3.36it/s]
56
  2%|▏ | 56/2480 [00:15<10:46, 3.75it/s]
57
  2%|▏ | 57/2480 [00:15<10:55, 3.70it/s]
58
  2%|▏ | 58/2480 [00:16<10:10, 3.97it/s]
59
  2%|▏ | 59/2480 [00:16<09:52, 4.09it/s]
60
  2%|▏ | 60/2480 [00:16<09:39, 4.17it/s]
61
  2%|▏ | 61/2480 [00:16<08:59, 4.48it/s]
62
  2%|β–Ž | 62/2480 [00:16<08:44, 4.61it/s]
63
  3%|β–Ž | 63/2480 [00:17<08:49, 4.56it/s]
64
  3%|β–Ž | 64/2480 [00:17<10:09, 3.97it/s]
65
  3%|β–Ž | 65/2480 [00:17<09:53, 4.07it/s]
66
  3%|β–Ž | 66/2480 [00:17<10:26, 3.85it/s]
67
  3%|β–Ž | 67/2480 [00:18<10:40, 3.77it/s]
68
  3%|β–Ž | 68/2480 [00:18<10:02, 4.00it/s]
69
  3%|β–Ž | 69/2480 [00:18<09:33, 4.21it/s]
70
  3%|β–Ž | 70/2480 [00:18<09:27, 4.25it/s]
71
  3%|β–Ž | 71/2480 [00:19<09:44, 4.12it/s]
72
  3%|β–Ž | 72/2480 [00:19<10:20, 3.88it/s]
73
  3%|β–Ž | 73/2480 [00:19<09:47, 4.10it/s]
74
  3%|β–Ž | 74/2480 [00:20<11:34, 3.47it/s]
75
  3%|β–Ž | 75/2480 [00:20<11:25, 3.51it/s]
76
  3%|β–Ž | 76/2480 [00:20<11:33, 3.47it/s]
77
  3%|β–Ž | 77/2480 [00:20<10:47, 3.71it/s]
78
  3%|β–Ž | 78/2480 [00:21<10:10, 3.93it/s]
79
  3%|β–Ž | 79/2480 [00:21<09:52, 4.05it/s]
80
  3%|β–Ž | 80/2480 [00:21<09:47, 4.09it/s]
81
  3%|β–Ž | 81/2480 [00:21<11:08, 3.59it/s]
82
  3%|β–Ž | 82/2480 [00:22<10:41, 3.74it/s]
83
  3%|β–Ž | 83/2480 [00:22<09:51, 4.05it/s]
84
  3%|β–Ž | 84/2480 [00:22<10:24, 3.83it/s]
85
  3%|β–Ž | 85/2480 [00:22<10:22, 3.85it/s]
86
  3%|β–Ž | 86/2480 [00:23<09:52, 4.04it/s]
87
  4%|β–Ž | 87/2480 [00:23<10:24, 3.83it/s]
88
  4%|β–Ž | 88/2480 [00:23<09:52, 4.04it/s]
89
  4%|β–Ž | 89/2480 [00:23<10:02, 3.97it/s]
90
  4%|β–Ž | 90/2480 [00:24<11:55, 3.34it/s]
91
  4%|β–Ž | 91/2480 [00:24<11:08, 3.57it/s]
92
  4%|β–Ž | 92/2480 [00:24<11:00, 3.62it/s]
93
  4%|▍ | 93/2480 [00:25<10:41, 3.72it/s]
94
  4%|▍ | 94/2480 [00:25<10:17, 3.87it/s]
95
  4%|▍ | 95/2480 [00:25<10:17, 3.86it/s]
96
  4%|▍ | 96/2480 [00:25<09:22, 4.24it/s]
97
  4%|▍ | 97/2480 [00:25<09:48, 4.05it/s]
98
  4%|▍ | 98/2480 [00:26<08:58, 4.43it/s]
99
  4%|▍ | 99/2480 [00:26<10:07, 3.92it/s]
100
  4%|▍ | 100/2480 [00:26<09:27, 4.19it/s]
101
  4%|▍ | 101/2480 [00:26<09:08, 4.33it/s]
102
  4%|▍ | 102/2480 [00:27<09:17, 4.27it/s]
103
  4%|▍ | 103/2480 [00:27<08:58, 4.41it/s]
104
  4%|▍ | 104/2480 [00:27<08:55, 4.44it/s]
105
  4%|▍ | 105/2480 [00:27<09:26, 4.19it/s]
106
  4%|▍ | 106/2480 [00:28<09:53, 4.00it/s]
107
  4%|▍ | 107/2480 [00:28<09:29, 4.17it/s]
108
  4%|▍ | 108/2480 [00:28<09:57, 3.97it/s]
109
  4%|▍ | 109/2480 [00:28<09:08, 4.32it/s]
110
  4%|▍ | 110/2480 [00:29<09:11, 4.30it/s]
111
  4%|▍ | 111/2480 [00:29<09:05, 4.34it/s]
112
  5%|▍ | 112/2480 [00:29<08:44, 4.51it/s]
113
  5%|▍ | 113/2480 [00:29<08:33, 4.61it/s]
114
  5%|▍ | 114/2480 [00:29<08:39, 4.55it/s]
115
  5%|▍ | 115/2480 [00:30<08:11, 4.81it/s]
116
  5%|▍ | 116/2480 [00:30<08:29, 4.64it/s]
117
  5%|▍ | 117/2480 [00:30<08:45, 4.50it/s]
118
  5%|▍ | 118/2480 [00:30<10:20, 3.81it/s]
119
  5%|▍ | 119/2480 [00:31<10:47, 3.65it/s]
120
  5%|▍ | 120/2480 [00:31<10:03, 3.91it/s]
121
  5%|▍ | 121/2480 [00:31<11:11, 3.51it/s]
122
  5%|▍ | 122/2480 [00:31<10:07, 3.88it/s]
123
  5%|▍ | 123/2480 [00:32<09:46, 4.02it/s]
124
  5%|β–Œ | 124/2480 [00:32<09:41, 4.05it/s]
125
  5%|β–Œ | 125/2480 [00:32<09:50, 3.99it/s]
126
  5%|β–Œ | 126/2480 [00:32<10:29, 3.74it/s]
127
  5%|β–Œ | 127/2480 [00:33<11:12, 3.50it/s]
128
  5%|β–Œ | 128/2480 [00:33<11:23, 3.44it/s]
129
  5%|β–Œ | 129/2480 [00:33<11:44, 3.34it/s]
130
  5%|β–Œ | 130/2480 [00:34<10:48, 3.62it/s]
131
  5%|β–Œ | 131/2480 [00:34<10:45, 3.64it/s]
132
  5%|β–Œ | 132/2480 [00:34<12:09, 3.22it/s]
133
  5%|β–Œ | 133/2480 [00:35<11:14, 3.48it/s]
134
  5%|β–Œ | 134/2480 [00:35<10:11, 3.83it/s]
135
  5%|β–Œ | 135/2480 [00:35<09:29, 4.12it/s]
136
  5%|β–Œ | 136/2480 [00:35<08:55, 4.38it/s]
137
  6%|β–Œ | 137/2480 [00:35<08:53, 4.39it/s]
138
  6%|β–Œ | 138/2480 [00:36<09:30, 4.10it/s]
139
  6%|β–Œ | 139/2480 [00:36<09:22, 4.16it/s]
140
  6%|β–Œ | 140/2480 [00:36<09:46, 3.99it/s]
141
  6%|β–Œ | 141/2480 [00:36<10:17, 3.79it/s]
142
  6%|β–Œ | 142/2480 [00:37<09:19, 4.18it/s]
143
  6%|β–Œ | 143/2480 [00:37<09:22, 4.15it/s]
144
  6%|β–Œ | 144/2480 [00:37<09:46, 3.98it/s]
145
  6%|β–Œ | 145/2480 [00:37<09:36, 4.05it/s]
146
  6%|β–Œ | 146/2480 [00:38<09:16, 4.19it/s]
147
  6%|β–Œ | 147/2480 [00:38<08:56, 4.35it/s]
148
  6%|β–Œ | 148/2480 [00:38<09:01, 4.31it/s]
149
  6%|β–Œ | 149/2480 [00:38<08:46, 4.43it/s]
150
  6%|β–Œ | 150/2480 [00:39<09:03, 4.29it/s]
151
  6%|β–Œ | 151/2480 [00:39<08:38, 4.50it/s]
152
  6%|β–Œ | 152/2480 [00:39<08:57, 4.33it/s]
153
  6%|β–Œ | 153/2480 [00:39<09:12, 4.21it/s]
154
  6%|β–Œ | 154/2480 [00:39<09:25, 4.11it/s]
155
  6%|β–‹ | 155/2480 [00:40<09:55, 3.91it/s]
156
  6%|β–‹ | 156/2480 [00:40<09:35, 4.04it/s]
157
  6%|β–‹ | 157/2480 [00:40<09:05, 4.26it/s]
158
  6%|β–‹ | 158/2480 [00:40<08:57, 4.32it/s]
159
  6%|β–‹ | 159/2480 [00:41<09:42, 3.98it/s]
160
  6%|β–‹ | 160/2480 [00:41<09:12, 4.20it/s]
161
  6%|β–‹ | 161/2480 [00:41<08:26, 4.58it/s]
162
  7%|β–‹ | 162/2480 [00:41<08:04, 4.78it/s]
163
  7%|β–‹ | 163/2480 [00:42<08:28, 4.56it/s]
164
  7%|β–‹ | 164/2480 [00:42<09:54, 3.90it/s]
165
  7%|β–‹ | 165/2480 [00:42<10:08, 3.81it/s]
166
  7%|β–‹ | 166/2480 [00:42<09:49, 3.93it/s]
167
  7%|β–‹ | 167/2480 [00:43<09:09, 4.21it/s]
168
  7%|β–‹ | 168/2480 [00:43<09:38, 4.00it/s]
169
  7%|β–‹ | 169/2480 [00:43<09:07, 4.22it/s]
170
  7%|β–‹ | 170/2480 [00:43<09:17, 4.14it/s]
171
  7%|β–‹ | 171/2480 [00:44<09:21, 4.11it/s]
172
  7%|β–‹ | 172/2480 [00:44<09:06, 4.22it/s]
173
  7%|β–‹ | 173/2480 [00:44<08:49, 4.36it/s]
174
  7%|β–‹ | 174/2480 [00:44<09:14, 4.16it/s]
175
  7%|β–‹ | 175/2480 [00:45<09:11, 4.18it/s]
176
  7%|β–‹ | 176/2480 [00:45<08:46, 4.38it/s]
177
  7%|β–‹ | 177/2480 [00:45<08:29, 4.52it/s]
178
  7%|β–‹ | 178/2480 [00:45<08:04, 4.76it/s]
179
  7%|β–‹ | 179/2480 [00:45<07:58, 4.81it/s]
180
  7%|β–‹ | 180/2480 [00:46<08:13, 4.66it/s]
181
  7%|β–‹ | 181/2480 [00:46<08:21, 4.58it/s]
182
  7%|β–‹ | 182/2480 [00:46<08:41, 4.41it/s]
183
  7%|β–‹ | 183/2480 [00:46<08:39, 4.42it/s]
184
  7%|β–‹ | 184/2480 [00:46<08:42, 4.39it/s]
185
  7%|β–‹ | 185/2480 [00:47<08:31, 4.48it/s]
186
  8%|β–Š | 186/2480 [00:47<08:22, 4.56it/s]
187
  8%|β–Š | 187/2480 [00:47<08:10, 4.68it/s]
188
  8%|β–Š | 188/2480 [00:47<08:16, 4.61it/s]
189
  8%|β–Š | 189/2480 [00:48<08:21, 4.57it/s]
190
  8%|β–Š | 190/2480 [00:48<08:52, 4.30it/s]
191
  8%|β–Š | 191/2480 [00:48<08:34, 4.45it/s]
192
  8%|β–Š | 192/2480 [00:48<08:58, 4.25it/s]
193
  8%|β–Š | 193/2480 [00:48<08:32, 4.47it/s]
194
  8%|β–Š | 194/2480 [00:49<09:04, 4.20it/s]
195
  8%|β–Š | 195/2480 [00:49<09:47, 3.89it/s]
196
  8%|β–Š | 196/2480 [00:49<09:36, 3.96it/s]
197
  8%|β–Š | 197/2480 [00:50<11:04, 3.44it/s]
198
  8%|β–Š | 198/2480 [00:50<10:01, 3.80it/s]
199
  8%|β–Š | 199/2480 [00:50<09:13, 4.12it/s]
200
  8%|β–Š | 200/2480 [00:50<08:43, 4.35it/s]
201
  8%|β–Š | 201/2480 [00:50<08:30, 4.47it/s]
202
  8%|β–Š | 202/2480 [00:51<08:31, 4.46it/s]
203
  8%|β–Š | 203/2480 [00:51<10:17, 3.69it/s]
204
  8%|β–Š | 204/2480 [00:51<10:29, 3.62it/s]
205
  8%|β–Š | 205/2480 [00:52<09:39, 3.93it/s]
206
  8%|β–Š | 206/2480 [00:52<09:18, 4.07it/s]
207
  8%|β–Š | 207/2480 [00:52<09:36, 3.94it/s]
208
  8%|β–Š | 208/2480 [00:52<10:23, 3.65it/s]
209
  8%|β–Š | 209/2480 [00:53<10:22, 3.65it/s]
210
  8%|β–Š | 210/2480 [00:53<09:54, 3.82it/s]
211
  9%|β–Š | 211/2480 [00:53<09:31, 3.97it/s]
212
  9%|β–Š | 212/2480 [00:53<09:37, 3.93it/s]
213
  9%|β–Š | 213/2480 [00:54<09:25, 4.01it/s]
214
  9%|β–Š | 214/2480 [00:54<09:16, 4.07it/s]
215
  9%|β–Š | 215/2480 [00:54<09:42, 3.89it/s]
216
  9%|β–Š | 216/2480 [00:54<09:37, 3.92it/s]
217
  9%|β–‰ | 217/2480 [00:55<09:27, 3.99it/s]
218
  9%|β–‰ | 218/2480 [00:55<09:32, 3.95it/s]
219
  9%|β–‰ | 219/2480 [00:55<08:53, 4.24it/s]
220
  9%|β–‰ | 220/2480 [00:55<10:10, 3.70it/s]
221
  9%|β–‰ | 221/2480 [00:56<09:48, 3.84it/s]
222
  9%|β–‰ | 222/2480 [00:56<08:54, 4.23it/s]
223
  9%|β–‰ | 223/2480 [00:56<09:11, 4.09it/s]
224
  9%|β–‰ | 224/2480 [00:56<09:01, 4.17it/s]
225
  9%|β–‰ | 225/2480 [00:57<09:45, 3.85it/s]
226
  9%|β–‰ | 226/2480 [00:57<09:24, 4.00it/s]
227
  9%|β–‰ | 227/2480 [00:57<09:26, 3.98it/s]
228
  9%|β–‰ | 228/2480 [00:57<08:58, 4.19it/s]
229
  9%|β–‰ | 229/2480 [00:58<08:33, 4.39it/s]
230
  9%|β–‰ | 230/2480 [00:58<08:21, 4.48it/s]
231
  9%|β–‰ | 231/2480 [00:58<08:32, 4.39it/s]
232
  9%|β–‰ | 232/2480 [00:58<09:05, 4.12it/s]
233
  9%|β–‰ | 233/2480 [00:59<08:56, 4.19it/s]
234
  9%|β–‰ | 234/2480 [00:59<09:25, 3.97it/s]
235
  9%|β–‰ | 235/2480 [00:59<09:12, 4.07it/s]
236
  10%|β–‰ | 236/2480 [00:59<10:24, 3.60it/s]
237
  10%|β–‰ | 237/2480 [01:00<09:34, 3.90it/s]
238
  10%|β–‰ | 238/2480 [01:00<09:06, 4.11it/s]
239
  10%|β–‰ | 239/2480 [01:00<08:58, 4.16it/s]
240
  10%|β–‰ | 240/2480 [01:00<08:21, 4.47it/s]
241
  10%|β–‰ | 241/2480 [01:00<07:53, 4.73it/s]
242
  10%|β–‰ | 242/2480 [01:01<08:08, 4.58it/s]
243
  10%|β–‰ | 243/2480 [01:01<08:34, 4.35it/s]
244
  10%|β–‰ | 244/2480 [01:01<09:04, 4.10it/s]
245
  10%|β–‰ | 245/2480 [01:01<09:14, 4.03it/s]
246
  10%|β–‰ | 246/2480 [01:02<08:50, 4.21it/s]
247
  10%|β–‰ | 247/2480 [01:02<08:32, 4.36it/s]
248
  10%|β–ˆ | 248/2480 [01:02<07:59, 4.65it/s][INFO|trainer.py:811] 2024-09-04 18:28:37,832 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
 
 
 
 
 
 
249
  0%| | 0/315 [00:00<?, ?it/s]
 
250
  3%|β–Ž | 9/315 [00:00<00:03, 87.62it/s]
 
251
  6%|β–Œ | 18/315 [00:00<00:03, 84.29it/s]
 
252
  9%|β–Š | 27/315 [00:00<00:03, 82.00it/s]
 
253
  11%|β–ˆβ– | 36/315 [00:00<00:03, 76.52it/s]
 
254
  14%|β–ˆβ– | 45/315 [00:00<00:03, 78.83it/s]
 
255
  17%|β–ˆβ–‹ | 54/315 [00:00<00:03, 79.38it/s]
 
256
  20%|β–ˆβ–ˆ | 63/315 [00:00<00:03, 78.44it/s]
 
257
  23%|β–ˆβ–ˆβ–Ž | 72/315 [00:00<00:03, 80.31it/s]
 
258
  26%|β–ˆβ–ˆβ–Œ | 81/315 [00:01<00:02, 79.22it/s]
 
259
  28%|β–ˆβ–ˆβ–Š | 89/315 [00:01<00:02, 78.97it/s]
 
260
  31%|β–ˆβ–ˆβ–ˆ | 97/315 [00:01<00:02, 78.20it/s]
 
261
  34%|β–ˆβ–ˆβ–ˆβ–Ž | 106/315 [00:01<00:02, 80.06it/s]
 
262
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 115/315 [00:01<00:02, 81.29it/s]
 
263
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 124/315 [00:01<00:02, 78.38it/s]
 
264
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 132/315 [00:01<00:02, 78.59it/s]
 
265
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 140/315 [00:01<00:02, 77.86it/s]
 
266
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 148/315 [00:01<00:02, 75.27it/s]
 
267
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 156/315 [00:01<00:02, 73.46it/s]
 
268
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 164/315 [00:02<00:02, 75.03it/s]
 
269
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 173/315 [00:02<00:01, 76.99it/s]
 
270
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 181/315 [00:02<00:01, 77.31it/s]
 
271
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 190/315 [00:02<00:01, 78.87it/s]
 
272
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 198/315 [00:02<00:01, 78.00it/s]
 
273
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 206/315 [00:02<00:01, 78.04it/s]
 
274
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 214/315 [00:02<00:01, 75.23it/s]
 
275
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 222/315 [00:02<00:01, 73.53it/s]
 
276
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 230/315 [00:02<00:01, 72.57it/s]
 
277
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 239/315 [00:03<00:01, 74.60it/s]
 
278
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 247/315 [00:03<00:00, 74.88it/s]
 
279
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 256/315 [00:03<00:00, 76.66it/s]
 
280
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 265/315 [00:03<00:00, 78.61it/s]
 
281
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 274/315 [00:03<00:00, 81.06it/s]
 
282
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 283/315 [00:03<00:00, 82.28it/s]
 
283
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 292/315 [00:03<00:00, 79.10it/s]
 
284
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 301/315 [00:03<00:00, 80.22it/s]
 
285
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 310/315 [00:03<00:00, 81.05it/s]
286
 
 
287
 
288
  10%|β–ˆ | 248/2480 [01:08<07:59, 4.65it/s]
 
 
289
  [INFO|trainer.py:3503] 2024-09-04 18:28:43,350 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-248
 
 
 
 
 
 
 
290
  10%|β–ˆ | 249/2480 [01:11<1:44:55, 2.82s/it]
291
  10%|β–ˆ | 250/2480 [01:11<1:15:54, 2.04s/it]
292
  10%|β–ˆ | 251/2480 [01:11<55:54, 1.51s/it]
293
  10%|β–ˆ | 252/2480 [01:12<41:42, 1.12s/it]
294
  10%|β–ˆ | 253/2480 [01:12<31:32, 1.18it/s]
295
  10%|β–ˆ | 254/2480 [01:12<24:51, 1.49it/s]
296
  10%|β–ˆ | 255/2480 [01:12<21:00, 1.77it/s]
297
  10%|β–ˆ | 256/2480 [01:13<17:50, 2.08it/s]
298
  10%|β–ˆ | 257/2480 [01:13<15:06, 2.45it/s]
299
  10%|β–ˆ | 258/2480 [01:13<12:55, 2.87it/s]
300
  10%|β–ˆ | 259/2480 [01:13<11:19, 3.27it/s]
301
  10%|β–ˆ | 260/2480 [01:14<10:42, 3.45it/s]
302
  11%|β–ˆ | 261/2480 [01:14<10:07, 3.65it/s]
303
  11%|β–ˆ | 262/2480 [01:14<09:25, 3.92it/s]
304
  11%|β–ˆ | 263/2480 [01:14<09:55, 3.72it/s]
305
  11%|β–ˆ | 264/2480 [01:15<10:10, 3.63it/s]
306
  11%|β–ˆ | 265/2480 [01:15<10:24, 3.55it/s]
307
  11%|β–ˆ | 266/2480 [01:15<09:56, 3.71it/s]
308
  11%|β–ˆ | 267/2480 [01:15<09:49, 3.75it/s]
309
  11%|β–ˆ | 268/2480 [01:16<09:37, 3.83it/s]
310
  11%|β–ˆ | 269/2480 [01:16<09:43, 3.79it/s]
311
  11%|β–ˆ | 270/2480 [01:16<09:21, 3.94it/s]
312
  11%|β–ˆ | 271/2480 [01:16<09:51, 3.74it/s]
313
  11%|β–ˆ | 272/2480 [01:17<09:45, 3.77it/s]
314
  11%|β–ˆ | 273/2480 [01:17<09:37, 3.82it/s]
315
  11%|β–ˆ | 274/2480 [01:17<09:15, 3.97it/s]
316
  11%|β–ˆ | 275/2480 [01:18<09:48, 3.75it/s]
317
  11%|β–ˆ | 276/2480 [01:18<09:30, 3.86it/s]
318
  11%|β–ˆ | 277/2480 [01:18<08:50, 4.15it/s]
319
  11%|β–ˆ | 278/2480 [01:18<08:05, 4.53it/s]
320
  11%|β–ˆβ– | 279/2480 [01:18<09:13, 3.98it/s]
321
  11%|β–ˆβ– | 280/2480 [01:19<08:55, 4.11it/s]
322
  11%|β–ˆβ– | 281/2480 [01:19<08:30, 4.31it/s]
323
  11%|β–ˆβ– | 282/2480 [01:19<08:31, 4.29it/s]
324
  11%|β–ˆβ– | 283/2480 [01:19<08:42, 4.21it/s]
325
  11%|β–ˆβ– | 284/2480 [01:20<08:57, 4.09it/s]
326
  11%|β–ˆβ– | 285/2480 [01:20<08:46, 4.17it/s]
327
  12%|β–ˆβ– | 286/2480 [01:20<08:15, 4.43it/s]
328
  12%|β–ˆβ– | 287/2480 [01:20<07:45, 4.71it/s]
329
  12%|β–ˆβ– | 288/2480 [01:21<08:39, 4.22it/s]
330
  12%|β–ˆβ– | 289/2480 [01:21<09:57, 3.67it/s]
331
  12%|β–ˆβ– | 290/2480 [01:21<09:34, 3.81it/s]
332
  12%|β–ˆβ– | 291/2480 [01:21<09:32, 3.82it/s]
333
  12%|β–ˆβ– | 292/2480 [01:22<09:35, 3.80it/s]
334
  12%|β–ˆβ– | 293/2480 [01:22<09:23, 3.88it/s]
335
  12%|β–ˆβ– | 294/2480 [01:22<08:56, 4.08it/s]
336
  12%|β–ˆβ– | 295/2480 [01:22<09:27, 3.85it/s]
337
  12%|β–ˆβ– | 296/2480 [01:23<08:49, 4.12it/s]
338
  12%|β–ˆβ– | 297/2480 [01:23<08:55, 4.08it/s]
339
  12%|β–ˆβ– | 298/2480 [01:23<08:33, 4.25it/s]
340
  12%|β–ˆβ– | 299/2480 [01:23<09:39, 3.76it/s]
341
  12%|β–ˆβ– | 300/2480 [01:24<09:25, 3.86it/s]
342
  12%|β–ˆβ– | 301/2480 [01:24<08:34, 4.23it/s]
343
  12%|β–ˆβ– | 302/2480 [01:24<07:59, 4.54it/s]
344
  12%|β–ˆβ– | 303/2480 [01:25<11:53, 3.05it/s]
345
  12%|β–ˆβ– | 304/2480 [01:25<11:51, 3.06it/s]
346
  12%|β–ˆβ– | 305/2480 [01:25<10:15, 3.54it/s]
347
  12%|β–ˆβ– | 306/2480 [01:25<09:49, 3.69it/s]
348
  12%|β–ˆβ– | 307/2480 [01:26<09:18, 3.89it/s]
349
  12%|β–ˆβ– | 308/2480 [01:26<09:14, 3.92it/s]
350
  12%|β–ˆβ– | 309/2480 [01:26<08:48, 4.11it/s]
351
  12%|β–ˆβ–Ž | 310/2480 [01:26<08:11, 4.42it/s]
352
  13%|β–ˆβ–Ž | 311/2480 [01:26<07:49, 4.62it/s]
353
  13%|β–ˆβ–Ž | 312/2480 [01:27<07:32, 4.79it/s]
354
  13%|β–ˆβ–Ž | 313/2480 [01:27<07:56, 4.55it/s]
355
  13%|β–ˆβ–Ž | 314/2480 [01:27<07:45, 4.65it/s]
356
  13%|β–ˆβ–Ž | 315/2480 [01:27<09:12, 3.92it/s]
357
  13%|β–ˆβ–Ž | 316/2480 [01:28<09:02, 3.99it/s]
358
  13%|β–ˆβ–Ž | 317/2480 [01:28<08:47, 4.10it/s]
359
  13%|β–ˆβ–Ž | 318/2480 [01:28<09:01, 3.99it/s]
360
  13%|β–ˆβ–Ž | 319/2480 [01:28<08:19, 4.32it/s]
361
  13%|β–ˆβ–Ž | 320/2480 [01:29<08:30, 4.23it/s]
362
  13%|β–ˆβ–Ž | 321/2480 [01:29<08:24, 4.28it/s]
363
  13%|β–ˆβ–Ž | 322/2480 [01:29<09:15, 3.89it/s]
364
  13%|β–ˆβ–Ž | 323/2480 [01:29<09:26, 3.81it/s]
365
  13%|β–ˆβ–Ž | 324/2480 [01:30<10:55, 3.29it/s]
366
  13%|β–ˆβ–Ž | 325/2480 [01:30<09:29, 3.78it/s]
367
  13%|β–ˆβ–Ž | 326/2480 [01:30<11:22, 3.16it/s]
368
  13%|β–ˆβ–Ž | 327/2480 [01:31<10:10, 3.52it/s]
369
  13%|β–ˆβ–Ž | 328/2480 [01:31<09:23, 3.82it/s]
370
  13%|β–ˆβ–Ž | 329/2480 [01:31<09:28, 3.79it/s]
371
  13%|β–ˆβ–Ž | 330/2480 [01:31<09:19, 3.84it/s]
372
  13%|β–ˆβ–Ž | 331/2480 [01:32<08:50, 4.05it/s]
373
  13%|β–ˆβ–Ž | 332/2480 [01:32<08:49, 4.05it/s]
374
  13%|β–ˆβ–Ž | 333/2480 [01:32<09:10, 3.90it/s]
375
  13%|β–ˆβ–Ž | 334/2480 [01:32<08:54, 4.02it/s]
376
  14%|β–ˆβ–Ž | 335/2480 [01:33<09:15, 3.86it/s]
377
  14%|β–ˆβ–Ž | 336/2480 [01:33<08:58, 3.98it/s]
378
  14%|β–ˆβ–Ž | 337/2480 [01:33<08:27, 4.22it/s]
379
  14%|β–ˆβ–Ž | 338/2480 [01:33<09:11, 3.89it/s]
380
  14%|β–ˆβ–Ž | 339/2480 [01:34<08:56, 3.99it/s]
381
  14%|β–ˆβ–Ž | 340/2480 [01:34<08:26, 4.22it/s]
382
  14%|β–ˆβ– | 341/2480 [01:34<08:03, 4.42it/s]
383
  14%|β–ˆβ– | 342/2480 [01:34<08:36, 4.14it/s]
384
  14%|β–ˆβ– | 343/2480 [01:35<08:30, 4.18it/s]
385
  14%|β–ˆβ– | 344/2480 [01:35<09:19, 3.81it/s]
386
  14%|β–ˆβ– | 345/2480 [01:35<08:34, 4.15it/s]
387
  14%|β–ˆβ– | 346/2480 [01:35<08:08, 4.36it/s]
388
  14%|β–ˆβ– | 347/2480 [01:35<07:56, 4.48it/s]
389
  14%|β–ˆβ– | 348/2480 [01:36<07:23, 4.81it/s]
390
  14%|β–ˆβ– | 349/2480 [01:36<07:13, 4.91it/s]
391
  14%|β–ˆβ– | 350/2480 [01:36<07:20, 4.84it/s]
392
  14%|β–ˆβ– | 351/2480 [01:36<07:24, 4.78it/s]
393
  14%|β–ˆβ– | 352/2480 [01:36<08:01, 4.42it/s]
394
  14%|β–ˆβ– | 353/2480 [01:37<08:36, 4.11it/s]
395
  14%|β–ˆβ– | 354/2480 [01:37<08:28, 4.18it/s]
396
  14%|β–ˆβ– | 355/2480 [01:37<08:44, 4.05it/s]
397
  14%|β–ˆβ– | 356/2480 [01:37<07:58, 4.44it/s]
398
  14%|β–ˆβ– | 357/2480 [01:38<08:08, 4.35it/s]
399
  14%|β–ˆβ– | 358/2480 [01:38<07:42, 4.59it/s]
400
  14%|β–ˆβ– | 359/2480 [01:38<07:46, 4.55it/s]
401
  15%|β–ˆβ– | 360/2480 [01:38<07:13, 4.89it/s]
402
  15%|β–ˆβ– | 361/2480 [01:39<07:37, 4.63it/s]
403
  15%|β–ˆβ– | 362/2480 [01:39<08:03, 4.38it/s]
404
  15%|β–ˆβ– | 363/2480 [01:39<08:08, 4.34it/s]
405
  15%|β–ˆβ– | 364/2480 [01:39<08:10, 4.31it/s]
406
  15%|β–ˆβ– | 365/2480 [01:39<07:53, 4.46it/s]
407
  15%|β–ˆβ– | 366/2480 [01:40<08:12, 4.29it/s]
408
  15%|β–ˆβ– | 367/2480 [01:40<07:50, 4.49it/s]
409
  15%|β–ˆβ– | 368/2480 [01:40<09:52, 3.56it/s]
410
  15%|β–ˆβ– | 369/2480 [01:41<09:27, 3.72it/s]
411
  15%|β–ˆβ– | 370/2480 [01:41<09:19, 3.77it/s]
412
  15%|β–ˆβ– | 371/2480 [01:41<09:02, 3.89it/s]
413
  15%|β–ˆβ–Œ | 372/2480 [01:41<08:39, 4.06it/s]
414
  15%|β–ˆβ–Œ | 373/2480 [01:42<09:22, 3.75it/s]
415
  15%|β–ˆβ–Œ | 374/2480 [01:42<08:36, 4.08it/s]
416
  15%|β–ˆβ–Œ | 375/2480 [01:42<09:23, 3.74it/s]
417
  15%|β–ˆβ–Œ | 376/2480 [01:42<08:49, 3.97it/s]
418
  15%|β–ˆβ–Œ | 377/2480 [01:43<08:18, 4.22it/s]
419
  15%|β–ˆβ–Œ | 378/2480 [01:43<07:47, 4.49it/s]
420
  15%|β–ˆβ–Œ | 379/2480 [01:43<08:04, 4.34it/s]
421
  15%|β–ˆβ–Œ | 380/2480 [01:43<08:11, 4.27it/s]
422
  15%|β–ˆβ–Œ | 381/2480 [01:43<08:26, 4.14it/s]
423
  15%|β–ˆβ–Œ | 382/2480 [01:44<08:41, 4.02it/s]
424
  15%|β–ˆβ–Œ | 383/2480 [01:44<08:20, 4.19it/s]
425
  15%|β–ˆβ–Œ | 384/2480 [01:44<08:18, 4.21it/s]
426
  16%|β–ˆβ–Œ | 385/2480 [01:44<08:01, 4.35it/s]
427
  16%|β–ˆβ–Œ | 386/2480 [01:45<08:05, 4.32it/s]
428
  16%|β–ˆβ–Œ | 387/2480 [01:45<08:09, 4.28it/s]
429
  16%|β–ˆβ–Œ | 388/2480 [01:45<07:40, 4.54it/s]
430
  16%|β–ˆβ–Œ | 389/2480 [01:45<07:19, 4.76it/s]
431
  16%|β–ˆβ–Œ | 390/2480 [01:46<08:15, 4.21it/s]
432
  16%|β–ˆβ–Œ | 391/2480 [01:46<08:16, 4.21it/s]
433
  16%|β–ˆβ–Œ | 392/2480 [01:46<08:03, 4.32it/s]
434
  16%|β–ˆβ–Œ | 393/2480 [01:46<07:37, 4.56it/s]
435
  16%|β–ˆβ–Œ | 394/2480 [01:46<07:51, 4.42it/s]
436
  16%|β–ˆβ–Œ | 395/2480 [01:47<07:30, 4.63it/s]
437
  16%|β–ˆβ–Œ | 396/2480 [01:47<07:22, 4.71it/s]
438
  16%|β–ˆβ–Œ | 397/2480 [01:47<07:09, 4.85it/s]
439
  16%|β–ˆβ–Œ | 398/2480 [01:47<07:15, 4.78it/s]
440
  16%|β–ˆβ–Œ | 399/2480 [01:47<07:29, 4.63it/s]
441
  16%|β–ˆβ–Œ | 400/2480 [01:48<08:38, 4.01it/s]
442
  16%|β–ˆβ–Œ | 401/2480 [01:48<08:26, 4.11it/s]
443
  16%|β–ˆβ–Œ | 402/2480 [01:48<08:07, 4.26it/s]
444
  16%|β–ˆβ–‹ | 403/2480 [01:49<09:51, 3.51it/s]
445
  16%|β–ˆβ–‹ | 404/2480 [01:49<09:03, 3.82it/s]
446
  16%|β–ˆβ–‹ | 405/2480 [01:49<09:01, 3.83it/s]
447
  16%|β–ˆβ–‹ | 406/2480 [01:49<08:35, 4.02it/s]
448
  16%|β–ˆβ–‹ | 407/2480 [01:50<09:48, 3.52it/s]
449
  16%|β–ˆβ–‹ | 408/2480 [01:50<10:05, 3.42it/s]
450
  16%|β–ˆβ–‹ | 409/2480 [01:50<09:18, 3.71it/s]
451
  17%|β–ˆβ–‹ | 410/2480 [01:50<08:44, 3.95it/s]
452
  17%|β–ˆβ–‹ | 411/2480 [01:51<09:09, 3.76it/s]
453
  17%|β–ˆβ–‹ | 412/2480 [01:51<09:02, 3.81it/s]
454
  17%|β–ˆβ–‹ | 413/2480 [01:51<08:49, 3.90it/s]
455
  17%|β–ˆβ–‹ | 414/2480 [01:51<08:50, 3.89it/s]
456
  17%|β–ˆβ–‹ | 415/2480 [01:52<08:39, 3.97it/s]
 
1
+ 2024-09-04 18:26:58.019800: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2
+ 2024-09-04 18:26:58.038161: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
3
+ 2024-09-04 18:26:58.059897: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
4
+ 2024-09-04 18:26:58.066439: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
5
+ 2024-09-04 18:26:58.082659: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
6
+ To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7
+ 2024-09-04 18:26:59.362821: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
8
+ /usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of πŸ€— Transformers. Use `eval_strategy` instead
9
+ warnings.warn(
10
+ 09/04/2024 18:27:00 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
11
+ 09/04/2024 18:27:00 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
12
+ _n_gpu=1,
13
+ accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
14
+ adafactor=False,
15
+ adam_beta1=0.9,
16
+ adam_beta2=0.999,
17
+ adam_epsilon=1e-08,
18
+ auto_find_batch_size=False,
19
+ batch_eval_metrics=False,
20
+ bf16=False,
21
+ bf16_full_eval=False,
22
+ data_seed=None,
23
+ dataloader_drop_last=False,
24
+ dataloader_num_workers=0,
25
+ dataloader_persistent_workers=False,
26
+ dataloader_pin_memory=True,
27
+ dataloader_prefetch_factor=None,
28
+ ddp_backend=None,
29
+ ddp_broadcast_buffers=None,
30
+ ddp_bucket_cap_mb=None,
31
+ ddp_find_unused_parameters=None,
32
+ ddp_timeout=1800,
33
+ debug=[],
34
+ deepspeed=None,
35
+ disable_tqdm=False,
36
+ dispatch_batches=None,
37
+ do_eval=True,
38
+ do_predict=True,
39
+ do_train=True,
40
+ eval_accumulation_steps=None,
41
+ eval_delay=0,
42
+ eval_do_concat_batches=True,
43
+ eval_on_start=False,
44
+ eval_steps=None,
45
+ eval_strategy=epoch,
46
+ eval_use_gather_object=False,
47
+ evaluation_strategy=epoch,
48
+ fp16=False,
49
+ fp16_backend=auto,
50
+ fp16_full_eval=False,
51
+ fp16_opt_level=O1,
52
+ fsdp=[],
53
+ fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
54
+ fsdp_min_num_params=0,
55
+ fsdp_transformer_layer_cls_to_wrap=None,
56
+ full_determinism=False,
57
+ gradient_accumulation_steps=2,
58
+ gradient_checkpointing=False,
59
+ gradient_checkpointing_kwargs=None,
60
+ greater_is_better=True,
61
+ group_by_length=False,
62
+ half_precision_backend=auto,
63
+ hub_always_push=False,
64
+ hub_model_id=None,
65
+ hub_private_repo=False,
66
+ hub_strategy=every_save,
67
+ hub_token=<HUB_TOKEN>,
68
+ ignore_data_skip=False,
69
+ include_inputs_for_metrics=False,
70
+ include_num_input_tokens_seen=False,
71
+ include_tokens_per_second=False,
72
+ jit_mode_eval=False,
73
+ label_names=None,
74
+ label_smoothing_factor=0.0,
75
+ learning_rate=5e-05,
76
+ length_column_name=length,
77
+ load_best_model_at_end=True,
78
+ local_rank=0,
79
+ log_level=passive,
80
+ log_level_replica=warning,
81
+ log_on_each_node=True,
82
+ logging_dir=/content/dissertation/scripts/ner/output/tb,
83
+ logging_first_step=False,
84
+ logging_nan_inf_filter=True,
85
+ logging_steps=500,
86
+ logging_strategy=steps,
87
+ lr_scheduler_kwargs={},
88
+ lr_scheduler_type=linear,
89
+ max_grad_norm=1.0,
90
+ max_steps=-1,
91
+ metric_for_best_model=f1,
92
+ mp_parameters=,
93
+ neftune_noise_alpha=None,
94
+ no_cuda=False,
95
+ num_train_epochs=10.0,
96
+ optim=adamw_torch,
97
+ optim_args=None,
98
+ optim_target_modules=None,
99
+ output_dir=/content/dissertation/scripts/ner/output,
100
+ overwrite_output_dir=True,
101
+ past_index=-1,
102
+ per_device_eval_batch_size=8,
103
+ per_device_train_batch_size=32,
104
+ prediction_loss_only=False,
105
+ push_to_hub=True,
106
+ push_to_hub_model_id=None,
107
+ push_to_hub_organization=None,
108
+ push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
109
+ ray_scope=last,
110
+ remove_unused_columns=True,
111
+ report_to=['tensorboard'],
112
+ restore_callback_states_from_checkpoint=False,
113
+ resume_from_checkpoint=None,
114
+ run_name=/content/dissertation/scripts/ner/output,
115
+ save_on_each_node=False,
116
+ save_only_model=False,
117
+ save_safetensors=True,
118
+ save_steps=500,
119
+ save_strategy=epoch,
120
+ save_total_limit=None,
121
+ seed=42,
122
+ skip_memory_metrics=True,
123
+ split_batches=None,
124
+ tf32=None,
125
+ torch_compile=False,
126
+ torch_compile_backend=None,
127
+ torch_compile_mode=None,
128
+ torch_empty_cache_steps=None,
129
+ torchdynamo=None,
130
+ tpu_metrics_debug=False,
131
+ tpu_num_cores=None,
132
+ use_cpu=False,
133
+ use_ipex=False,
134
+ use_legacy_prediction_loop=False,
135
+ use_mps_device=False,
136
+ warmup_ratio=0.0,
137
+ warmup_steps=0,
138
+ weight_decay=0.0,
139
+ )
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+ [INFO|configuration_utils.py:733] 2024-09-04 18:27:16,977 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
148
+ [INFO|configuration_utils.py:800] 2024-09-04 18:27:16,984 >> Model config RobertaConfig {
149
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
150
+ "architectures": [
151
+ "RobertaForMaskedLM"
152
+ ],
153
+ "attention_probs_dropout_prob": 0.1,
154
+ "bos_token_id": 0,
155
+ "classifier_dropout": null,
156
+ "eos_token_id": 2,
157
+ "finetuning_task": "ner",
158
+ "gradient_checkpointing": false,
159
+ "hidden_act": "gelu",
160
+ "hidden_dropout_prob": 0.1,
161
+ "hidden_size": 768,
162
+ "id2label": {
163
+ "0": "O",
164
+ "1": "B-SINTOMA",
165
+ "2": "I-SINTOMA"
166
+ },
167
+ "initializer_range": 0.02,
168
+ "intermediate_size": 3072,
169
+ "label2id": {
170
+ "B-SINTOMA": 1,
171
+ "I-SINTOMA": 2,
172
+ "O": 0
173
+ },
174
+ "layer_norm_eps": 1e-05,
175
+ "max_position_embeddings": 514,
176
+ "model_type": "roberta",
177
+ "num_attention_heads": 12,
178
+ "num_hidden_layers": 12,
179
+ "pad_token_id": 1,
180
+ "position_embedding_type": "absolute",
181
+ "transformers_version": "4.44.2",
182
+ "type_vocab_size": 1,
183
+ "use_cache": true,
184
+ "vocab_size": 50262
185
+ }
186
+
187
+ [INFO|configuration_utils.py:733] 2024-09-04 18:27:17,622 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
188
+ [INFO|configuration_utils.py:800] 2024-09-04 18:27:17,623 >> Model config RobertaConfig {
189
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
190
+ "architectures": [
191
+ "RobertaForMaskedLM"
192
+ ],
193
+ "attention_probs_dropout_prob": 0.1,
194
+ "bos_token_id": 0,
195
+ "classifier_dropout": null,
196
+ "eos_token_id": 2,
197
+ "gradient_checkpointing": false,
198
+ "hidden_act": "gelu",
199
+ "hidden_dropout_prob": 0.1,
200
+ "hidden_size": 768,
201
+ "initializer_range": 0.02,
202
+ "intermediate_size": 3072,
203
+ "layer_norm_eps": 1e-05,
204
+ "max_position_embeddings": 514,
205
+ "model_type": "roberta",
206
+ "num_attention_heads": 12,
207
+ "num_hidden_layers": 12,
208
+ "pad_token_id": 1,
209
+ "position_embedding_type": "absolute",
210
+ "transformers_version": "4.44.2",
211
+ "type_vocab_size": 1,
212
+ "use_cache": true,
213
+ "vocab_size": 50262
214
+ }
215
+
216
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
217
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
218
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file tokenizer.json from cache at None
219
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file added_tokens.json from cache at None
220
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
221
+ [INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
222
+ [INFO|configuration_utils.py:733] 2024-09-04 18:27:21,903 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
223
+ [INFO|configuration_utils.py:800] 2024-09-04 18:27:21,904 >> Model config RobertaConfig {
224
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
225
+ "architectures": [
226
+ "RobertaForMaskedLM"
227
+ ],
228
+ "attention_probs_dropout_prob": 0.1,
229
+ "bos_token_id": 0,
230
+ "classifier_dropout": null,
231
+ "eos_token_id": 2,
232
+ "gradient_checkpointing": false,
233
+ "hidden_act": "gelu",
234
+ "hidden_dropout_prob": 0.1,
235
+ "hidden_size": 768,
236
+ "initializer_range": 0.02,
237
+ "intermediate_size": 3072,
238
+ "layer_norm_eps": 1e-05,
239
+ "max_position_embeddings": 514,
240
+ "model_type": "roberta",
241
+ "num_attention_heads": 12,
242
+ "num_hidden_layers": 12,
243
+ "pad_token_id": 1,
244
+ "position_embedding_type": "absolute",
245
+ "transformers_version": "4.44.2",
246
+ "type_vocab_size": 1,
247
+ "use_cache": true,
248
+ "vocab_size": 50262
249
+ }
250
+
251
+ /usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
252
+ warnings.warn(
253
+ [INFO|configuration_utils.py:733] 2024-09-04 18:27:21,979 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
254
+ [INFO|configuration_utils.py:800] 2024-09-04 18:27:21,981 >> Model config RobertaConfig {
255
+ "_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
256
+ "architectures": [
257
+ "RobertaForMaskedLM"
258
+ ],
259
+ "attention_probs_dropout_prob": 0.1,
260
+ "bos_token_id": 0,
261
+ "classifier_dropout": null,
262
+ "eos_token_id": 2,
263
+ "gradient_checkpointing": false,
264
+ "hidden_act": "gelu",
265
+ "hidden_dropout_prob": 0.1,
266
+ "hidden_size": 768,
267
+ "initializer_range": 0.02,
268
+ "intermediate_size": 3072,
269
+ "layer_norm_eps": 1e-05,
270
+ "max_position_embeddings": 514,
271
+ "model_type": "roberta",
272
+ "num_attention_heads": 12,
273
+ "num_hidden_layers": 12,
274
+ "pad_token_id": 1,
275
+ "position_embedding_type": "absolute",
276
+ "transformers_version": "4.44.2",
277
+ "type_vocab_size": 1,
278
+ "use_cache": true,
279
+ "vocab_size": 50262
280
+ }
281
+
282
+ [INFO|modeling_utils.py:3678] 2024-09-04 18:27:26,079 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
283
+ [INFO|modeling_utils.py:4497] 2024-09-04 18:27:26,219 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
284
+ - This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
285
+ - This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
286
+ [WARNING|modeling_utils.py:4509] 2024-09-04 18:27:26,219 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
287
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
288
+
289
+
290
+
291
+ /content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library πŸ€— Evaluate: https://huggingface.co/docs/evaluate
292
+ metric = load_metric("seqeval", trust_remote_code=True)
293
+
294
+ [INFO|trainer.py:811] 2024-09-04 18:27:34,644 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
295
+ [INFO|trainer.py:2134] 2024-09-04 18:27:35,302 >> ***** Running training *****
296
+ [INFO|trainer.py:2135] 2024-09-04 18:27:35,302 >> Num examples = 15,848
297
+ [INFO|trainer.py:2136] 2024-09-04 18:27:35,302 >> Num Epochs = 10
298
+ [INFO|trainer.py:2137] 2024-09-04 18:27:35,302 >> Instantaneous batch size per device = 32
299
+ [INFO|trainer.py:2140] 2024-09-04 18:27:35,302 >> Total train batch size (w. parallel, distributed & accumulation) = 64
300
+ [INFO|trainer.py:2141] 2024-09-04 18:27:35,302 >> Gradient Accumulation steps = 2
301
+ [INFO|trainer.py:2142] 2024-09-04 18:27:35,302 >> Total optimization steps = 2,480
302
+ [INFO|trainer.py:2143] 2024-09-04 18:27:35,303 >> Number of trainable parameters = 124,055,043
303
+
304
  0%| | 0/2480 [00:00<?, ?it/s]
305
  0%| | 1/2480 [00:01<1:07:07, 1.62s/it]
306
  0%| | 2/2480 [00:01<33:03, 1.25it/s]
307
  0%| | 3/2480 [00:02<24:43, 1.67it/s]
308
  0%| | 4/2480 [00:02<19:24, 2.13it/s]
309
  0%| | 5/2480 [00:02<15:23, 2.68it/s]
310
  0%| | 6/2480 [00:02<13:36, 3.03it/s]
311
  0%| | 7/2480 [00:03<11:58, 3.44it/s]
312
  0%| | 8/2480 [00:03<10:57, 3.76it/s]
313
  0%| | 9/2480 [00:03<10:29, 3.93it/s]
314
  0%| | 10/2480 [00:03<11:55, 3.45it/s]
315
  0%| | 11/2480 [00:04<12:31, 3.28it/s]
316
  0%| | 12/2480 [00:04<11:38, 3.53it/s]
317
  1%| | 13/2480 [00:04<11:55, 3.45it/s]
318
  1%| | 14/2480 [00:05<10:50, 3.79it/s]
319
  1%| | 15/2480 [00:05<10:23, 3.95it/s]
320
  1%| | 16/2480 [00:05<10:14, 4.01it/s]
321
  1%| | 17/2480 [00:05<10:07, 4.05it/s]
322
  1%| | 18/2480 [00:05<09:43, 4.22it/s]
323
  1%| | 19/2480 [00:06<10:53, 3.77it/s]
324
  1%| | 20/2480 [00:06<10:34, 3.88it/s]
325
  1%| | 21/2480 [00:06<10:42, 3.83it/s]
326
  1%| | 22/2480 [00:06<10:01, 4.09it/s]
327
  1%| | 23/2480 [00:07<09:20, 4.39it/s]
328
  1%| | 24/2480 [00:07<08:53, 4.60it/s]
329
  1%| | 25/2480 [00:07<08:51, 4.62it/s]
330
  1%| | 26/2480 [00:07<08:54, 4.59it/s]
331
  1%| | 27/2480 [00:08<09:03, 4.51it/s]
332
  1%| | 28/2480 [00:08<09:00, 4.54it/s]
333
  1%| | 29/2480 [00:08<09:09, 4.46it/s]
334
  1%| | 30/2480 [00:08<09:54, 4.12it/s]
335
  1%|▏ | 31/2480 [00:08<09:15, 4.41it/s]
336
  1%|▏ | 32/2480 [00:09<09:01, 4.52it/s]
337
  1%|▏ | 33/2480 [00:09<09:02, 4.51it/s]
338
  1%|▏ | 34/2480 [00:09<09:19, 4.37it/s]
339
  1%|▏ | 35/2480 [00:09<09:42, 4.20it/s]
340
  1%|▏ | 36/2480 [00:10<09:13, 4.42it/s]
341
  1%|▏ | 37/2480 [00:10<12:51, 3.16it/s]
342
  2%|▏ | 38/2480 [00:10<11:34, 3.52it/s]
343
  2%|▏ | 39/2480 [00:11<11:25, 3.56it/s]
344
  2%|▏ | 40/2480 [00:11<10:18, 3.95it/s]
345
  2%|▏ | 41/2480 [00:11<09:34, 4.25it/s]
346
  2%|▏ | 42/2480 [00:11<09:16, 4.38it/s]
347
  2%|▏ | 43/2480 [00:11<08:59, 4.52it/s]
348
  2%|▏ | 44/2480 [00:12<10:12, 3.98it/s]
349
  2%|▏ | 45/2480 [00:12<09:52, 4.11it/s]
350
  2%|▏ | 46/2480 [00:12<11:42, 3.46it/s]
351
  2%|▏ | 47/2480 [00:13<10:40, 3.80it/s]
352
  2%|▏ | 48/2480 [00:13<10:11, 3.98it/s]
353
  2%|▏ | 49/2480 [00:13<11:17, 3.59it/s]
354
  2%|▏ | 50/2480 [00:13<11:50, 3.42it/s]
355
  2%|▏ | 51/2480 [00:14<11:00, 3.68it/s]
356
  2%|▏ | 52/2480 [00:14<10:50, 3.73it/s]
357
  2%|▏ | 53/2480 [00:14<10:01, 4.04it/s]
358
  2%|▏ | 54/2480 [00:15<12:09, 3.33it/s]
359
  2%|▏ | 55/2480 [00:15<12:02, 3.36it/s]
360
  2%|▏ | 56/2480 [00:15<10:46, 3.75it/s]
361
  2%|▏ | 57/2480 [00:15<10:55, 3.70it/s]
362
  2%|▏ | 58/2480 [00:16<10:10, 3.97it/s]
363
  2%|▏ | 59/2480 [00:16<09:52, 4.09it/s]
364
  2%|▏ | 60/2480 [00:16<09:39, 4.17it/s]
365
  2%|▏ | 61/2480 [00:16<08:59, 4.48it/s]
366
  2%|β–Ž | 62/2480 [00:16<08:44, 4.61it/s]
367
  3%|β–Ž | 63/2480 [00:17<08:49, 4.56it/s]
368
  3%|β–Ž | 64/2480 [00:17<10:09, 3.97it/s]
369
  3%|β–Ž | 65/2480 [00:17<09:53, 4.07it/s]
370
  3%|β–Ž | 66/2480 [00:17<10:26, 3.85it/s]
371
  3%|β–Ž | 67/2480 [00:18<10:40, 3.77it/s]
372
  3%|β–Ž | 68/2480 [00:18<10:02, 4.00it/s]
373
  3%|β–Ž | 69/2480 [00:18<09:33, 4.21it/s]
374
  3%|β–Ž | 70/2480 [00:18<09:27, 4.25it/s]
375
  3%|β–Ž | 71/2480 [00:19<09:44, 4.12it/s]
376
  3%|β–Ž | 72/2480 [00:19<10:20, 3.88it/s]
377
  3%|β–Ž | 73/2480 [00:19<09:47, 4.10it/s]
378
  3%|β–Ž | 74/2480 [00:20<11:34, 3.47it/s]
379
  3%|β–Ž | 75/2480 [00:20<11:25, 3.51it/s]
380
  3%|β–Ž | 76/2480 [00:20<11:33, 3.47it/s]
381
  3%|β–Ž | 77/2480 [00:20<10:47, 3.71it/s]
382
  3%|β–Ž | 78/2480 [00:21<10:10, 3.93it/s]
383
  3%|β–Ž | 79/2480 [00:21<09:52, 4.05it/s]
384
  3%|β–Ž | 80/2480 [00:21<09:47, 4.09it/s]
385
  3%|β–Ž | 81/2480 [00:21<11:08, 3.59it/s]
386
  3%|β–Ž | 82/2480 [00:22<10:41, 3.74it/s]
387
  3%|β–Ž | 83/2480 [00:22<09:51, 4.05it/s]
388
  3%|β–Ž | 84/2480 [00:22<10:24, 3.83it/s]
389
  3%|β–Ž | 85/2480 [00:22<10:22, 3.85it/s]
390
  3%|β–Ž | 86/2480 [00:23<09:52, 4.04it/s]
391
  4%|β–Ž | 87/2480 [00:23<10:24, 3.83it/s]
392
  4%|β–Ž | 88/2480 [00:23<09:52, 4.04it/s]
393
  4%|β–Ž | 89/2480 [00:23<10:02, 3.97it/s]
394
  4%|β–Ž | 90/2480 [00:24<11:55, 3.34it/s]
395
  4%|β–Ž | 91/2480 [00:24<11:08, 3.57it/s]
396
  4%|β–Ž | 92/2480 [00:24<11:00, 3.62it/s]
397
  4%|▍ | 93/2480 [00:25<10:41, 3.72it/s]
398
  4%|▍ | 94/2480 [00:25<10:17, 3.87it/s]
399
  4%|▍ | 95/2480 [00:25<10:17, 3.86it/s]
400
  4%|▍ | 96/2480 [00:25<09:22, 4.24it/s]
401
  4%|▍ | 97/2480 [00:25<09:48, 4.05it/s]
402
  4%|▍ | 98/2480 [00:26<08:58, 4.43it/s]
403
  4%|▍ | 99/2480 [00:26<10:07, 3.92it/s]
404
  4%|▍ | 100/2480 [00:26<09:27, 4.19it/s]
405
  4%|▍ | 101/2480 [00:26<09:08, 4.33it/s]
406
  4%|▍ | 102/2480 [00:27<09:17, 4.27it/s]
407
  4%|▍ | 103/2480 [00:27<08:58, 4.41it/s]
408
  4%|▍ | 104/2480 [00:27<08:55, 4.44it/s]
409
  4%|▍ | 105/2480 [00:27<09:26, 4.19it/s]
410
  4%|▍ | 106/2480 [00:28<09:53, 4.00it/s]
411
  4%|▍ | 107/2480 [00:28<09:29, 4.17it/s]
412
  4%|▍ | 108/2480 [00:28<09:57, 3.97it/s]
413
  4%|▍ | 109/2480 [00:28<09:08, 4.32it/s]
414
  4%|▍ | 110/2480 [00:29<09:11, 4.30it/s]
415
  4%|▍ | 111/2480 [00:29<09:05, 4.34it/s]
416
  5%|▍ | 112/2480 [00:29<08:44, 4.51it/s]
417
  5%|▍ | 113/2480 [00:29<08:33, 4.61it/s]
418
  5%|▍ | 114/2480 [00:29<08:39, 4.55it/s]
419
  5%|▍ | 115/2480 [00:30<08:11, 4.81it/s]
420
  5%|▍ | 116/2480 [00:30<08:29, 4.64it/s]
421
  5%|▍ | 117/2480 [00:30<08:45, 4.50it/s]
422
  5%|▍ | 118/2480 [00:30<10:20, 3.81it/s]
423
  5%|▍ | 119/2480 [00:31<10:47, 3.65it/s]
424
  5%|▍ | 120/2480 [00:31<10:03, 3.91it/s]
425
  5%|▍ | 121/2480 [00:31<11:11, 3.51it/s]
426
  5%|▍ | 122/2480 [00:31<10:07, 3.88it/s]
427
  5%|▍ | 123/2480 [00:32<09:46, 4.02it/s]
428
  5%|β–Œ | 124/2480 [00:32<09:41, 4.05it/s]
429
  5%|β–Œ | 125/2480 [00:32<09:50, 3.99it/s]
430
  5%|β–Œ | 126/2480 [00:32<10:29, 3.74it/s]
431
  5%|β–Œ | 127/2480 [00:33<11:12, 3.50it/s]
432
  5%|β–Œ | 128/2480 [00:33<11:23, 3.44it/s]
433
  5%|β–Œ | 129/2480 [00:33<11:44, 3.34it/s]
434
  5%|β–Œ | 130/2480 [00:34<10:48, 3.62it/s]
435
  5%|β–Œ | 131/2480 [00:34<10:45, 3.64it/s]
436
  5%|β–Œ | 132/2480 [00:34<12:09, 3.22it/s]
437
  5%|β–Œ | 133/2480 [00:35<11:14, 3.48it/s]
438
  5%|β–Œ | 134/2480 [00:35<10:11, 3.83it/s]
439
  5%|β–Œ | 135/2480 [00:35<09:29, 4.12it/s]
440
  5%|β–Œ | 136/2480 [00:35<08:55, 4.38it/s]
441
  6%|β–Œ | 137/2480 [00:35<08:53, 4.39it/s]
442
  6%|β–Œ | 138/2480 [00:36<09:30, 4.10it/s]
443
  6%|β–Œ | 139/2480 [00:36<09:22, 4.16it/s]
444
  6%|β–Œ | 140/2480 [00:36<09:46, 3.99it/s]
445
  6%|β–Œ | 141/2480 [00:36<10:17, 3.79it/s]
446
  6%|β–Œ | 142/2480 [00:37<09:19, 4.18it/s]
447
  6%|β–Œ | 143/2480 [00:37<09:22, 4.15it/s]
448
  6%|β–Œ | 144/2480 [00:37<09:46, 3.98it/s]
449
  6%|β–Œ | 145/2480 [00:37<09:36, 4.05it/s]
450
  6%|β–Œ | 146/2480 [00:38<09:16, 4.19it/s]
451
  6%|β–Œ | 147/2480 [00:38<08:56, 4.35it/s]
452
  6%|β–Œ | 148/2480 [00:38<09:01, 4.31it/s]
453
  6%|β–Œ | 149/2480 [00:38<08:46, 4.43it/s]
454
  6%|β–Œ | 150/2480 [00:39<09:03, 4.29it/s]
455
  6%|β–Œ | 151/2480 [00:39<08:38, 4.50it/s]
456
  6%|β–Œ | 152/2480 [00:39<08:57, 4.33it/s]
457
  6%|β–Œ | 153/2480 [00:39<09:12, 4.21it/s]
458
  6%|β–Œ | 154/2480 [00:39<09:25, 4.11it/s]
459
  6%|β–‹ | 155/2480 [00:40<09:55, 3.91it/s]
460
  6%|β–‹ | 156/2480 [00:40<09:35, 4.04it/s]
461
  6%|β–‹ | 157/2480 [00:40<09:05, 4.26it/s]
462
  6%|β–‹ | 158/2480 [00:40<08:57, 4.32it/s]
463
  6%|β–‹ | 159/2480 [00:41<09:42, 3.98it/s]
464
  6%|β–‹ | 160/2480 [00:41<09:12, 4.20it/s]
465
  6%|β–‹ | 161/2480 [00:41<08:26, 4.58it/s]
466
  7%|β–‹ | 162/2480 [00:41<08:04, 4.78it/s]
467
  7%|β–‹ | 163/2480 [00:42<08:28, 4.56it/s]
468
  7%|β–‹ | 164/2480 [00:42<09:54, 3.90it/s]
469
  7%|β–‹ | 165/2480 [00:42<10:08, 3.81it/s]
470
  7%|β–‹ | 166/2480 [00:42<09:49, 3.93it/s]
471
  7%|β–‹ | 167/2480 [00:43<09:09, 4.21it/s]
472
  7%|β–‹ | 168/2480 [00:43<09:38, 4.00it/s]
473
  7%|β–‹ | 169/2480 [00:43<09:07, 4.22it/s]
474
  7%|β–‹ | 170/2480 [00:43<09:17, 4.14it/s]
475
  7%|β–‹ | 171/2480 [00:44<09:21, 4.11it/s]
476
  7%|β–‹ | 172/2480 [00:44<09:06, 4.22it/s]
477
  7%|β–‹ | 173/2480 [00:44<08:49, 4.36it/s]
478
  7%|β–‹ | 174/2480 [00:44<09:14, 4.16it/s]
479
  7%|β–‹ | 175/2480 [00:45<09:11, 4.18it/s]
480
  7%|β–‹ | 176/2480 [00:45<08:46, 4.38it/s]
481
  7%|β–‹ | 177/2480 [00:45<08:29, 4.52it/s]
482
  7%|β–‹ | 178/2480 [00:45<08:04, 4.76it/s]
483
  7%|β–‹ | 179/2480 [00:45<07:58, 4.81it/s]
484
  7%|β–‹ | 180/2480 [00:46<08:13, 4.66it/s]
485
  7%|β–‹ | 181/2480 [00:46<08:21, 4.58it/s]
486
  7%|β–‹ | 182/2480 [00:46<08:41, 4.41it/s]
487
  7%|β–‹ | 183/2480 [00:46<08:39, 4.42it/s]
488
  7%|β–‹ | 184/2480 [00:46<08:42, 4.39it/s]
489
  7%|β–‹ | 185/2480 [00:47<08:31, 4.48it/s]
490
  8%|β–Š | 186/2480 [00:47<08:22, 4.56it/s]
491
  8%|β–Š | 187/2480 [00:47<08:10, 4.68it/s]
492
  8%|β–Š | 188/2480 [00:47<08:16, 4.61it/s]
493
  8%|β–Š | 189/2480 [00:48<08:21, 4.57it/s]
494
  8%|β–Š | 190/2480 [00:48<08:52, 4.30it/s]
495
  8%|β–Š | 191/2480 [00:48<08:34, 4.45it/s]
496
  8%|β–Š | 192/2480 [00:48<08:58, 4.25it/s]
497
  8%|β–Š | 193/2480 [00:48<08:32, 4.47it/s]
498
  8%|β–Š | 194/2480 [00:49<09:04, 4.20it/s]
499
  8%|β–Š | 195/2480 [00:49<09:47, 3.89it/s]
500
  8%|β–Š | 196/2480 [00:49<09:36, 3.96it/s]
501
  8%|β–Š | 197/2480 [00:50<11:04, 3.44it/s]
502
  8%|β–Š | 198/2480 [00:50<10:01, 3.80it/s]
503
  8%|β–Š | 199/2480 [00:50<09:13, 4.12it/s]
504
  8%|β–Š | 200/2480 [00:50<08:43, 4.35it/s]
505
  8%|β–Š | 201/2480 [00:50<08:30, 4.47it/s]
506
  8%|β–Š | 202/2480 [00:51<08:31, 4.46it/s]
507
  8%|β–Š | 203/2480 [00:51<10:17, 3.69it/s]
508
  8%|β–Š | 204/2480 [00:51<10:29, 3.62it/s]
509
  8%|β–Š | 205/2480 [00:52<09:39, 3.93it/s]
510
  8%|β–Š | 206/2480 [00:52<09:18, 4.07it/s]
511
  8%|β–Š | 207/2480 [00:52<09:36, 3.94it/s]
512
  8%|β–Š | 208/2480 [00:52<10:23, 3.65it/s]
513
  8%|β–Š | 209/2480 [00:53<10:22, 3.65it/s]
514
  8%|β–Š | 210/2480 [00:53<09:54, 3.82it/s]
515
  9%|β–Š | 211/2480 [00:53<09:31, 3.97it/s]
516
  9%|β–Š | 212/2480 [00:53<09:37, 3.93it/s]
517
  9%|β–Š | 213/2480 [00:54<09:25, 4.01it/s]
518
  9%|β–Š | 214/2480 [00:54<09:16, 4.07it/s]
519
  9%|β–Š | 215/2480 [00:54<09:42, 3.89it/s]
520
  9%|β–Š | 216/2480 [00:54<09:37, 3.92it/s]
521
  9%|β–‰ | 217/2480 [00:55<09:27, 3.99it/s]
522
  9%|β–‰ | 218/2480 [00:55<09:32, 3.95it/s]
523
  9%|β–‰ | 219/2480 [00:55<08:53, 4.24it/s]
524
  9%|β–‰ | 220/2480 [00:55<10:10, 3.70it/s]
525
  9%|β–‰ | 221/2480 [00:56<09:48, 3.84it/s]
526
  9%|β–‰ | 222/2480 [00:56<08:54, 4.23it/s]
527
  9%|β–‰ | 223/2480 [00:56<09:11, 4.09it/s]
528
  9%|β–‰ | 224/2480 [00:56<09:01, 4.17it/s]
529
  9%|β–‰ | 225/2480 [00:57<09:45, 3.85it/s]
530
  9%|β–‰ | 226/2480 [00:57<09:24, 4.00it/s]
531
  9%|β–‰ | 227/2480 [00:57<09:26, 3.98it/s]
532
  9%|β–‰ | 228/2480 [00:57<08:58, 4.19it/s]
533
  9%|β–‰ | 229/2480 [00:58<08:33, 4.39it/s]
534
  9%|β–‰ | 230/2480 [00:58<08:21, 4.48it/s]
535
  9%|β–‰ | 231/2480 [00:58<08:32, 4.39it/s]
536
  9%|β–‰ | 232/2480 [00:58<09:05, 4.12it/s]
537
  9%|β–‰ | 233/2480 [00:59<08:56, 4.19it/s]
538
  9%|β–‰ | 234/2480 [00:59<09:25, 3.97it/s]
539
  9%|β–‰ | 235/2480 [00:59<09:12, 4.07it/s]
540
  10%|β–‰ | 236/2480 [00:59<10:24, 3.60it/s]
541
  10%|β–‰ | 237/2480 [01:00<09:34, 3.90it/s]
542
  10%|β–‰ | 238/2480 [01:00<09:06, 4.11it/s]
543
  10%|β–‰ | 239/2480 [01:00<08:58, 4.16it/s]
544
  10%|β–‰ | 240/2480 [01:00<08:21, 4.47it/s]
545
  10%|β–‰ | 241/2480 [01:00<07:53, 4.73it/s]
546
  10%|β–‰ | 242/2480 [01:01<08:08, 4.58it/s]
547
  10%|β–‰ | 243/2480 [01:01<08:34, 4.35it/s]
548
  10%|β–‰ | 244/2480 [01:01<09:04, 4.10it/s]
549
  10%|β–‰ | 245/2480 [01:01<09:14, 4.03it/s]
550
  10%|β–‰ | 246/2480 [01:02<08:50, 4.21it/s]
551
  10%|β–‰ | 247/2480 [01:02<08:32, 4.36it/s]
552
  10%|β–ˆ | 248/2480 [01:02<07:59, 4.65it/s][INFO|trainer.py:811] 2024-09-04 18:28:37,832 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
553
+ [INFO|trainer.py:3819] 2024-09-04 18:28:37,835 >>
554
+ ***** Running Evaluation *****
555
+ [INFO|trainer.py:3821] 2024-09-04 18:28:37,835 >> Num examples = 2519
556
+ [INFO|trainer.py:3824] 2024-09-04 18:28:37,835 >> Batch size = 8
557
+
558
+
559
  0%| | 0/315 [00:00<?, ?it/s]
560
+
561
  3%|β–Ž | 9/315 [00:00<00:03, 87.62it/s]
562
+
563
  6%|β–Œ | 18/315 [00:00<00:03, 84.29it/s]
564
+
565
  9%|β–Š | 27/315 [00:00<00:03, 82.00it/s]
566
+
567
  11%|β–ˆβ– | 36/315 [00:00<00:03, 76.52it/s]
568
+
569
  14%|β–ˆβ– | 45/315 [00:00<00:03, 78.83it/s]
570
+
571
  17%|β–ˆβ–‹ | 54/315 [00:00<00:03, 79.38it/s]
572
+
573
  20%|β–ˆβ–ˆ | 63/315 [00:00<00:03, 78.44it/s]
574
+
575
  23%|β–ˆβ–ˆβ–Ž | 72/315 [00:00<00:03, 80.31it/s]
576
+
577
  26%|β–ˆβ–ˆβ–Œ | 81/315 [00:01<00:02, 79.22it/s]
578
+
579
  28%|β–ˆβ–ˆβ–Š | 89/315 [00:01<00:02, 78.97it/s]
580
+
581
  31%|β–ˆβ–ˆβ–ˆ | 97/315 [00:01<00:02, 78.20it/s]
582
+
583
  34%|β–ˆβ–ˆβ–ˆβ–Ž | 106/315 [00:01<00:02, 80.06it/s]
584
+
585
  37%|β–ˆβ–ˆβ–ˆβ–‹ | 115/315 [00:01<00:02, 81.29it/s]
586
+
587
  39%|β–ˆβ–ˆβ–ˆβ–‰ | 124/315 [00:01<00:02, 78.38it/s]
588
+
589
  42%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 132/315 [00:01<00:02, 78.59it/s]
590
+
591
  44%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 140/315 [00:01<00:02, 77.86it/s]
592
+
593
  47%|β–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 148/315 [00:01<00:02, 75.27it/s]
594
+
595
  50%|β–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 156/315 [00:01<00:02, 73.46it/s]
596
+
597
  52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 164/315 [00:02<00:02, 75.03it/s]
598
+
599
  55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 173/315 [00:02<00:01, 76.99it/s]
600
+
601
  57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 181/315 [00:02<00:01, 77.31it/s]
602
+
603
  60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 190/315 [00:02<00:01, 78.87it/s]
604
+
605
  63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 198/315 [00:02<00:01, 78.00it/s]
606
+
607
  65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 206/315 [00:02<00:01, 78.04it/s]
608
+
609
  68%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 214/315 [00:02<00:01, 75.23it/s]
610
+
611
  70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 222/315 [00:02<00:01, 73.53it/s]
612
+
613
  73%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 230/315 [00:02<00:01, 72.57it/s]
614
+
615
  76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 239/315 [00:03<00:01, 74.60it/s]
616
+
617
  78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 247/315 [00:03<00:00, 74.88it/s]
618
+
619
  81%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 256/315 [00:03<00:00, 76.66it/s]
620
+
621
  84%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 265/315 [00:03<00:00, 78.61it/s]
622
+
623
  87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 274/315 [00:03<00:00, 81.06it/s]
624
+
625
  90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 283/315 [00:03<00:00, 82.28it/s]
626
+
627
  93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž| 292/315 [00:03<00:00, 79.10it/s]
628
+
629
  96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ| 301/315 [00:03<00:00, 80.22it/s]
630
+
631
  98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š| 310/315 [00:03<00:00, 81.05it/s]
632
 
633
+
634
 
635
  10%|β–ˆ | 248/2480 [01:08<07:59, 4.65it/s]
636
+
637
+
638
  [INFO|trainer.py:3503] 2024-09-04 18:28:43,350 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-248
639
+ [INFO|configuration_utils.py:472] 2024-09-04 18:28:43,351 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-248/config.json
640
+ [INFO|modeling_utils.py:2799] 2024-09-04 18:28:44,360 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-248/model.safetensors
641
+ [INFO|tokenization_utils_base.py:2684] 2024-09-04 18:28:44,361 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-248/tokenizer_config.json
642
+ [INFO|tokenization_utils_base.py:2693] 2024-09-04 18:28:44,362 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-248/special_tokens_map.json
643
+ [INFO|tokenization_utils_base.py:2684] 2024-09-04 18:28:46,425 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
644
+ [INFO|tokenization_utils_base.py:2693] 2024-09-04 18:28:46,425 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
645
+
646
  10%|β–ˆ | 249/2480 [01:11<1:44:55, 2.82s/it]
647
  10%|β–ˆ | 250/2480 [01:11<1:15:54, 2.04s/it]
648
  10%|β–ˆ | 251/2480 [01:11<55:54, 1.51s/it]
649
  10%|β–ˆ | 252/2480 [01:12<41:42, 1.12s/it]
650
  10%|β–ˆ | 253/2480 [01:12<31:32, 1.18it/s]
651
  10%|β–ˆ | 254/2480 [01:12<24:51, 1.49it/s]
652
  10%|β–ˆ | 255/2480 [01:12<21:00, 1.77it/s]
653
  10%|β–ˆ | 256/2480 [01:13<17:50, 2.08it/s]
654
  10%|β–ˆ | 257/2480 [01:13<15:06, 2.45it/s]
655
  10%|β–ˆ | 258/2480 [01:13<12:55, 2.87it/s]
656
  10%|β–ˆ | 259/2480 [01:13<11:19, 3.27it/s]
657
  10%|β–ˆ | 260/2480 [01:14<10:42, 3.45it/s]
658
  11%|β–ˆ | 261/2480 [01:14<10:07, 3.65it/s]
659
  11%|β–ˆ | 262/2480 [01:14<09:25, 3.92it/s]
660
  11%|β–ˆ | 263/2480 [01:14<09:55, 3.72it/s]
661
  11%|β–ˆ | 264/2480 [01:15<10:10, 3.63it/s]
662
  11%|β–ˆ | 265/2480 [01:15<10:24, 3.55it/s]
663
  11%|β–ˆ | 266/2480 [01:15<09:56, 3.71it/s]
664
  11%|β–ˆ | 267/2480 [01:15<09:49, 3.75it/s]
665
  11%|β–ˆ | 268/2480 [01:16<09:37, 3.83it/s]
666
  11%|β–ˆ | 269/2480 [01:16<09:43, 3.79it/s]
667
  11%|β–ˆ | 270/2480 [01:16<09:21, 3.94it/s]
668
  11%|β–ˆ | 271/2480 [01:16<09:51, 3.74it/s]
669
  11%|β–ˆ | 272/2480 [01:17<09:45, 3.77it/s]
670
  11%|β–ˆ | 273/2480 [01:17<09:37, 3.82it/s]
671
  11%|β–ˆ | 274/2480 [01:17<09:15, 3.97it/s]
672
  11%|β–ˆ | 275/2480 [01:18<09:48, 3.75it/s]
673
  11%|β–ˆ | 276/2480 [01:18<09:30, 3.86it/s]
674
  11%|β–ˆ | 277/2480 [01:18<08:50, 4.15it/s]
675
  11%|β–ˆ | 278/2480 [01:18<08:05, 4.53it/s]
676
  11%|β–ˆβ– | 279/2480 [01:18<09:13, 3.98it/s]
677
  11%|β–ˆβ– | 280/2480 [01:19<08:55, 4.11it/s]
678
  11%|β–ˆβ– | 281/2480 [01:19<08:30, 4.31it/s]
679
  11%|β–ˆβ– | 282/2480 [01:19<08:31, 4.29it/s]
680
  11%|β–ˆβ– | 283/2480 [01:19<08:42, 4.21it/s]
681
  11%|β–ˆβ– | 284/2480 [01:20<08:57, 4.09it/s]
682
  11%|β–ˆβ– | 285/2480 [01:20<08:46, 4.17it/s]
683
  12%|β–ˆβ– | 286/2480 [01:20<08:15, 4.43it/s]
684
  12%|β–ˆβ– | 287/2480 [01:20<07:45, 4.71it/s]
685
  12%|β–ˆβ– | 288/2480 [01:21<08:39, 4.22it/s]
686
  12%|β–ˆβ– | 289/2480 [01:21<09:57, 3.67it/s]
687
  12%|β–ˆβ– | 290/2480 [01:21<09:34, 3.81it/s]
688
  12%|β–ˆβ– | 291/2480 [01:21<09:32, 3.82it/s]
689
  12%|β–ˆβ– | 292/2480 [01:22<09:35, 3.80it/s]
690
  12%|β–ˆβ– | 293/2480 [01:22<09:23, 3.88it/s]
691
  12%|β–ˆβ– | 294/2480 [01:22<08:56, 4.08it/s]
692
  12%|β–ˆβ– | 295/2480 [01:22<09:27, 3.85it/s]
693
  12%|β–ˆβ– | 296/2480 [01:23<08:49, 4.12it/s]
694
  12%|β–ˆβ– | 297/2480 [01:23<08:55, 4.08it/s]
695
  12%|β–ˆβ– | 298/2480 [01:23<08:33, 4.25it/s]
696
  12%|β–ˆβ– | 299/2480 [01:23<09:39, 3.76it/s]
697
  12%|β–ˆβ– | 300/2480 [01:24<09:25, 3.86it/s]
698
  12%|β–ˆβ– | 301/2480 [01:24<08:34, 4.23it/s]
699
  12%|β–ˆβ– | 302/2480 [01:24<07:59, 4.54it/s]
700
  12%|β–ˆβ– | 303/2480 [01:25<11:53, 3.05it/s]
701
  12%|β–ˆβ– | 304/2480 [01:25<11:51, 3.06it/s]
702
  12%|β–ˆβ– | 305/2480 [01:25<10:15, 3.54it/s]
703
  12%|β–ˆβ– | 306/2480 [01:25<09:49, 3.69it/s]
704
  12%|β–ˆβ– | 307/2480 [01:26<09:18, 3.89it/s]
705
  12%|β–ˆβ– | 308/2480 [01:26<09:14, 3.92it/s]
706
  12%|β–ˆβ– | 309/2480 [01:26<08:48, 4.11it/s]
707
  12%|β–ˆβ–Ž | 310/2480 [01:26<08:11, 4.42it/s]
708
  13%|β–ˆβ–Ž | 311/2480 [01:26<07:49, 4.62it/s]
709
  13%|β–ˆβ–Ž | 312/2480 [01:27<07:32, 4.79it/s]
710
  13%|β–ˆβ–Ž | 313/2480 [01:27<07:56, 4.55it/s]
711
  13%|β–ˆβ–Ž | 314/2480 [01:27<07:45, 4.65it/s]
712
  13%|β–ˆβ–Ž | 315/2480 [01:27<09:12, 3.92it/s]
713
  13%|β–ˆβ–Ž | 316/2480 [01:28<09:02, 3.99it/s]
714
  13%|β–ˆβ–Ž | 317/2480 [01:28<08:47, 4.10it/s]
715
  13%|β–ˆβ–Ž | 318/2480 [01:28<09:01, 3.99it/s]
716
  13%|β–ˆβ–Ž | 319/2480 [01:28<08:19, 4.32it/s]
717
  13%|β–ˆβ–Ž | 320/2480 [01:29<08:30, 4.23it/s]
718
  13%|β–ˆβ–Ž | 321/2480 [01:29<08:24, 4.28it/s]
719
  13%|β–ˆβ–Ž | 322/2480 [01:29<09:15, 3.89it/s]
720
  13%|β–ˆβ–Ž | 323/2480 [01:29<09:26, 3.81it/s]
721
  13%|β–ˆβ–Ž | 324/2480 [01:30<10:55, 3.29it/s]
722
  13%|β–ˆβ–Ž | 325/2480 [01:30<09:29, 3.78it/s]
723
  13%|β–ˆβ–Ž | 326/2480 [01:30<11:22, 3.16it/s]
724
  13%|β–ˆβ–Ž | 327/2480 [01:31<10:10, 3.52it/s]
725
  13%|β–ˆβ–Ž | 328/2480 [01:31<09:23, 3.82it/s]
726
  13%|β–ˆβ–Ž | 329/2480 [01:31<09:28, 3.79it/s]
727
  13%|β–ˆβ–Ž | 330/2480 [01:31<09:19, 3.84it/s]
728
  13%|β–ˆβ–Ž | 331/2480 [01:32<08:50, 4.05it/s]
729
  13%|β–ˆβ–Ž | 332/2480 [01:32<08:49, 4.05it/s]
730
  13%|β–ˆβ–Ž | 333/2480 [01:32<09:10, 3.90it/s]
731
  13%|β–ˆβ–Ž | 334/2480 [01:32<08:54, 4.02it/s]
732
  14%|β–ˆβ–Ž | 335/2480 [01:33<09:15, 3.86it/s]
733
  14%|β–ˆβ–Ž | 336/2480 [01:33<08:58, 3.98it/s]
734
  14%|β–ˆβ–Ž | 337/2480 [01:33<08:27, 4.22it/s]
735
  14%|β–ˆβ–Ž | 338/2480 [01:33<09:11, 3.89it/s]
736
  14%|β–ˆβ–Ž | 339/2480 [01:34<08:56, 3.99it/s]
737
  14%|β–ˆβ–Ž | 340/2480 [01:34<08:26, 4.22it/s]
738
  14%|β–ˆβ– | 341/2480 [01:34<08:03, 4.42it/s]
739
  14%|β–ˆβ– | 342/2480 [01:34<08:36, 4.14it/s]
740
  14%|β–ˆβ– | 343/2480 [01:35<08:30, 4.18it/s]
741
  14%|β–ˆβ– | 344/2480 [01:35<09:19, 3.81it/s]
742
  14%|β–ˆβ– | 345/2480 [01:35<08:34, 4.15it/s]
743
  14%|β–ˆβ– | 346/2480 [01:35<08:08, 4.36it/s]
744
  14%|β–ˆβ– | 347/2480 [01:35<07:56, 4.48it/s]
745
  14%|β–ˆβ– | 348/2480 [01:36<07:23, 4.81it/s]
746
  14%|β–ˆβ– | 349/2480 [01:36<07:13, 4.91it/s]
747
  14%|β–ˆβ– | 350/2480 [01:36<07:20, 4.84it/s]
748
  14%|β–ˆβ– | 351/2480 [01:36<07:24, 4.78it/s]
749
  14%|β–ˆβ– | 352/2480 [01:36<08:01, 4.42it/s]
750
  14%|β–ˆβ– | 353/2480 [01:37<08:36, 4.11it/s]
751
  14%|β–ˆβ– | 354/2480 [01:37<08:28, 4.18it/s]
752
  14%|β–ˆβ– | 355/2480 [01:37<08:44, 4.05it/s]
753
  14%|β–ˆβ– | 356/2480 [01:37<07:58, 4.44it/s]
754
  14%|β–ˆβ– | 357/2480 [01:38<08:08, 4.35it/s]
755
  14%|β–ˆβ– | 358/2480 [01:38<07:42, 4.59it/s]
756
  14%|β–ˆβ– | 359/2480 [01:38<07:46, 4.55it/s]
757
  15%|β–ˆβ– | 360/2480 [01:38<07:13, 4.89it/s]
758
  15%|β–ˆβ– | 361/2480 [01:39<07:37, 4.63it/s]
759
  15%|β–ˆβ– | 362/2480 [01:39<08:03, 4.38it/s]
760
  15%|β–ˆβ– | 363/2480 [01:39<08:08, 4.34it/s]
761
  15%|β–ˆβ– | 364/2480 [01:39<08:10, 4.31it/s]
762
  15%|β–ˆβ– | 365/2480 [01:39<07:53, 4.46it/s]
763
  15%|β–ˆβ– | 366/2480 [01:40<08:12, 4.29it/s]
764
  15%|β–ˆβ– | 367/2480 [01:40<07:50, 4.49it/s]
765
  15%|β–ˆβ– | 368/2480 [01:40<09:52, 3.56it/s]
766
  15%|β–ˆβ– | 369/2480 [01:41<09:27, 3.72it/s]
767
  15%|β–ˆβ– | 370/2480 [01:41<09:19, 3.77it/s]
768
  15%|β–ˆβ– | 371/2480 [01:41<09:02, 3.89it/s]
769
  15%|β–ˆβ–Œ | 372/2480 [01:41<08:39, 4.06it/s]
770
  15%|β–ˆβ–Œ | 373/2480 [01:42<09:22, 3.75it/s]
771
  15%|β–ˆβ–Œ | 374/2480 [01:42<08:36, 4.08it/s]
772
  15%|β–ˆβ–Œ | 375/2480 [01:42<09:23, 3.74it/s]
773
  15%|β–ˆβ–Œ | 376/2480 [01:42<08:49, 3.97it/s]
774
  15%|β–ˆβ–Œ | 377/2480 [01:43<08:18, 4.22it/s]
775
  15%|β–ˆβ–Œ | 378/2480 [01:43<07:47, 4.49it/s]
776
  15%|β–ˆβ–Œ | 379/2480 [01:43<08:04, 4.34it/s]
777
  15%|β–ˆβ–Œ | 380/2480 [01:43<08:11, 4.27it/s]
778
  15%|β–ˆβ–Œ | 381/2480 [01:43<08:26, 4.14it/s]
779
  15%|β–ˆβ–Œ | 382/2480 [01:44<08:41, 4.02it/s]
780
  15%|β–ˆβ–Œ | 383/2480 [01:44<08:20, 4.19it/s]
781
  15%|β–ˆβ–Œ | 384/2480 [01:44<08:18, 4.21it/s]
782
  16%|β–ˆβ–Œ | 385/2480 [01:44<08:01, 4.35it/s]
783
  16%|β–ˆβ–Œ | 386/2480 [01:45<08:05, 4.32it/s]
784
  16%|β–ˆβ–Œ | 387/2480 [01:45<08:09, 4.28it/s]
785
  16%|β–ˆβ–Œ | 388/2480 [01:45<07:40, 4.54it/s]
786
  16%|β–ˆβ–Œ | 389/2480 [01:45<07:19, 4.76it/s]
787
  16%|β–ˆβ–Œ | 390/2480 [01:46<08:15, 4.21it/s]
788
  16%|β–ˆβ–Œ | 391/2480 [01:46<08:16, 4.21it/s]
789
  16%|β–ˆβ–Œ | 392/2480 [01:46<08:03, 4.32it/s]
790
  16%|β–ˆβ–Œ | 393/2480 [01:46<07:37, 4.56it/s]
791
  16%|β–ˆβ–Œ | 394/2480 [01:46<07:51, 4.42it/s]
792
  16%|β–ˆβ–Œ | 395/2480 [01:47<07:30, 4.63it/s]
793
  16%|β–ˆβ–Œ | 396/2480 [01:47<07:22, 4.71it/s]
794
  16%|β–ˆβ–Œ | 397/2480 [01:47<07:09, 4.85it/s]
795
  16%|β–ˆβ–Œ | 398/2480 [01:47<07:15, 4.78it/s]
796
  16%|β–ˆβ–Œ | 399/2480 [01:47<07:29, 4.63it/s]
797
  16%|β–ˆβ–Œ | 400/2480 [01:48<08:38, 4.01it/s]
798
  16%|β–ˆβ–Œ | 401/2480 [01:48<08:26, 4.11it/s]
799
  16%|β–ˆβ–Œ | 402/2480 [01:48<08:07, 4.26it/s]
800
  16%|β–ˆβ–‹ | 403/2480 [01:49<09:51, 3.51it/s]
801
  16%|β–ˆβ–‹ | 404/2480 [01:49<09:03, 3.82it/s]
802
  16%|β–ˆβ–‹ | 405/2480 [01:49<09:01, 3.83it/s]
803
  16%|β–ˆβ–‹ | 406/2480 [01:49<08:35, 4.02it/s]
804
  16%|β–ˆβ–‹ | 407/2480 [01:50<09:48, 3.52it/s]
805
  16%|β–ˆβ–‹ | 408/2480 [01:50<10:05, 3.42it/s]
806
  16%|β–ˆβ–‹ | 409/2480 [01:50<09:18, 3.71it/s]
807
  17%|β–ˆβ–‹ | 410/2480 [01:50<08:44, 3.95it/s]
808
  17%|β–ˆβ–‹ | 411/2480 [01:51<09:09, 3.76it/s]
809
  17%|β–ˆβ–‹ | 412/2480 [01:51<09:02, 3.81it/s]
810
  17%|β–ˆβ–‹ | 413/2480 [01:51<08:49, 3.90it/s]
811
  17%|β–ˆβ–‹ | 414/2480 [01:51<08:50, 3.89it/s]
812
  17%|β–ˆβ–‹ | 415/2480 [01:52<08:39, 3.97it/s]
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
3
+ size 5240
vocab.json ADDED
The diff for this file is too large to render. See raw diff