Training in progress, epoch 1
Browse files
- config.json +39 -0
- merges.txt +0 -0
- model.safetensors +3 -0
- special_tokens_map.json +51 -0
- tb/events.out.tfevents.1725474455.a5c501872057.1590.0 +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- train.log +355 -0
- training_args.bin +3 -0
- vocab.json +0 -0
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
3 |
+
"architectures": [
|
4 |
+
"RobertaForTokenClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"finetuning_task": "ner",
|
11 |
+
"gradient_checkpointing": false,
|
12 |
+
"hidden_act": "gelu",
|
13 |
+
"hidden_dropout_prob": 0.1,
|
14 |
+
"hidden_size": 768,
|
15 |
+
"id2label": {
|
16 |
+
"0": "O",
|
17 |
+
"1": "B-SINTOMA",
|
18 |
+
"2": "I-SINTOMA"
|
19 |
+
},
|
20 |
+
"initializer_range": 0.02,
|
21 |
+
"intermediate_size": 3072,
|
22 |
+
"label2id": {
|
23 |
+
"B-SINTOMA": 1,
|
24 |
+
"I-SINTOMA": 2,
|
25 |
+
"O": 0
|
26 |
+
},
|
27 |
+
"layer_norm_eps": 1e-05,
|
28 |
+
"max_position_embeddings": 514,
|
29 |
+
"model_type": "roberta",
|
30 |
+
"num_attention_heads": 12,
|
31 |
+
"num_hidden_layers": 12,
|
32 |
+
"pad_token_id": 1,
|
33 |
+
"position_embedding_type": "absolute",
|
34 |
+
"torch_dtype": "float32",
|
35 |
+
"transformers_version": "4.44.2",
|
36 |
+
"type_vocab_size": 1,
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 50262
|
39 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ec4ff3150f8b54e9dc6cbef1e83df920e2d9ac69d5474829b2a565f66fd6b29
|
3 |
+
size 496244100
|
special_tokens_map.json
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": true,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"cls_token": {
|
10 |
+
"content": "<s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": true,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"eos_token": {
|
17 |
+
"content": "</s>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": true,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"mask_token": {
|
24 |
+
"content": "<mask>",
|
25 |
+
"lstrip": true,
|
26 |
+
"normalized": true,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"pad_token": {
|
31 |
+
"content": "<pad>",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": true,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
},
|
37 |
+
"sep_token": {
|
38 |
+
"content": "</s>",
|
39 |
+
"lstrip": false,
|
40 |
+
"normalized": true,
|
41 |
+
"rstrip": false,
|
42 |
+
"single_word": false
|
43 |
+
},
|
44 |
+
"unk_token": {
|
45 |
+
"content": "<unk>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": true,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false
|
50 |
+
}
|
51 |
+
}
|
tb/events.out.tfevents.1725474455.a5c501872057.1590.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:678f600fa33f4f3e68d8868c24bbc8754fe1267151c8964a43ea17d33cabab3d
|
3 |
+
size 5645
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": true,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"0": {
|
5 |
+
"content": "<s>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": true,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"1": {
|
13 |
+
"content": "<pad>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": true,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"2": {
|
21 |
+
"content": "</s>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": true,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"3": {
|
29 |
+
"content": "<unk>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"50261": {
|
37 |
+
"content": "<mask>",
|
38 |
+
"lstrip": true,
|
39 |
+
"normalized": true,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
}
|
44 |
+
},
|
45 |
+
"bos_token": "<s>",
|
46 |
+
"clean_up_tokenization_spaces": true,
|
47 |
+
"cls_token": "<s>",
|
48 |
+
"eos_token": "</s>",
|
49 |
+
"errors": "replace",
|
50 |
+
"mask_token": "<mask>",
|
51 |
+
"max_len": 512,
|
52 |
+
"model_max_length": 512,
|
53 |
+
"pad_token": "<pad>",
|
54 |
+
"sep_token": "</s>",
|
55 |
+
"tokenizer_class": "RobertaTokenizer",
|
56 |
+
"trim_offsets": true,
|
57 |
+
"unk_token": "<unk>"
|
58 |
+
}
|
train.log
ADDED
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
0 |
0%| | 0/2480 [00:00<?, ?it/s]
|
1 |
0%| | 1/2480 [00:01<1:07:07, 1.62s/it]
|
2 |
0%| | 2/2480 [00:01<33:03, 1.25it/s]
|
3 |
0%| | 3/2480 [00:02<24:43, 1.67it/s]
|
4 |
0%| | 4/2480 [00:02<19:24, 2.13it/s]
|
5 |
0%| | 5/2480 [00:02<15:23, 2.68it/s]
|
6 |
0%| | 6/2480 [00:02<13:36, 3.03it/s]
|
7 |
0%| | 7/2480 [00:03<11:58, 3.44it/s]
|
8 |
0%| | 8/2480 [00:03<10:57, 3.76it/s]
|
9 |
0%| | 9/2480 [00:03<10:29, 3.93it/s]
|
10 |
0%| | 10/2480 [00:03<11:55, 3.45it/s]
|
11 |
0%| | 11/2480 [00:04<12:31, 3.28it/s]
|
12 |
0%| | 12/2480 [00:04<11:38, 3.53it/s]
|
13 |
1%| | 13/2480 [00:04<11:55, 3.45it/s]
|
14 |
1%| | 14/2480 [00:05<10:50, 3.79it/s]
|
15 |
1%| | 15/2480 [00:05<10:23, 3.95it/s]
|
16 |
1%| | 16/2480 [00:05<10:14, 4.01it/s]
|
17 |
1%| | 17/2480 [00:05<10:07, 4.05it/s]
|
18 |
1%| | 18/2480 [00:05<09:43, 4.22it/s]
|
19 |
1%| | 19/2480 [00:06<10:53, 3.77it/s]
|
20 |
1%| | 20/2480 [00:06<10:34, 3.88it/s]
|
21 |
1%| | 21/2480 [00:06<10:42, 3.83it/s]
|
22 |
1%| | 22/2480 [00:06<10:01, 4.09it/s]
|
23 |
1%| | 23/2480 [00:07<09:20, 4.39it/s]
|
24 |
1%| | 24/2480 [00:07<08:53, 4.60it/s]
|
25 |
1%| | 25/2480 [00:07<08:51, 4.62it/s]
|
26 |
1%| | 26/2480 [00:07<08:54, 4.59it/s]
|
27 |
1%| | 27/2480 [00:08<09:03, 4.51it/s]
|
28 |
1%| | 28/2480 [00:08<09:00, 4.54it/s]
|
29 |
1%| | 29/2480 [00:08<09:09, 4.46it/s]
|
30 |
1%| | 30/2480 [00:08<09:54, 4.12it/s]
|
31 |
1%|β | 31/2480 [00:08<09:15, 4.41it/s]
|
32 |
1%|β | 32/2480 [00:09<09:01, 4.52it/s]
|
33 |
1%|β | 33/2480 [00:09<09:02, 4.51it/s]
|
34 |
1%|β | 34/2480 [00:09<09:19, 4.37it/s]
|
35 |
1%|β | 35/2480 [00:09<09:42, 4.20it/s]
|
36 |
1%|β | 36/2480 [00:10<09:13, 4.42it/s]
|
37 |
1%|β | 37/2480 [00:10<12:51, 3.16it/s]
|
38 |
2%|β | 38/2480 [00:10<11:34, 3.52it/s]
|
39 |
2%|β | 39/2480 [00:11<11:25, 3.56it/s]
|
40 |
2%|β | 40/2480 [00:11<10:18, 3.95it/s]
|
41 |
2%|β | 41/2480 [00:11<09:34, 4.25it/s]
|
42 |
2%|β | 42/2480 [00:11<09:16, 4.38it/s]
|
43 |
2%|β | 43/2480 [00:11<08:59, 4.52it/s]
|
44 |
2%|β | 44/2480 [00:12<10:12, 3.98it/s]
|
45 |
2%|β | 45/2480 [00:12<09:52, 4.11it/s]
|
46 |
2%|β | 46/2480 [00:12<11:42, 3.46it/s]
|
47 |
2%|β | 47/2480 [00:13<10:40, 3.80it/s]
|
48 |
2%|β | 48/2480 [00:13<10:11, 3.98it/s]
|
49 |
2%|β | 49/2480 [00:13<11:17, 3.59it/s]
|
50 |
2%|β | 50/2480 [00:13<11:50, 3.42it/s]
|
51 |
2%|β | 51/2480 [00:14<11:00, 3.68it/s]
|
52 |
2%|β | 52/2480 [00:14<10:50, 3.73it/s]
|
53 |
2%|β | 53/2480 [00:14<10:01, 4.04it/s]
|
54 |
2%|β | 54/2480 [00:15<12:09, 3.33it/s]
|
55 |
2%|β | 55/2480 [00:15<12:02, 3.36it/s]
|
56 |
2%|β | 56/2480 [00:15<10:46, 3.75it/s]
|
57 |
2%|β | 57/2480 [00:15<10:55, 3.70it/s]
|
58 |
2%|β | 58/2480 [00:16<10:10, 3.97it/s]
|
59 |
2%|β | 59/2480 [00:16<09:52, 4.09it/s]
|
60 |
2%|β | 60/2480 [00:16<09:39, 4.17it/s]
|
61 |
2%|β | 61/2480 [00:16<08:59, 4.48it/s]
|
62 |
2%|β | 62/2480 [00:16<08:44, 4.61it/s]
|
63 |
3%|β | 63/2480 [00:17<08:49, 4.56it/s]
|
64 |
3%|β | 64/2480 [00:17<10:09, 3.97it/s]
|
65 |
3%|β | 65/2480 [00:17<09:53, 4.07it/s]
|
66 |
3%|β | 66/2480 [00:17<10:26, 3.85it/s]
|
67 |
3%|β | 67/2480 [00:18<10:40, 3.77it/s]
|
68 |
3%|β | 68/2480 [00:18<10:02, 4.00it/s]
|
69 |
3%|β | 69/2480 [00:18<09:33, 4.21it/s]
|
70 |
3%|β | 70/2480 [00:18<09:27, 4.25it/s]
|
71 |
3%|β | 71/2480 [00:19<09:44, 4.12it/s]
|
72 |
3%|β | 72/2480 [00:19<10:20, 3.88it/s]
|
73 |
3%|β | 73/2480 [00:19<09:47, 4.10it/s]
|
74 |
3%|β | 74/2480 [00:20<11:34, 3.47it/s]
|
75 |
3%|β | 75/2480 [00:20<11:25, 3.51it/s]
|
76 |
3%|β | 76/2480 [00:20<11:33, 3.47it/s]
|
77 |
3%|β | 77/2480 [00:20<10:47, 3.71it/s]
|
78 |
3%|β | 78/2480 [00:21<10:10, 3.93it/s]
|
79 |
3%|β | 79/2480 [00:21<09:52, 4.05it/s]
|
80 |
3%|β | 80/2480 [00:21<09:47, 4.09it/s]
|
81 |
3%|β | 81/2480 [00:21<11:08, 3.59it/s]
|
82 |
3%|β | 82/2480 [00:22<10:41, 3.74it/s]
|
83 |
3%|β | 83/2480 [00:22<09:51, 4.05it/s]
|
84 |
3%|β | 84/2480 [00:22<10:24, 3.83it/s]
|
85 |
3%|β | 85/2480 [00:22<10:22, 3.85it/s]
|
86 |
3%|β | 86/2480 [00:23<09:52, 4.04it/s]
|
87 |
4%|β | 87/2480 [00:23<10:24, 3.83it/s]
|
88 |
4%|β | 88/2480 [00:23<09:52, 4.04it/s]
|
89 |
4%|β | 89/2480 [00:23<10:02, 3.97it/s]
|
90 |
4%|β | 90/2480 [00:24<11:55, 3.34it/s]
|
91 |
4%|β | 91/2480 [00:24<11:08, 3.57it/s]
|
92 |
4%|β | 92/2480 [00:24<11:00, 3.62it/s]
|
93 |
4%|β | 93/2480 [00:25<10:41, 3.72it/s]
|
94 |
4%|β | 94/2480 [00:25<10:17, 3.87it/s]
|
95 |
4%|β | 95/2480 [00:25<10:17, 3.86it/s]
|
96 |
4%|β | 96/2480 [00:25<09:22, 4.24it/s]
|
97 |
4%|β | 97/2480 [00:25<09:48, 4.05it/s]
|
98 |
4%|β | 98/2480 [00:26<08:58, 4.43it/s]
|
99 |
4%|β | 99/2480 [00:26<10:07, 3.92it/s]
|
100 |
4%|β | 100/2480 [00:26<09:27, 4.19it/s]
|
101 |
4%|β | 101/2480 [00:26<09:08, 4.33it/s]
|
102 |
4%|β | 102/2480 [00:27<09:17, 4.27it/s]
|
103 |
4%|β | 103/2480 [00:27<08:58, 4.41it/s]
|
104 |
4%|β | 104/2480 [00:27<08:55, 4.44it/s]
|
105 |
4%|β | 105/2480 [00:27<09:26, 4.19it/s]
|
106 |
4%|β | 106/2480 [00:28<09:53, 4.00it/s]
|
107 |
4%|β | 107/2480 [00:28<09:29, 4.17it/s]
|
108 |
4%|β | 108/2480 [00:28<09:57, 3.97it/s]
|
109 |
4%|β | 109/2480 [00:28<09:08, 4.32it/s]
|
110 |
4%|β | 110/2480 [00:29<09:11, 4.30it/s]
|
111 |
4%|β | 111/2480 [00:29<09:05, 4.34it/s]
|
112 |
5%|β | 112/2480 [00:29<08:44, 4.51it/s]
|
113 |
5%|β | 113/2480 [00:29<08:33, 4.61it/s]
|
114 |
5%|β | 114/2480 [00:29<08:39, 4.55it/s]
|
115 |
5%|β | 115/2480 [00:30<08:11, 4.81it/s]
|
116 |
5%|β | 116/2480 [00:30<08:29, 4.64it/s]
|
117 |
5%|β | 117/2480 [00:30<08:45, 4.50it/s]
|
118 |
5%|β | 118/2480 [00:30<10:20, 3.81it/s]
|
119 |
5%|β | 119/2480 [00:31<10:47, 3.65it/s]
|
120 |
5%|β | 120/2480 [00:31<10:03, 3.91it/s]
|
121 |
5%|β | 121/2480 [00:31<11:11, 3.51it/s]
|
122 |
5%|β | 122/2480 [00:31<10:07, 3.88it/s]
|
123 |
5%|β | 123/2480 [00:32<09:46, 4.02it/s]
|
124 |
5%|β | 124/2480 [00:32<09:41, 4.05it/s]
|
125 |
5%|β | 125/2480 [00:32<09:50, 3.99it/s]
|
126 |
5%|β | 126/2480 [00:32<10:29, 3.74it/s]
|
127 |
5%|β | 127/2480 [00:33<11:12, 3.50it/s]
|
128 |
5%|β | 128/2480 [00:33<11:23, 3.44it/s]
|
129 |
5%|β | 129/2480 [00:33<11:44, 3.34it/s]
|
130 |
5%|β | 130/2480 [00:34<10:48, 3.62it/s]
|
131 |
5%|β | 131/2480 [00:34<10:45, 3.64it/s]
|
132 |
5%|β | 132/2480 [00:34<12:09, 3.22it/s]
|
133 |
5%|β | 133/2480 [00:35<11:14, 3.48it/s]
|
134 |
5%|β | 134/2480 [00:35<10:11, 3.83it/s]
|
135 |
5%|β | 135/2480 [00:35<09:29, 4.12it/s]
|
136 |
5%|β | 136/2480 [00:35<08:55, 4.38it/s]
|
137 |
6%|β | 137/2480 [00:35<08:53, 4.39it/s]
|
138 |
6%|β | 138/2480 [00:36<09:30, 4.10it/s]
|
139 |
6%|β | 139/2480 [00:36<09:22, 4.16it/s]
|
140 |
6%|β | 140/2480 [00:36<09:46, 3.99it/s]
|
141 |
6%|β | 141/2480 [00:36<10:17, 3.79it/s]
|
142 |
6%|β | 142/2480 [00:37<09:19, 4.18it/s]
|
143 |
6%|β | 143/2480 [00:37<09:22, 4.15it/s]
|
144 |
6%|β | 144/2480 [00:37<09:46, 3.98it/s]
|
145 |
6%|β | 145/2480 [00:37<09:36, 4.05it/s]
|
146 |
6%|β | 146/2480 [00:38<09:16, 4.19it/s]
|
147 |
6%|β | 147/2480 [00:38<08:56, 4.35it/s]
|
148 |
6%|β | 148/2480 [00:38<09:01, 4.31it/s]
|
149 |
6%|β | 149/2480 [00:38<08:46, 4.43it/s]
|
150 |
6%|β | 150/2480 [00:39<09:03, 4.29it/s]
|
151 |
6%|β | 151/2480 [00:39<08:38, 4.50it/s]
|
152 |
6%|β | 152/2480 [00:39<08:57, 4.33it/s]
|
153 |
6%|β | 153/2480 [00:39<09:12, 4.21it/s]
|
154 |
6%|β | 154/2480 [00:39<09:25, 4.11it/s]
|
155 |
6%|β | 155/2480 [00:40<09:55, 3.91it/s]
|
156 |
6%|β | 156/2480 [00:40<09:35, 4.04it/s]
|
157 |
6%|β | 157/2480 [00:40<09:05, 4.26it/s]
|
158 |
6%|β | 158/2480 [00:40<08:57, 4.32it/s]
|
159 |
6%|β | 159/2480 [00:41<09:42, 3.98it/s]
|
160 |
6%|β | 160/2480 [00:41<09:12, 4.20it/s]
|
161 |
6%|β | 161/2480 [00:41<08:26, 4.58it/s]
|
162 |
7%|β | 162/2480 [00:41<08:04, 4.78it/s]
|
163 |
7%|β | 163/2480 [00:42<08:28, 4.56it/s]
|
164 |
7%|β | 164/2480 [00:42<09:54, 3.90it/s]
|
165 |
7%|β | 165/2480 [00:42<10:08, 3.81it/s]
|
166 |
7%|β | 166/2480 [00:42<09:49, 3.93it/s]
|
167 |
7%|β | 167/2480 [00:43<09:09, 4.21it/s]
|
168 |
7%|β | 168/2480 [00:43<09:38, 4.00it/s]
|
169 |
7%|β | 169/2480 [00:43<09:07, 4.22it/s]
|
170 |
7%|β | 170/2480 [00:43<09:17, 4.14it/s]
|
171 |
7%|β | 171/2480 [00:44<09:21, 4.11it/s]
|
172 |
7%|β | 172/2480 [00:44<09:06, 4.22it/s]
|
173 |
7%|β | 173/2480 [00:44<08:49, 4.36it/s]
|
174 |
7%|β | 174/2480 [00:44<09:14, 4.16it/s]
|
175 |
7%|β | 175/2480 [00:45<09:11, 4.18it/s]
|
176 |
7%|β | 176/2480 [00:45<08:46, 4.38it/s]
|
177 |
7%|β | 177/2480 [00:45<08:29, 4.52it/s]
|
178 |
7%|β | 178/2480 [00:45<08:04, 4.76it/s]
|
179 |
7%|β | 179/2480 [00:45<07:58, 4.81it/s]
|
180 |
7%|β | 180/2480 [00:46<08:13, 4.66it/s]
|
181 |
7%|β | 181/2480 [00:46<08:21, 4.58it/s]
|
182 |
7%|β | 182/2480 [00:46<08:41, 4.41it/s]
|
183 |
7%|β | 183/2480 [00:46<08:39, 4.42it/s]
|
184 |
7%|β | 184/2480 [00:46<08:42, 4.39it/s]
|
185 |
7%|β | 185/2480 [00:47<08:31, 4.48it/s]
|
186 |
8%|β | 186/2480 [00:47<08:22, 4.56it/s]
|
187 |
8%|β | 187/2480 [00:47<08:10, 4.68it/s]
|
188 |
8%|β | 188/2480 [00:47<08:16, 4.61it/s]
|
189 |
8%|β | 189/2480 [00:48<08:21, 4.57it/s]
|
190 |
8%|β | 190/2480 [00:48<08:52, 4.30it/s]
|
191 |
8%|β | 191/2480 [00:48<08:34, 4.45it/s]
|
192 |
8%|β | 192/2480 [00:48<08:58, 4.25it/s]
|
193 |
8%|β | 193/2480 [00:48<08:32, 4.47it/s]
|
194 |
8%|β | 194/2480 [00:49<09:04, 4.20it/s]
|
195 |
8%|β | 195/2480 [00:49<09:47, 3.89it/s]
|
196 |
8%|β | 196/2480 [00:49<09:36, 3.96it/s]
|
197 |
8%|β | 197/2480 [00:50<11:04, 3.44it/s]
|
198 |
8%|β | 198/2480 [00:50<10:01, 3.80it/s]
|
199 |
8%|β | 199/2480 [00:50<09:13, 4.12it/s]
|
200 |
8%|β | 200/2480 [00:50<08:43, 4.35it/s]
|
201 |
8%|β | 201/2480 [00:50<08:30, 4.47it/s]
|
202 |
8%|β | 202/2480 [00:51<08:31, 4.46it/s]
|
203 |
8%|β | 203/2480 [00:51<10:17, 3.69it/s]
|
204 |
8%|β | 204/2480 [00:51<10:29, 3.62it/s]
|
205 |
8%|β | 205/2480 [00:52<09:39, 3.93it/s]
|
206 |
8%|β | 206/2480 [00:52<09:18, 4.07it/s]
|
207 |
8%|β | 207/2480 [00:52<09:36, 3.94it/s]
|
208 |
8%|β | 208/2480 [00:52<10:23, 3.65it/s]
|
209 |
8%|β | 209/2480 [00:53<10:22, 3.65it/s]
|
210 |
8%|β | 210/2480 [00:53<09:54, 3.82it/s]
|
211 |
9%|β | 211/2480 [00:53<09:31, 3.97it/s]
|
212 |
9%|β | 212/2480 [00:53<09:37, 3.93it/s]
|
213 |
9%|β | 213/2480 [00:54<09:25, 4.01it/s]
|
214 |
9%|β | 214/2480 [00:54<09:16, 4.07it/s]
|
215 |
9%|β | 215/2480 [00:54<09:42, 3.89it/s]
|
216 |
9%|β | 216/2480 [00:54<09:37, 3.92it/s]
|
217 |
9%|β | 217/2480 [00:55<09:27, 3.99it/s]
|
218 |
9%|β | 218/2480 [00:55<09:32, 3.95it/s]
|
219 |
9%|β | 219/2480 [00:55<08:53, 4.24it/s]
|
220 |
9%|β | 220/2480 [00:55<10:10, 3.70it/s]
|
221 |
9%|β | 221/2480 [00:56<09:48, 3.84it/s]
|
222 |
9%|β | 222/2480 [00:56<08:54, 4.23it/s]
|
223 |
9%|β | 223/2480 [00:56<09:11, 4.09it/s]
|
224 |
9%|β | 224/2480 [00:56<09:01, 4.17it/s]
|
225 |
9%|β | 225/2480 [00:57<09:45, 3.85it/s]
|
226 |
9%|β | 226/2480 [00:57<09:24, 4.00it/s]
|
227 |
9%|β | 227/2480 [00:57<09:26, 3.98it/s]
|
228 |
9%|β | 228/2480 [00:57<08:58, 4.19it/s]
|
229 |
9%|β | 229/2480 [00:58<08:33, 4.39it/s]
|
230 |
9%|β | 230/2480 [00:58<08:21, 4.48it/s]
|
231 |
9%|β | 231/2480 [00:58<08:32, 4.39it/s]
|
232 |
9%|β | 232/2480 [00:58<09:05, 4.12it/s]
|
233 |
9%|β | 233/2480 [00:59<08:56, 4.19it/s]
|
234 |
9%|β | 234/2480 [00:59<09:25, 3.97it/s]
|
235 |
9%|β | 235/2480 [00:59<09:12, 4.07it/s]
|
236 |
10%|β | 236/2480 [00:59<10:24, 3.60it/s]
|
237 |
10%|β | 237/2480 [01:00<09:34, 3.90it/s]
|
238 |
10%|β | 238/2480 [01:00<09:06, 4.11it/s]
|
239 |
10%|β | 239/2480 [01:00<08:58, 4.16it/s]
|
240 |
10%|β | 240/2480 [01:00<08:21, 4.47it/s]
|
241 |
10%|β | 241/2480 [01:00<07:53, 4.73it/s]
|
242 |
10%|β | 242/2480 [01:01<08:08, 4.58it/s]
|
243 |
10%|β | 243/2480 [01:01<08:34, 4.35it/s]
|
244 |
10%|β | 244/2480 [01:01<09:04, 4.10it/s]
|
245 |
10%|β | 245/2480 [01:01<09:14, 4.03it/s]
|
246 |
10%|β | 246/2480 [01:02<08:50, 4.21it/s]
|
247 |
10%|β | 247/2480 [01:02<08:32, 4.36it/s]
|
248 |
10%|β | 248/2480 [01:02<07:59, 4.65it/s][INFO|trainer.py:811] 2024-09-04 18:28:37,832 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
|
|
|
|
|
|
|
|
|
|
|
|
249 |
0%| | 0/315 [00:00<?, ?it/s][A
|
|
|
250 |
3%|β | 9/315 [00:00<00:03, 87.62it/s][A
|
|
|
251 |
6%|β | 18/315 [00:00<00:03, 84.29it/s][A
|
|
|
252 |
9%|β | 27/315 [00:00<00:03, 82.00it/s][A
|
|
|
253 |
11%|ββ | 36/315 [00:00<00:03, 76.52it/s][A
|
|
|
254 |
14%|ββ | 45/315 [00:00<00:03, 78.83it/s][A
|
|
|
255 |
17%|ββ | 54/315 [00:00<00:03, 79.38it/s][A
|
|
|
256 |
20%|ββ | 63/315 [00:00<00:03, 78.44it/s][A
|
|
|
257 |
23%|βββ | 72/315 [00:00<00:03, 80.31it/s][A
|
|
|
258 |
26%|βββ | 81/315 [00:01<00:02, 79.22it/s][A
|
|
|
259 |
28%|βββ | 89/315 [00:01<00:02, 78.97it/s][A
|
|
|
260 |
31%|βββ | 97/315 [00:01<00:02, 78.20it/s][A
|
|
|
261 |
34%|ββββ | 106/315 [00:01<00:02, 80.06it/s][A
|
|
|
262 |
37%|ββββ | 115/315 [00:01<00:02, 81.29it/s][A
|
|
|
263 |
39%|ββββ | 124/315 [00:01<00:02, 78.38it/s][A
|
|
|
264 |
42%|βββββ | 132/315 [00:01<00:02, 78.59it/s][A
|
|
|
265 |
44%|βββββ | 140/315 [00:01<00:02, 77.86it/s][A
|
|
|
266 |
47%|βββββ | 148/315 [00:01<00:02, 75.27it/s][A
|
|
|
267 |
50%|βββββ | 156/315 [00:01<00:02, 73.46it/s][A
|
|
|
268 |
52%|ββββββ | 164/315 [00:02<00:02, 75.03it/s][A
|
|
|
269 |
55%|ββββββ | 173/315 [00:02<00:01, 76.99it/s][A
|
|
|
270 |
57%|ββββββ | 181/315 [00:02<00:01, 77.31it/s][A
|
|
|
271 |
60%|ββββββ | 190/315 [00:02<00:01, 78.87it/s][A
|
|
|
272 |
63%|βββββββ | 198/315 [00:02<00:01, 78.00it/s][A
|
|
|
273 |
65%|βββββββ | 206/315 [00:02<00:01, 78.04it/s][A
|
|
|
274 |
68%|βββββββ | 214/315 [00:02<00:01, 75.23it/s][A
|
|
|
275 |
70%|βββββββ | 222/315 [00:02<00:01, 73.53it/s][A
|
|
|
276 |
73%|ββββββββ | 230/315 [00:02<00:01, 72.57it/s][A
|
|
|
277 |
76%|ββββββββ | 239/315 [00:03<00:01, 74.60it/s][A
|
|
|
278 |
78%|ββββββββ | 247/315 [00:03<00:00, 74.88it/s][A
|
|
|
279 |
81%|βββββββββ | 256/315 [00:03<00:00, 76.66it/s][A
|
|
|
280 |
84%|βββββββββ | 265/315 [00:03<00:00, 78.61it/s][A
|
|
|
281 |
87%|βββββββββ | 274/315 [00:03<00:00, 81.06it/s][A
|
|
|
282 |
90%|βββββββββ | 283/315 [00:03<00:00, 82.28it/s][A
|
|
|
283 |
93%|ββββββββββ| 292/315 [00:03<00:00, 79.10it/s][A
|
|
|
284 |
96%|ββββββββββ| 301/315 [00:03<00:00, 80.22it/s][A
|
|
|
285 |
98%|ββββββββββ| 310/315 [00:03<00:00, 81.05it/s][A
|
286 |
|
|
|
287 |
|
288 |
10%|β | 248/2480 [01:08<07:59, 4.65it/s]
|
|
|
|
|
289 |
[A[INFO|trainer.py:3503] 2024-09-04 18:28:43,350 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-248
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
290 |
10%|β | 249/2480 [01:11<1:44:55, 2.82s/it]
|
291 |
10%|β | 250/2480 [01:11<1:15:54, 2.04s/it]
|
292 |
10%|β | 251/2480 [01:11<55:54, 1.51s/it]
|
293 |
10%|β | 252/2480 [01:12<41:42, 1.12s/it]
|
294 |
10%|β | 253/2480 [01:12<31:32, 1.18it/s]
|
295 |
10%|β | 254/2480 [01:12<24:51, 1.49it/s]
|
296 |
10%|β | 255/2480 [01:12<21:00, 1.77it/s]
|
297 |
10%|β | 256/2480 [01:13<17:50, 2.08it/s]
|
298 |
10%|β | 257/2480 [01:13<15:06, 2.45it/s]
|
299 |
10%|β | 258/2480 [01:13<12:55, 2.87it/s]
|
300 |
10%|β | 259/2480 [01:13<11:19, 3.27it/s]
|
301 |
10%|β | 260/2480 [01:14<10:42, 3.45it/s]
|
302 |
11%|β | 261/2480 [01:14<10:07, 3.65it/s]
|
303 |
11%|β | 262/2480 [01:14<09:25, 3.92it/s]
|
304 |
11%|β | 263/2480 [01:14<09:55, 3.72it/s]
|
305 |
11%|β | 264/2480 [01:15<10:10, 3.63it/s]
|
306 |
11%|β | 265/2480 [01:15<10:24, 3.55it/s]
|
307 |
11%|β | 266/2480 [01:15<09:56, 3.71it/s]
|
308 |
11%|β | 267/2480 [01:15<09:49, 3.75it/s]
|
309 |
11%|β | 268/2480 [01:16<09:37, 3.83it/s]
|
310 |
11%|β | 269/2480 [01:16<09:43, 3.79it/s]
|
311 |
11%|β | 270/2480 [01:16<09:21, 3.94it/s]
|
312 |
11%|β | 271/2480 [01:16<09:51, 3.74it/s]
|
313 |
11%|β | 272/2480 [01:17<09:45, 3.77it/s]
|
314 |
11%|β | 273/2480 [01:17<09:37, 3.82it/s]
|
315 |
11%|β | 274/2480 [01:17<09:15, 3.97it/s]
|
316 |
11%|β | 275/2480 [01:18<09:48, 3.75it/s]
|
317 |
11%|β | 276/2480 [01:18<09:30, 3.86it/s]
|
318 |
11%|β | 277/2480 [01:18<08:50, 4.15it/s]
|
319 |
11%|β | 278/2480 [01:18<08:05, 4.53it/s]
|
320 |
11%|ββ | 279/2480 [01:18<09:13, 3.98it/s]
|
321 |
11%|ββ | 280/2480 [01:19<08:55, 4.11it/s]
|
322 |
11%|ββ | 281/2480 [01:19<08:30, 4.31it/s]
|
323 |
11%|ββ | 282/2480 [01:19<08:31, 4.29it/s]
|
324 |
11%|ββ | 283/2480 [01:19<08:42, 4.21it/s]
|
325 |
11%|ββ | 284/2480 [01:20<08:57, 4.09it/s]
|
326 |
11%|ββ | 285/2480 [01:20<08:46, 4.17it/s]
|
327 |
12%|ββ | 286/2480 [01:20<08:15, 4.43it/s]
|
328 |
12%|ββ | 287/2480 [01:20<07:45, 4.71it/s]
|
329 |
12%|ββ | 288/2480 [01:21<08:39, 4.22it/s]
|
330 |
12%|ββ | 289/2480 [01:21<09:57, 3.67it/s]
|
331 |
12%|ββ | 290/2480 [01:21<09:34, 3.81it/s]
|
332 |
12%|ββ | 291/2480 [01:21<09:32, 3.82it/s]
|
333 |
12%|ββ | 292/2480 [01:22<09:35, 3.80it/s]
|
334 |
12%|ββ | 293/2480 [01:22<09:23, 3.88it/s]
|
335 |
12%|ββ | 294/2480 [01:22<08:56, 4.08it/s]
|
336 |
12%|ββ | 295/2480 [01:22<09:27, 3.85it/s]
|
337 |
12%|ββ | 296/2480 [01:23<08:49, 4.12it/s]
|
338 |
12%|ββ | 297/2480 [01:23<08:55, 4.08it/s]
|
339 |
12%|ββ | 298/2480 [01:23<08:33, 4.25it/s]
|
340 |
12%|ββ | 299/2480 [01:23<09:39, 3.76it/s]
|
341 |
12%|ββ | 300/2480 [01:24<09:25, 3.86it/s]
|
342 |
12%|ββ | 301/2480 [01:24<08:34, 4.23it/s]
|
343 |
12%|ββ | 302/2480 [01:24<07:59, 4.54it/s]
|
344 |
12%|ββ | 303/2480 [01:25<11:53, 3.05it/s]
|
345 |
12%|ββ | 304/2480 [01:25<11:51, 3.06it/s]
|
346 |
12%|ββ | 305/2480 [01:25<10:15, 3.54it/s]
|
347 |
12%|ββ | 306/2480 [01:25<09:49, 3.69it/s]
|
348 |
12%|ββ | 307/2480 [01:26<09:18, 3.89it/s]
|
349 |
12%|ββ | 308/2480 [01:26<09:14, 3.92it/s]
|
350 |
12%|ββ | 309/2480 [01:26<08:48, 4.11it/s]
|
351 |
12%|ββ | 310/2480 [01:26<08:11, 4.42it/s]
|
352 |
13%|ββ | 311/2480 [01:26<07:49, 4.62it/s]
|
353 |
13%|ββ | 312/2480 [01:27<07:32, 4.79it/s]
|
354 |
13%|ββ | 313/2480 [01:27<07:56, 4.55it/s]
|
355 |
13%|ββ | 314/2480 [01:27<07:45, 4.65it/s]
|
356 |
13%|ββ | 315/2480 [01:27<09:12, 3.92it/s]
|
357 |
13%|ββ | 316/2480 [01:28<09:02, 3.99it/s]
|
358 |
13%|ββ | 317/2480 [01:28<08:47, 4.10it/s]
|
359 |
13%|ββ | 318/2480 [01:28<09:01, 3.99it/s]
|
360 |
13%|ββ | 319/2480 [01:28<08:19, 4.32it/s]
|
361 |
13%|ββ | 320/2480 [01:29<08:30, 4.23it/s]
|
362 |
13%|ββ | 321/2480 [01:29<08:24, 4.28it/s]
|
363 |
13%|ββ | 322/2480 [01:29<09:15, 3.89it/s]
|
364 |
13%|ββ | 323/2480 [01:29<09:26, 3.81it/s]
|
365 |
13%|ββ | 324/2480 [01:30<10:55, 3.29it/s]
|
366 |
13%|ββ | 325/2480 [01:30<09:29, 3.78it/s]
|
367 |
13%|ββ | 326/2480 [01:30<11:22, 3.16it/s]
|
368 |
13%|ββ | 327/2480 [01:31<10:10, 3.52it/s]
|
369 |
13%|ββ | 328/2480 [01:31<09:23, 3.82it/s]
|
370 |
13%|ββ | 329/2480 [01:31<09:28, 3.79it/s]
|
371 |
13%|ββ | 330/2480 [01:31<09:19, 3.84it/s]
|
372 |
13%|ββ | 331/2480 [01:32<08:50, 4.05it/s]
|
373 |
13%|ββ | 332/2480 [01:32<08:49, 4.05it/s]
|
374 |
13%|ββ | 333/2480 [01:32<09:10, 3.90it/s]
|
375 |
13%|ββ | 334/2480 [01:32<08:54, 4.02it/s]
|
376 |
14%|ββ | 335/2480 [01:33<09:15, 3.86it/s]
|
377 |
14%|ββ | 336/2480 [01:33<08:58, 3.98it/s]
|
378 |
14%|ββ | 337/2480 [01:33<08:27, 4.22it/s]
|
379 |
14%|ββ | 338/2480 [01:33<09:11, 3.89it/s]
|
380 |
14%|ββ | 339/2480 [01:34<08:56, 3.99it/s]
|
381 |
14%|ββ | 340/2480 [01:34<08:26, 4.22it/s]
|
382 |
14%|ββ | 341/2480 [01:34<08:03, 4.42it/s]
|
383 |
14%|ββ | 342/2480 [01:34<08:36, 4.14it/s]
|
384 |
14%|ββ | 343/2480 [01:35<08:30, 4.18it/s]
|
385 |
14%|ββ | 344/2480 [01:35<09:19, 3.81it/s]
|
386 |
14%|ββ | 345/2480 [01:35<08:34, 4.15it/s]
|
387 |
14%|ββ | 346/2480 [01:35<08:08, 4.36it/s]
|
388 |
14%|ββ | 347/2480 [01:35<07:56, 4.48it/s]
|
389 |
14%|ββ | 348/2480 [01:36<07:23, 4.81it/s]
|
390 |
14%|ββ | 349/2480 [01:36<07:13, 4.91it/s]
|
391 |
14%|ββ | 350/2480 [01:36<07:20, 4.84it/s]
|
392 |
14%|ββ | 351/2480 [01:36<07:24, 4.78it/s]
|
393 |
14%|ββ | 352/2480 [01:36<08:01, 4.42it/s]
|
394 |
14%|ββ | 353/2480 [01:37<08:36, 4.11it/s]
|
395 |
14%|ββ | 354/2480 [01:37<08:28, 4.18it/s]
|
396 |
14%|ββ | 355/2480 [01:37<08:44, 4.05it/s]
|
397 |
14%|ββ | 356/2480 [01:37<07:58, 4.44it/s]
|
398 |
14%|ββ | 357/2480 [01:38<08:08, 4.35it/s]
|
399 |
14%|ββ | 358/2480 [01:38<07:42, 4.59it/s]
|
400 |
14%|ββ | 359/2480 [01:38<07:46, 4.55it/s]
|
401 |
15%|ββ | 360/2480 [01:38<07:13, 4.89it/s]
|
402 |
15%|ββ | 361/2480 [01:39<07:37, 4.63it/s]
|
403 |
15%|ββ | 362/2480 [01:39<08:03, 4.38it/s]
|
404 |
15%|ββ | 363/2480 [01:39<08:08, 4.34it/s]
|
405 |
15%|ββ | 364/2480 [01:39<08:10, 4.31it/s]
|
406 |
15%|ββ | 365/2480 [01:39<07:53, 4.46it/s]
|
407 |
15%|ββ | 366/2480 [01:40<08:12, 4.29it/s]
|
408 |
15%|ββ | 367/2480 [01:40<07:50, 4.49it/s]
|
409 |
15%|ββ | 368/2480 [01:40<09:52, 3.56it/s]
|
410 |
15%|ββ | 369/2480 [01:41<09:27, 3.72it/s]
|
411 |
15%|ββ | 370/2480 [01:41<09:19, 3.77it/s]
|
412 |
15%|ββ | 371/2480 [01:41<09:02, 3.89it/s]
|
413 |
15%|ββ | 372/2480 [01:41<08:39, 4.06it/s]
|
414 |
15%|ββ | 373/2480 [01:42<09:22, 3.75it/s]
|
415 |
15%|ββ | 374/2480 [01:42<08:36, 4.08it/s]
|
416 |
15%|ββ | 375/2480 [01:42<09:23, 3.74it/s]
|
417 |
15%|ββ | 376/2480 [01:42<08:49, 3.97it/s]
|
418 |
15%|ββ | 377/2480 [01:43<08:18, 4.22it/s]
|
419 |
15%|ββ | 378/2480 [01:43<07:47, 4.49it/s]
|
420 |
15%|ββ | 379/2480 [01:43<08:04, 4.34it/s]
|
421 |
15%|ββ | 380/2480 [01:43<08:11, 4.27it/s]
|
422 |
15%|ββ | 381/2480 [01:43<08:26, 4.14it/s]
|
423 |
15%|ββ | 382/2480 [01:44<08:41, 4.02it/s]
|
424 |
15%|ββ | 383/2480 [01:44<08:20, 4.19it/s]
|
425 |
15%|ββ | 384/2480 [01:44<08:18, 4.21it/s]
|
426 |
16%|ββ | 385/2480 [01:44<08:01, 4.35it/s]
|
427 |
16%|ββ | 386/2480 [01:45<08:05, 4.32it/s]
|
428 |
16%|ββ | 387/2480 [01:45<08:09, 4.28it/s]
|
429 |
16%|ββ | 388/2480 [01:45<07:40, 4.54it/s]
|
430 |
16%|ββ | 389/2480 [01:45<07:19, 4.76it/s]
|
431 |
16%|ββ | 390/2480 [01:46<08:15, 4.21it/s]
|
432 |
16%|ββ | 391/2480 [01:46<08:16, 4.21it/s]
|
433 |
16%|ββ | 392/2480 [01:46<08:03, 4.32it/s]
|
434 |
16%|ββ | 393/2480 [01:46<07:37, 4.56it/s]
|
435 |
16%|ββ | 394/2480 [01:46<07:51, 4.42it/s]
|
436 |
16%|ββ | 395/2480 [01:47<07:30, 4.63it/s]
|
437 |
16%|ββ | 396/2480 [01:47<07:22, 4.71it/s]
|
438 |
16%|ββ | 397/2480 [01:47<07:09, 4.85it/s]
|
439 |
16%|ββ | 398/2480 [01:47<07:15, 4.78it/s]
|
440 |
16%|ββ | 399/2480 [01:47<07:29, 4.63it/s]
|
441 |
16%|ββ | 400/2480 [01:48<08:38, 4.01it/s]
|
442 |
16%|ββ | 401/2480 [01:48<08:26, 4.11it/s]
|
443 |
16%|ββ | 402/2480 [01:48<08:07, 4.26it/s]
|
444 |
16%|ββ | 403/2480 [01:49<09:51, 3.51it/s]
|
445 |
16%|ββ | 404/2480 [01:49<09:03, 3.82it/s]
|
446 |
16%|ββ | 405/2480 [01:49<09:01, 3.83it/s]
|
447 |
16%|ββ | 406/2480 [01:49<08:35, 4.02it/s]
|
448 |
16%|ββ | 407/2480 [01:50<09:48, 3.52it/s]
|
449 |
16%|ββ | 408/2480 [01:50<10:05, 3.42it/s]
|
450 |
16%|ββ | 409/2480 [01:50<09:18, 3.71it/s]
|
451 |
17%|ββ | 410/2480 [01:50<08:44, 3.95it/s]
|
452 |
17%|ββ | 411/2480 [01:51<09:09, 3.76it/s]
|
453 |
17%|ββ | 412/2480 [01:51<09:02, 3.81it/s]
|
454 |
17%|ββ | 413/2480 [01:51<08:49, 3.90it/s]
|
455 |
17%|ββ | 414/2480 [01:51<08:50, 3.89it/s]
|
456 |
17%|ββ | 415/2480 [01:52<08:39, 3.97it/s]
|
|
|
1 |
+
2024-09-04 18:26:58.019800: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
|
2 |
+
2024-09-04 18:26:58.038161: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
|
3 |
+
2024-09-04 18:26:58.059897: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
|
4 |
+
2024-09-04 18:26:58.066439: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
|
5 |
+
2024-09-04 18:26:58.082659: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
|
6 |
+
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
|
7 |
+
2024-09-04 18:26:59.362821: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
|
8 |
+
/usr/local/lib/python3.10/dist-packages/transformers/training_args.py:1525: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead
|
9 |
+
warnings.warn(
|
10 |
+
09/04/2024 18:27:00 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1distributed training: True, 16-bits training: False
|
11 |
+
09/04/2024 18:27:00 - INFO - __main__ - Training/evaluation parameters TrainingArguments(
|
12 |
+
_n_gpu=1,
|
13 |
+
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
|
14 |
+
adafactor=False,
|
15 |
+
adam_beta1=0.9,
|
16 |
+
adam_beta2=0.999,
|
17 |
+
adam_epsilon=1e-08,
|
18 |
+
auto_find_batch_size=False,
|
19 |
+
batch_eval_metrics=False,
|
20 |
+
bf16=False,
|
21 |
+
bf16_full_eval=False,
|
22 |
+
data_seed=None,
|
23 |
+
dataloader_drop_last=False,
|
24 |
+
dataloader_num_workers=0,
|
25 |
+
dataloader_persistent_workers=False,
|
26 |
+
dataloader_pin_memory=True,
|
27 |
+
dataloader_prefetch_factor=None,
|
28 |
+
ddp_backend=None,
|
29 |
+
ddp_broadcast_buffers=None,
|
30 |
+
ddp_bucket_cap_mb=None,
|
31 |
+
ddp_find_unused_parameters=None,
|
32 |
+
ddp_timeout=1800,
|
33 |
+
debug=[],
|
34 |
+
deepspeed=None,
|
35 |
+
disable_tqdm=False,
|
36 |
+
dispatch_batches=None,
|
37 |
+
do_eval=True,
|
38 |
+
do_predict=True,
|
39 |
+
do_train=True,
|
40 |
+
eval_accumulation_steps=None,
|
41 |
+
eval_delay=0,
|
42 |
+
eval_do_concat_batches=True,
|
43 |
+
eval_on_start=False,
|
44 |
+
eval_steps=None,
|
45 |
+
eval_strategy=epoch,
|
46 |
+
eval_use_gather_object=False,
|
47 |
+
evaluation_strategy=epoch,
|
48 |
+
fp16=False,
|
49 |
+
fp16_backend=auto,
|
50 |
+
fp16_full_eval=False,
|
51 |
+
fp16_opt_level=O1,
|
52 |
+
fsdp=[],
|
53 |
+
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
|
54 |
+
fsdp_min_num_params=0,
|
55 |
+
fsdp_transformer_layer_cls_to_wrap=None,
|
56 |
+
full_determinism=False,
|
57 |
+
gradient_accumulation_steps=2,
|
58 |
+
gradient_checkpointing=False,
|
59 |
+
gradient_checkpointing_kwargs=None,
|
60 |
+
greater_is_better=True,
|
61 |
+
group_by_length=False,
|
62 |
+
half_precision_backend=auto,
|
63 |
+
hub_always_push=False,
|
64 |
+
hub_model_id=None,
|
65 |
+
hub_private_repo=False,
|
66 |
+
hub_strategy=every_save,
|
67 |
+
hub_token=<HUB_TOKEN>,
|
68 |
+
ignore_data_skip=False,
|
69 |
+
include_inputs_for_metrics=False,
|
70 |
+
include_num_input_tokens_seen=False,
|
71 |
+
include_tokens_per_second=False,
|
72 |
+
jit_mode_eval=False,
|
73 |
+
label_names=None,
|
74 |
+
label_smoothing_factor=0.0,
|
75 |
+
learning_rate=5e-05,
|
76 |
+
length_column_name=length,
|
77 |
+
load_best_model_at_end=True,
|
78 |
+
local_rank=0,
|
79 |
+
log_level=passive,
|
80 |
+
log_level_replica=warning,
|
81 |
+
log_on_each_node=True,
|
82 |
+
logging_dir=/content/dissertation/scripts/ner/output/tb,
|
83 |
+
logging_first_step=False,
|
84 |
+
logging_nan_inf_filter=True,
|
85 |
+
logging_steps=500,
|
86 |
+
logging_strategy=steps,
|
87 |
+
lr_scheduler_kwargs={},
|
88 |
+
lr_scheduler_type=linear,
|
89 |
+
max_grad_norm=1.0,
|
90 |
+
max_steps=-1,
|
91 |
+
metric_for_best_model=f1,
|
92 |
+
mp_parameters=,
|
93 |
+
neftune_noise_alpha=None,
|
94 |
+
no_cuda=False,
|
95 |
+
num_train_epochs=10.0,
|
96 |
+
optim=adamw_torch,
|
97 |
+
optim_args=None,
|
98 |
+
optim_target_modules=None,
|
99 |
+
output_dir=/content/dissertation/scripts/ner/output,
|
100 |
+
overwrite_output_dir=True,
|
101 |
+
past_index=-1,
|
102 |
+
per_device_eval_batch_size=8,
|
103 |
+
per_device_train_batch_size=32,
|
104 |
+
prediction_loss_only=False,
|
105 |
+
push_to_hub=True,
|
106 |
+
push_to_hub_model_id=None,
|
107 |
+
push_to_hub_organization=None,
|
108 |
+
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
109 |
+
ray_scope=last,
|
110 |
+
remove_unused_columns=True,
|
111 |
+
report_to=['tensorboard'],
|
112 |
+
restore_callback_states_from_checkpoint=False,
|
113 |
+
resume_from_checkpoint=None,
|
114 |
+
run_name=/content/dissertation/scripts/ner/output,
|
115 |
+
save_on_each_node=False,
|
116 |
+
save_only_model=False,
|
117 |
+
save_safetensors=True,
|
118 |
+
save_steps=500,
|
119 |
+
save_strategy=epoch,
|
120 |
+
save_total_limit=None,
|
121 |
+
seed=42,
|
122 |
+
skip_memory_metrics=True,
|
123 |
+
split_batches=None,
|
124 |
+
tf32=None,
|
125 |
+
torch_compile=False,
|
126 |
+
torch_compile_backend=None,
|
127 |
+
torch_compile_mode=None,
|
128 |
+
torch_empty_cache_steps=None,
|
129 |
+
torchdynamo=None,
|
130 |
+
tpu_metrics_debug=False,
|
131 |
+
tpu_num_cores=None,
|
132 |
+
use_cpu=False,
|
133 |
+
use_ipex=False,
|
134 |
+
use_legacy_prediction_loop=False,
|
135 |
+
use_mps_device=False,
|
136 |
+
warmup_ratio=0.0,
|
137 |
+
warmup_steps=0,
|
138 |
+
weight_decay=0.0,
|
139 |
+
)
|
140 |
+
|
141 |
+
|
142 |
+
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
|
147 |
+
[INFO|configuration_utils.py:733] 2024-09-04 18:27:16,977 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
148 |
+
[INFO|configuration_utils.py:800] 2024-09-04 18:27:16,984 >> Model config RobertaConfig {
|
149 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
150 |
+
"architectures": [
|
151 |
+
"RobertaForMaskedLM"
|
152 |
+
],
|
153 |
+
"attention_probs_dropout_prob": 0.1,
|
154 |
+
"bos_token_id": 0,
|
155 |
+
"classifier_dropout": null,
|
156 |
+
"eos_token_id": 2,
|
157 |
+
"finetuning_task": "ner",
|
158 |
+
"gradient_checkpointing": false,
|
159 |
+
"hidden_act": "gelu",
|
160 |
+
"hidden_dropout_prob": 0.1,
|
161 |
+
"hidden_size": 768,
|
162 |
+
"id2label": {
|
163 |
+
"0": "O",
|
164 |
+
"1": "B-SINTOMA",
|
165 |
+
"2": "I-SINTOMA"
|
166 |
+
},
|
167 |
+
"initializer_range": 0.02,
|
168 |
+
"intermediate_size": 3072,
|
169 |
+
"label2id": {
|
170 |
+
"B-SINTOMA": 1,
|
171 |
+
"I-SINTOMA": 2,
|
172 |
+
"O": 0
|
173 |
+
},
|
174 |
+
"layer_norm_eps": 1e-05,
|
175 |
+
"max_position_embeddings": 514,
|
176 |
+
"model_type": "roberta",
|
177 |
+
"num_attention_heads": 12,
|
178 |
+
"num_hidden_layers": 12,
|
179 |
+
"pad_token_id": 1,
|
180 |
+
"position_embedding_type": "absolute",
|
181 |
+
"transformers_version": "4.44.2",
|
182 |
+
"type_vocab_size": 1,
|
183 |
+
"use_cache": true,
|
184 |
+
"vocab_size": 50262
|
185 |
+
}
|
186 |
+
|
187 |
+
[INFO|configuration_utils.py:733] 2024-09-04 18:27:17,622 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
188 |
+
[INFO|configuration_utils.py:800] 2024-09-04 18:27:17,623 >> Model config RobertaConfig {
|
189 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
190 |
+
"architectures": [
|
191 |
+
"RobertaForMaskedLM"
|
192 |
+
],
|
193 |
+
"attention_probs_dropout_prob": 0.1,
|
194 |
+
"bos_token_id": 0,
|
195 |
+
"classifier_dropout": null,
|
196 |
+
"eos_token_id": 2,
|
197 |
+
"gradient_checkpointing": false,
|
198 |
+
"hidden_act": "gelu",
|
199 |
+
"hidden_dropout_prob": 0.1,
|
200 |
+
"hidden_size": 768,
|
201 |
+
"initializer_range": 0.02,
|
202 |
+
"intermediate_size": 3072,
|
203 |
+
"layer_norm_eps": 1e-05,
|
204 |
+
"max_position_embeddings": 514,
|
205 |
+
"model_type": "roberta",
|
206 |
+
"num_attention_heads": 12,
|
207 |
+
"num_hidden_layers": 12,
|
208 |
+
"pad_token_id": 1,
|
209 |
+
"position_embedding_type": "absolute",
|
210 |
+
"transformers_version": "4.44.2",
|
211 |
+
"type_vocab_size": 1,
|
212 |
+
"use_cache": true,
|
213 |
+
"vocab_size": 50262
|
214 |
+
}
|
215 |
+
|
216 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file vocab.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/vocab.json
|
217 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file merges.txt from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/merges.txt
|
218 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,902 >> loading file tokenizer.json from cache at None
|
219 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file added_tokens.json from cache at None
|
220 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/special_tokens_map.json
|
221 |
+
[INFO|tokenization_utils_base.py:2269] 2024-09-04 18:27:21,903 >> loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/tokenizer_config.json
|
222 |
+
[INFO|configuration_utils.py:733] 2024-09-04 18:27:21,903 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
223 |
+
[INFO|configuration_utils.py:800] 2024-09-04 18:27:21,904 >> Model config RobertaConfig {
|
224 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
225 |
+
"architectures": [
|
226 |
+
"RobertaForMaskedLM"
|
227 |
+
],
|
228 |
+
"attention_probs_dropout_prob": 0.1,
|
229 |
+
"bos_token_id": 0,
|
230 |
+
"classifier_dropout": null,
|
231 |
+
"eos_token_id": 2,
|
232 |
+
"gradient_checkpointing": false,
|
233 |
+
"hidden_act": "gelu",
|
234 |
+
"hidden_dropout_prob": 0.1,
|
235 |
+
"hidden_size": 768,
|
236 |
+
"initializer_range": 0.02,
|
237 |
+
"intermediate_size": 3072,
|
238 |
+
"layer_norm_eps": 1e-05,
|
239 |
+
"max_position_embeddings": 514,
|
240 |
+
"model_type": "roberta",
|
241 |
+
"num_attention_heads": 12,
|
242 |
+
"num_hidden_layers": 12,
|
243 |
+
"pad_token_id": 1,
|
244 |
+
"position_embedding_type": "absolute",
|
245 |
+
"transformers_version": "4.44.2",
|
246 |
+
"type_vocab_size": 1,
|
247 |
+
"use_cache": true,
|
248 |
+
"vocab_size": 50262
|
249 |
+
}
|
250 |
+
|
251 |
+
/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884
|
252 |
+
warnings.warn(
|
253 |
+
[INFO|configuration_utils.py:733] 2024-09-04 18:27:21,979 >> loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/config.json
|
254 |
+
[INFO|configuration_utils.py:800] 2024-09-04 18:27:21,981 >> Model config RobertaConfig {
|
255 |
+
"_name_or_path": "PlanTL-GOB-ES/bsc-bio-ehr-es",
|
256 |
+
"architectures": [
|
257 |
+
"RobertaForMaskedLM"
|
258 |
+
],
|
259 |
+
"attention_probs_dropout_prob": 0.1,
|
260 |
+
"bos_token_id": 0,
|
261 |
+
"classifier_dropout": null,
|
262 |
+
"eos_token_id": 2,
|
263 |
+
"gradient_checkpointing": false,
|
264 |
+
"hidden_act": "gelu",
|
265 |
+
"hidden_dropout_prob": 0.1,
|
266 |
+
"hidden_size": 768,
|
267 |
+
"initializer_range": 0.02,
|
268 |
+
"intermediate_size": 3072,
|
269 |
+
"layer_norm_eps": 1e-05,
|
270 |
+
"max_position_embeddings": 514,
|
271 |
+
"model_type": "roberta",
|
272 |
+
"num_attention_heads": 12,
|
273 |
+
"num_hidden_layers": 12,
|
274 |
+
"pad_token_id": 1,
|
275 |
+
"position_embedding_type": "absolute",
|
276 |
+
"transformers_version": "4.44.2",
|
277 |
+
"type_vocab_size": 1,
|
278 |
+
"use_cache": true,
|
279 |
+
"vocab_size": 50262
|
280 |
+
}
|
281 |
+
|
282 |
+
[INFO|modeling_utils.py:3678] 2024-09-04 18:27:26,079 >> loading weights file pytorch_model.bin from cache at /root/.cache/huggingface/hub/models--PlanTL-GOB-ES--bsc-bio-ehr-es/snapshots/1e543adb2d21f19d85a89305eebdbd64ab656b99/pytorch_model.bin
|
283 |
+
[INFO|modeling_utils.py:4497] 2024-09-04 18:27:26,219 >> Some weights of the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es were not used when initializing RobertaForTokenClassification: ['lm_head.bias', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.layer_norm.weight']
|
284 |
+
- This IS expected if you are initializing RobertaForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
|
285 |
+
- This IS NOT expected if you are initializing RobertaForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
|
286 |
+
[WARNING|modeling_utils.py:4509] 2024-09-04 18:27:26,219 >> Some weights of RobertaForTokenClassification were not initialized from the model checkpoint at PlanTL-GOB-ES/bsc-bio-ehr-es and are newly initialized: ['classifier.bias', 'classifier.weight']
|
287 |
+
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
|
288 |
+
|
289 |
+
|
290 |
+
|
291 |
+
/content/dissertation/scripts/ner/run_ner_train.py:397: FutureWarning: load_metric is deprecated and will be removed in the next major version of datasets. Use 'evaluate.load' instead, from the new library π€ Evaluate: https://huggingface.co/docs/evaluate
|
292 |
+
metric = load_metric("seqeval", trust_remote_code=True)
|
293 |
+
|
294 |
+
[INFO|trainer.py:811] 2024-09-04 18:27:34,644 >> The following columns in the training set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
295 |
+
[INFO|trainer.py:2134] 2024-09-04 18:27:35,302 >> ***** Running training *****
|
296 |
+
[INFO|trainer.py:2135] 2024-09-04 18:27:35,302 >> Num examples = 15,848
|
297 |
+
[INFO|trainer.py:2136] 2024-09-04 18:27:35,302 >> Num Epochs = 10
|
298 |
+
[INFO|trainer.py:2137] 2024-09-04 18:27:35,302 >> Instantaneous batch size per device = 32
|
299 |
+
[INFO|trainer.py:2140] 2024-09-04 18:27:35,302 >> Total train batch size (w. parallel, distributed & accumulation) = 64
|
300 |
+
[INFO|trainer.py:2141] 2024-09-04 18:27:35,302 >> Gradient Accumulation steps = 2
|
301 |
+
[INFO|trainer.py:2142] 2024-09-04 18:27:35,302 >> Total optimization steps = 2,480
|
302 |
+
[INFO|trainer.py:2143] 2024-09-04 18:27:35,303 >> Number of trainable parameters = 124,055,043
|
303 |
+
|
304 |
0%| | 0/2480 [00:00<?, ?it/s]
|
305 |
0%| | 1/2480 [00:01<1:07:07, 1.62s/it]
|
306 |
0%| | 2/2480 [00:01<33:03, 1.25it/s]
|
307 |
0%| | 3/2480 [00:02<24:43, 1.67it/s]
|
308 |
0%| | 4/2480 [00:02<19:24, 2.13it/s]
|
309 |
0%| | 5/2480 [00:02<15:23, 2.68it/s]
|
310 |
0%| | 6/2480 [00:02<13:36, 3.03it/s]
|
311 |
0%| | 7/2480 [00:03<11:58, 3.44it/s]
|
312 |
0%| | 8/2480 [00:03<10:57, 3.76it/s]
|
313 |
0%| | 9/2480 [00:03<10:29, 3.93it/s]
|
314 |
0%| | 10/2480 [00:03<11:55, 3.45it/s]
|
315 |
0%| | 11/2480 [00:04<12:31, 3.28it/s]
|
316 |
0%| | 12/2480 [00:04<11:38, 3.53it/s]
|
317 |
1%| | 13/2480 [00:04<11:55, 3.45it/s]
|
318 |
1%| | 14/2480 [00:05<10:50, 3.79it/s]
|
319 |
1%| | 15/2480 [00:05<10:23, 3.95it/s]
|
320 |
1%| | 16/2480 [00:05<10:14, 4.01it/s]
|
321 |
1%| | 17/2480 [00:05<10:07, 4.05it/s]
|
322 |
1%| | 18/2480 [00:05<09:43, 4.22it/s]
|
323 |
1%| | 19/2480 [00:06<10:53, 3.77it/s]
|
324 |
1%| | 20/2480 [00:06<10:34, 3.88it/s]
|
325 |
1%| | 21/2480 [00:06<10:42, 3.83it/s]
|
326 |
1%| | 22/2480 [00:06<10:01, 4.09it/s]
|
327 |
1%| | 23/2480 [00:07<09:20, 4.39it/s]
|
328 |
1%| | 24/2480 [00:07<08:53, 4.60it/s]
|
329 |
1%| | 25/2480 [00:07<08:51, 4.62it/s]
|
330 |
1%| | 26/2480 [00:07<08:54, 4.59it/s]
|
331 |
1%| | 27/2480 [00:08<09:03, 4.51it/s]
|
332 |
1%| | 28/2480 [00:08<09:00, 4.54it/s]
|
333 |
1%| | 29/2480 [00:08<09:09, 4.46it/s]
|
334 |
1%| | 30/2480 [00:08<09:54, 4.12it/s]
|
335 |
1%|β | 31/2480 [00:08<09:15, 4.41it/s]
|
336 |
1%|β | 32/2480 [00:09<09:01, 4.52it/s]
|
337 |
1%|β | 33/2480 [00:09<09:02, 4.51it/s]
|
338 |
1%|β | 34/2480 [00:09<09:19, 4.37it/s]
|
339 |
1%|β | 35/2480 [00:09<09:42, 4.20it/s]
|
340 |
1%|β | 36/2480 [00:10<09:13, 4.42it/s]
|
341 |
1%|β | 37/2480 [00:10<12:51, 3.16it/s]
|
342 |
2%|β | 38/2480 [00:10<11:34, 3.52it/s]
|
343 |
2%|β | 39/2480 [00:11<11:25, 3.56it/s]
|
344 |
2%|β | 40/2480 [00:11<10:18, 3.95it/s]
|
345 |
2%|β | 41/2480 [00:11<09:34, 4.25it/s]
|
346 |
2%|β | 42/2480 [00:11<09:16, 4.38it/s]
|
347 |
2%|β | 43/2480 [00:11<08:59, 4.52it/s]
|
348 |
2%|β | 44/2480 [00:12<10:12, 3.98it/s]
|
349 |
2%|β | 45/2480 [00:12<09:52, 4.11it/s]
|
350 |
2%|β | 46/2480 [00:12<11:42, 3.46it/s]
|
351 |
2%|β | 47/2480 [00:13<10:40, 3.80it/s]
|
352 |
2%|β | 48/2480 [00:13<10:11, 3.98it/s]
|
353 |
2%|β | 49/2480 [00:13<11:17, 3.59it/s]
|
354 |
2%|β | 50/2480 [00:13<11:50, 3.42it/s]
|
355 |
2%|β | 51/2480 [00:14<11:00, 3.68it/s]
|
356 |
2%|β | 52/2480 [00:14<10:50, 3.73it/s]
|
357 |
2%|β | 53/2480 [00:14<10:01, 4.04it/s]
|
358 |
2%|β | 54/2480 [00:15<12:09, 3.33it/s]
|
359 |
2%|β | 55/2480 [00:15<12:02, 3.36it/s]
|
360 |
2%|β | 56/2480 [00:15<10:46, 3.75it/s]
|
361 |
2%|β | 57/2480 [00:15<10:55, 3.70it/s]
|
362 |
2%|β | 58/2480 [00:16<10:10, 3.97it/s]
|
363 |
2%|β | 59/2480 [00:16<09:52, 4.09it/s]
|
364 |
2%|β | 60/2480 [00:16<09:39, 4.17it/s]
|
365 |
2%|β | 61/2480 [00:16<08:59, 4.48it/s]
|
366 |
2%|β | 62/2480 [00:16<08:44, 4.61it/s]
|
367 |
3%|β | 63/2480 [00:17<08:49, 4.56it/s]
|
368 |
3%|β | 64/2480 [00:17<10:09, 3.97it/s]
|
369 |
3%|β | 65/2480 [00:17<09:53, 4.07it/s]
|
370 |
3%|β | 66/2480 [00:17<10:26, 3.85it/s]
|
371 |
3%|β | 67/2480 [00:18<10:40, 3.77it/s]
|
372 |
3%|β | 68/2480 [00:18<10:02, 4.00it/s]
|
373 |
3%|β | 69/2480 [00:18<09:33, 4.21it/s]
|
374 |
3%|β | 70/2480 [00:18<09:27, 4.25it/s]
|
375 |
3%|β | 71/2480 [00:19<09:44, 4.12it/s]
|
376 |
3%|β | 72/2480 [00:19<10:20, 3.88it/s]
|
377 |
3%|β | 73/2480 [00:19<09:47, 4.10it/s]
|
378 |
3%|β | 74/2480 [00:20<11:34, 3.47it/s]
|
379 |
3%|β | 75/2480 [00:20<11:25, 3.51it/s]
|
380 |
3%|β | 76/2480 [00:20<11:33, 3.47it/s]
|
381 |
3%|β | 77/2480 [00:20<10:47, 3.71it/s]
|
382 |
3%|β | 78/2480 [00:21<10:10, 3.93it/s]
|
383 |
3%|β | 79/2480 [00:21<09:52, 4.05it/s]
|
384 |
3%|β | 80/2480 [00:21<09:47, 4.09it/s]
|
385 |
3%|β | 81/2480 [00:21<11:08, 3.59it/s]
|
386 |
3%|β | 82/2480 [00:22<10:41, 3.74it/s]
|
387 |
3%|β | 83/2480 [00:22<09:51, 4.05it/s]
|
388 |
3%|β | 84/2480 [00:22<10:24, 3.83it/s]
|
389 |
3%|β | 85/2480 [00:22<10:22, 3.85it/s]
|
390 |
3%|β | 86/2480 [00:23<09:52, 4.04it/s]
|
391 |
4%|β | 87/2480 [00:23<10:24, 3.83it/s]
|
392 |
4%|β | 88/2480 [00:23<09:52, 4.04it/s]
|
393 |
4%|β | 89/2480 [00:23<10:02, 3.97it/s]
|
394 |
4%|β | 90/2480 [00:24<11:55, 3.34it/s]
|
395 |
4%|β | 91/2480 [00:24<11:08, 3.57it/s]
|
396 |
4%|β | 92/2480 [00:24<11:00, 3.62it/s]
|
397 |
4%|β | 93/2480 [00:25<10:41, 3.72it/s]
|
398 |
4%|β | 94/2480 [00:25<10:17, 3.87it/s]
|
399 |
4%|β | 95/2480 [00:25<10:17, 3.86it/s]
|
400 |
4%|β | 96/2480 [00:25<09:22, 4.24it/s]
|
401 |
4%|β | 97/2480 [00:25<09:48, 4.05it/s]
|
402 |
4%|β | 98/2480 [00:26<08:58, 4.43it/s]
|
403 |
4%|β | 99/2480 [00:26<10:07, 3.92it/s]
|
404 |
4%|β | 100/2480 [00:26<09:27, 4.19it/s]
|
405 |
4%|β | 101/2480 [00:26<09:08, 4.33it/s]
|
406 |
4%|β | 102/2480 [00:27<09:17, 4.27it/s]
|
407 |
4%|β | 103/2480 [00:27<08:58, 4.41it/s]
|
408 |
4%|β | 104/2480 [00:27<08:55, 4.44it/s]
|
409 |
4%|β | 105/2480 [00:27<09:26, 4.19it/s]
|
410 |
4%|β | 106/2480 [00:28<09:53, 4.00it/s]
|
411 |
4%|β | 107/2480 [00:28<09:29, 4.17it/s]
|
412 |
4%|β | 108/2480 [00:28<09:57, 3.97it/s]
|
413 |
4%|β | 109/2480 [00:28<09:08, 4.32it/s]
|
414 |
4%|β | 110/2480 [00:29<09:11, 4.30it/s]
|
415 |
4%|β | 111/2480 [00:29<09:05, 4.34it/s]
|
416 |
5%|β | 112/2480 [00:29<08:44, 4.51it/s]
|
417 |
5%|β | 113/2480 [00:29<08:33, 4.61it/s]
|
418 |
5%|β | 114/2480 [00:29<08:39, 4.55it/s]
|
419 |
5%|β | 115/2480 [00:30<08:11, 4.81it/s]
|
420 |
5%|β | 116/2480 [00:30<08:29, 4.64it/s]
|
421 |
5%|β | 117/2480 [00:30<08:45, 4.50it/s]
|
422 |
5%|β | 118/2480 [00:30<10:20, 3.81it/s]
|
423 |
5%|β | 119/2480 [00:31<10:47, 3.65it/s]
|
424 |
5%|β | 120/2480 [00:31<10:03, 3.91it/s]
|
425 |
5%|β | 121/2480 [00:31<11:11, 3.51it/s]
|
426 |
5%|β | 122/2480 [00:31<10:07, 3.88it/s]
|
427 |
5%|β | 123/2480 [00:32<09:46, 4.02it/s]
|
428 |
5%|β | 124/2480 [00:32<09:41, 4.05it/s]
|
429 |
5%|β | 125/2480 [00:32<09:50, 3.99it/s]
|
430 |
5%|β | 126/2480 [00:32<10:29, 3.74it/s]
|
431 |
5%|β | 127/2480 [00:33<11:12, 3.50it/s]
|
432 |
5%|β | 128/2480 [00:33<11:23, 3.44it/s]
|
433 |
5%|β | 129/2480 [00:33<11:44, 3.34it/s]
|
434 |
5%|β | 130/2480 [00:34<10:48, 3.62it/s]
|
435 |
5%|β | 131/2480 [00:34<10:45, 3.64it/s]
|
436 |
5%|β | 132/2480 [00:34<12:09, 3.22it/s]
|
437 |
5%|β | 133/2480 [00:35<11:14, 3.48it/s]
|
438 |
5%|β | 134/2480 [00:35<10:11, 3.83it/s]
|
439 |
5%|β | 135/2480 [00:35<09:29, 4.12it/s]
|
440 |
5%|β | 136/2480 [00:35<08:55, 4.38it/s]
|
441 |
6%|β | 137/2480 [00:35<08:53, 4.39it/s]
|
442 |
6%|β | 138/2480 [00:36<09:30, 4.10it/s]
|
443 |
6%|β | 139/2480 [00:36<09:22, 4.16it/s]
|
444 |
6%|β | 140/2480 [00:36<09:46, 3.99it/s]
|
445 |
6%|β | 141/2480 [00:36<10:17, 3.79it/s]
|
446 |
6%|β | 142/2480 [00:37<09:19, 4.18it/s]
|
447 |
6%|β | 143/2480 [00:37<09:22, 4.15it/s]
|
448 |
6%|β | 144/2480 [00:37<09:46, 3.98it/s]
|
449 |
6%|β | 145/2480 [00:37<09:36, 4.05it/s]
|
450 |
6%|β | 146/2480 [00:38<09:16, 4.19it/s]
|
451 |
6%|β | 147/2480 [00:38<08:56, 4.35it/s]
|
452 |
6%|β | 148/2480 [00:38<09:01, 4.31it/s]
|
453 |
6%|β | 149/2480 [00:38<08:46, 4.43it/s]
|
454 |
6%|β | 150/2480 [00:39<09:03, 4.29it/s]
|
455 |
6%|β | 151/2480 [00:39<08:38, 4.50it/s]
|
456 |
6%|β | 152/2480 [00:39<08:57, 4.33it/s]
|
457 |
6%|β | 153/2480 [00:39<09:12, 4.21it/s]
|
458 |
6%|β | 154/2480 [00:39<09:25, 4.11it/s]
|
459 |
6%|β | 155/2480 [00:40<09:55, 3.91it/s]
|
460 |
6%|β | 156/2480 [00:40<09:35, 4.04it/s]
|
461 |
6%|β | 157/2480 [00:40<09:05, 4.26it/s]
|
462 |
6%|β | 158/2480 [00:40<08:57, 4.32it/s]
|
463 |
6%|β | 159/2480 [00:41<09:42, 3.98it/s]
|
464 |
6%|β | 160/2480 [00:41<09:12, 4.20it/s]
|
465 |
6%|β | 161/2480 [00:41<08:26, 4.58it/s]
|
466 |
7%|β | 162/2480 [00:41<08:04, 4.78it/s]
|
467 |
7%|β | 163/2480 [00:42<08:28, 4.56it/s]
|
468 |
7%|β | 164/2480 [00:42<09:54, 3.90it/s]
|
469 |
7%|β | 165/2480 [00:42<10:08, 3.81it/s]
|
470 |
7%|β | 166/2480 [00:42<09:49, 3.93it/s]
|
471 |
7%|β | 167/2480 [00:43<09:09, 4.21it/s]
|
472 |
7%|β | 168/2480 [00:43<09:38, 4.00it/s]
|
473 |
7%|β | 169/2480 [00:43<09:07, 4.22it/s]
|
474 |
7%|β | 170/2480 [00:43<09:17, 4.14it/s]
|
475 |
7%|β | 171/2480 [00:44<09:21, 4.11it/s]
|
476 |
7%|β | 172/2480 [00:44<09:06, 4.22it/s]
|
477 |
7%|β | 173/2480 [00:44<08:49, 4.36it/s]
|
478 |
7%|β | 174/2480 [00:44<09:14, 4.16it/s]
|
479 |
7%|β | 175/2480 [00:45<09:11, 4.18it/s]
|
480 |
7%|β | 176/2480 [00:45<08:46, 4.38it/s]
|
481 |
7%|β | 177/2480 [00:45<08:29, 4.52it/s]
|
482 |
7%|β | 178/2480 [00:45<08:04, 4.76it/s]
|
483 |
7%|β | 179/2480 [00:45<07:58, 4.81it/s]
|
484 |
7%|β | 180/2480 [00:46<08:13, 4.66it/s]
|
485 |
7%|β | 181/2480 [00:46<08:21, 4.58it/s]
|
486 |
7%|β | 182/2480 [00:46<08:41, 4.41it/s]
|
487 |
7%|β | 183/2480 [00:46<08:39, 4.42it/s]
|
488 |
7%|β | 184/2480 [00:46<08:42, 4.39it/s]
|
489 |
7%|β | 185/2480 [00:47<08:31, 4.48it/s]
|
490 |
8%|β | 186/2480 [00:47<08:22, 4.56it/s]
|
491 |
8%|β | 187/2480 [00:47<08:10, 4.68it/s]
|
492 |
8%|β | 188/2480 [00:47<08:16, 4.61it/s]
|
493 |
8%|β | 189/2480 [00:48<08:21, 4.57it/s]
|
494 |
8%|β | 190/2480 [00:48<08:52, 4.30it/s]
|
495 |
8%|β | 191/2480 [00:48<08:34, 4.45it/s]
|
496 |
8%|β | 192/2480 [00:48<08:58, 4.25it/s]
|
497 |
8%|β | 193/2480 [00:48<08:32, 4.47it/s]
|
498 |
8%|β | 194/2480 [00:49<09:04, 4.20it/s]
|
499 |
8%|β | 195/2480 [00:49<09:47, 3.89it/s]
|
500 |
8%|β | 196/2480 [00:49<09:36, 3.96it/s]
|
501 |
8%|β | 197/2480 [00:50<11:04, 3.44it/s]
|
502 |
8%|β | 198/2480 [00:50<10:01, 3.80it/s]
|
503 |
8%|β | 199/2480 [00:50<09:13, 4.12it/s]
|
504 |
8%|β | 200/2480 [00:50<08:43, 4.35it/s]
|
505 |
8%|β | 201/2480 [00:50<08:30, 4.47it/s]
|
506 |
8%|β | 202/2480 [00:51<08:31, 4.46it/s]
|
507 |
8%|β | 203/2480 [00:51<10:17, 3.69it/s]
|
508 |
8%|β | 204/2480 [00:51<10:29, 3.62it/s]
|
509 |
8%|β | 205/2480 [00:52<09:39, 3.93it/s]
|
510 |
8%|β | 206/2480 [00:52<09:18, 4.07it/s]
|
511 |
8%|β | 207/2480 [00:52<09:36, 3.94it/s]
|
512 |
8%|β | 208/2480 [00:52<10:23, 3.65it/s]
|
513 |
8%|β | 209/2480 [00:53<10:22, 3.65it/s]
|
514 |
8%|β | 210/2480 [00:53<09:54, 3.82it/s]
|
515 |
9%|β | 211/2480 [00:53<09:31, 3.97it/s]
|
516 |
9%|β | 212/2480 [00:53<09:37, 3.93it/s]
|
517 |
9%|β | 213/2480 [00:54<09:25, 4.01it/s]
|
518 |
9%|β | 214/2480 [00:54<09:16, 4.07it/s]
|
519 |
9%|β | 215/2480 [00:54<09:42, 3.89it/s]
|
520 |
9%|β | 216/2480 [00:54<09:37, 3.92it/s]
|
521 |
9%|β | 217/2480 [00:55<09:27, 3.99it/s]
|
522 |
9%|β | 218/2480 [00:55<09:32, 3.95it/s]
|
523 |
9%|β | 219/2480 [00:55<08:53, 4.24it/s]
|
524 |
9%|β | 220/2480 [00:55<10:10, 3.70it/s]
|
525 |
9%|β | 221/2480 [00:56<09:48, 3.84it/s]
|
526 |
9%|β | 222/2480 [00:56<08:54, 4.23it/s]
|
527 |
9%|β | 223/2480 [00:56<09:11, 4.09it/s]
|
528 |
9%|β | 224/2480 [00:56<09:01, 4.17it/s]
|
529 |
9%|β | 225/2480 [00:57<09:45, 3.85it/s]
|
530 |
9%|β | 226/2480 [00:57<09:24, 4.00it/s]
|
531 |
9%|β | 227/2480 [00:57<09:26, 3.98it/s]
|
532 |
9%|β | 228/2480 [00:57<08:58, 4.19it/s]
|
533 |
9%|β | 229/2480 [00:58<08:33, 4.39it/s]
|
534 |
9%|β | 230/2480 [00:58<08:21, 4.48it/s]
|
535 |
9%|β | 231/2480 [00:58<08:32, 4.39it/s]
|
536 |
9%|β | 232/2480 [00:58<09:05, 4.12it/s]
|
537 |
9%|β | 233/2480 [00:59<08:56, 4.19it/s]
|
538 |
9%|β | 234/2480 [00:59<09:25, 3.97it/s]
|
539 |
9%|β | 235/2480 [00:59<09:12, 4.07it/s]
|
540 |
10%|β | 236/2480 [00:59<10:24, 3.60it/s]
|
541 |
10%|β | 237/2480 [01:00<09:34, 3.90it/s]
|
542 |
10%|β | 238/2480 [01:00<09:06, 4.11it/s]
|
543 |
10%|β | 239/2480 [01:00<08:58, 4.16it/s]
|
544 |
10%|β | 240/2480 [01:00<08:21, 4.47it/s]
|
545 |
10%|β | 241/2480 [01:00<07:53, 4.73it/s]
|
546 |
10%|β | 242/2480 [01:01<08:08, 4.58it/s]
|
547 |
10%|β | 243/2480 [01:01<08:34, 4.35it/s]
|
548 |
10%|β | 244/2480 [01:01<09:04, 4.10it/s]
|
549 |
10%|β | 245/2480 [01:01<09:14, 4.03it/s]
|
550 |
10%|β | 246/2480 [01:02<08:50, 4.21it/s]
|
551 |
10%|β | 247/2480 [01:02<08:32, 4.36it/s]
|
552 |
10%|β | 248/2480 [01:02<07:59, 4.65it/s][INFO|trainer.py:811] 2024-09-04 18:28:37,832 >> The following columns in the evaluation set don't have a corresponding argument in `RobertaForTokenClassification.forward` and have been ignored: ner_tags, id, tokens. If ner_tags, id, tokens are not expected by `RobertaForTokenClassification.forward`, you can safely ignore this message.
|
553 |
+
[INFO|trainer.py:3819] 2024-09-04 18:28:37,835 >>
|
554 |
+
***** Running Evaluation *****
|
555 |
+
[INFO|trainer.py:3821] 2024-09-04 18:28:37,835 >> Num examples = 2519
|
556 |
+
[INFO|trainer.py:3824] 2024-09-04 18:28:37,835 >> Batch size = 8
|
557 |
+
|
558 |
+
|
559 |
0%| | 0/315 [00:00<?, ?it/s][A
|
560 |
+
|
561 |
3%|β | 9/315 [00:00<00:03, 87.62it/s][A
|
562 |
+
|
563 |
6%|β | 18/315 [00:00<00:03, 84.29it/s][A
|
564 |
+
|
565 |
9%|β | 27/315 [00:00<00:03, 82.00it/s][A
|
566 |
+
|
567 |
11%|ββ | 36/315 [00:00<00:03, 76.52it/s][A
|
568 |
+
|
569 |
14%|ββ | 45/315 [00:00<00:03, 78.83it/s][A
|
570 |
+
|
571 |
17%|ββ | 54/315 [00:00<00:03, 79.38it/s][A
|
572 |
+
|
573 |
20%|ββ | 63/315 [00:00<00:03, 78.44it/s][A
|
574 |
+
|
575 |
23%|βββ | 72/315 [00:00<00:03, 80.31it/s][A
|
576 |
+
|
577 |
26%|βββ | 81/315 [00:01<00:02, 79.22it/s][A
|
578 |
+
|
579 |
28%|βββ | 89/315 [00:01<00:02, 78.97it/s][A
|
580 |
+
|
581 |
31%|βββ | 97/315 [00:01<00:02, 78.20it/s][A
|
582 |
+
|
583 |
34%|ββββ | 106/315 [00:01<00:02, 80.06it/s][A
|
584 |
+
|
585 |
37%|ββββ | 115/315 [00:01<00:02, 81.29it/s][A
|
586 |
+
|
587 |
39%|ββββ | 124/315 [00:01<00:02, 78.38it/s][A
|
588 |
+
|
589 |
42%|βββββ | 132/315 [00:01<00:02, 78.59it/s][A
|
590 |
+
|
591 |
44%|βββββ | 140/315 [00:01<00:02, 77.86it/s][A
|
592 |
+
|
593 |
47%|βββββ | 148/315 [00:01<00:02, 75.27it/s][A
|
594 |
+
|
595 |
50%|βββββ | 156/315 [00:01<00:02, 73.46it/s][A
|
596 |
+
|
597 |
52%|ββββββ | 164/315 [00:02<00:02, 75.03it/s][A
|
598 |
+
|
599 |
55%|ββββββ | 173/315 [00:02<00:01, 76.99it/s][A
|
600 |
+
|
601 |
57%|ββββββ | 181/315 [00:02<00:01, 77.31it/s][A
|
602 |
+
|
603 |
60%|ββββββ | 190/315 [00:02<00:01, 78.87it/s][A
|
604 |
+
|
605 |
63%|βββββββ | 198/315 [00:02<00:01, 78.00it/s][A
|
606 |
+
|
607 |
65%|βββββββ | 206/315 [00:02<00:01, 78.04it/s][A
|
608 |
+
|
609 |
68%|βββββββ | 214/315 [00:02<00:01, 75.23it/s][A
|
610 |
+
|
611 |
70%|βββββββ | 222/315 [00:02<00:01, 73.53it/s][A
|
612 |
+
|
613 |
73%|ββββββββ | 230/315 [00:02<00:01, 72.57it/s][A
|
614 |
+
|
615 |
76%|ββββββββ | 239/315 [00:03<00:01, 74.60it/s][A
|
616 |
+
|
617 |
78%|ββββββββ | 247/315 [00:03<00:00, 74.88it/s][A
|
618 |
+
|
619 |
81%|βββββββββ | 256/315 [00:03<00:00, 76.66it/s][A
|
620 |
+
|
621 |
84%|βββββββββ | 265/315 [00:03<00:00, 78.61it/s][A
|
622 |
+
|
623 |
87%|βββββββββ | 274/315 [00:03<00:00, 81.06it/s][A
|
624 |
+
|
625 |
90%|βββββββββ | 283/315 [00:03<00:00, 82.28it/s][A
|
626 |
+
|
627 |
93%|ββββββββββ| 292/315 [00:03<00:00, 79.10it/s][A
|
628 |
+
|
629 |
96%|ββββββββββ| 301/315 [00:03<00:00, 80.22it/s][A
|
630 |
+
|
631 |
98%|ββββββββββ| 310/315 [00:03<00:00, 81.05it/s][A
|
632 |
|
633 |
+
|
634 |
|
635 |
10%|β | 248/2480 [01:08<07:59, 4.65it/s]
|
636 |
+
|
637 |
+
|
638 |
[A[INFO|trainer.py:3503] 2024-09-04 18:28:43,350 >> Saving model checkpoint to /content/dissertation/scripts/ner/output/checkpoint-248
|
639 |
+
[INFO|configuration_utils.py:472] 2024-09-04 18:28:43,351 >> Configuration saved in /content/dissertation/scripts/ner/output/checkpoint-248/config.json
|
640 |
+
[INFO|modeling_utils.py:2799] 2024-09-04 18:28:44,360 >> Model weights saved in /content/dissertation/scripts/ner/output/checkpoint-248/model.safetensors
|
641 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-04 18:28:44,361 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/checkpoint-248/tokenizer_config.json
|
642 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-04 18:28:44,362 >> Special tokens file saved in /content/dissertation/scripts/ner/output/checkpoint-248/special_tokens_map.json
|
643 |
+
[INFO|tokenization_utils_base.py:2684] 2024-09-04 18:28:46,425 >> tokenizer config file saved in /content/dissertation/scripts/ner/output/tokenizer_config.json
|
644 |
+
[INFO|tokenization_utils_base.py:2693] 2024-09-04 18:28:46,425 >> Special tokens file saved in /content/dissertation/scripts/ner/output/special_tokens_map.json
|
645 |
+
|
646 |
10%|β | 249/2480 [01:11<1:44:55, 2.82s/it]
|
647 |
10%|β | 250/2480 [01:11<1:15:54, 2.04s/it]
|
648 |
10%|β | 251/2480 [01:11<55:54, 1.51s/it]
|
649 |
10%|β | 252/2480 [01:12<41:42, 1.12s/it]
|
650 |
10%|β | 253/2480 [01:12<31:32, 1.18it/s]
|
651 |
10%|β | 254/2480 [01:12<24:51, 1.49it/s]
|
652 |
10%|β | 255/2480 [01:12<21:00, 1.77it/s]
|
653 |
10%|β | 256/2480 [01:13<17:50, 2.08it/s]
|
654 |
10%|β | 257/2480 [01:13<15:06, 2.45it/s]
|
655 |
10%|β | 258/2480 [01:13<12:55, 2.87it/s]
|
656 |
10%|β | 259/2480 [01:13<11:19, 3.27it/s]
|
657 |
10%|β | 260/2480 [01:14<10:42, 3.45it/s]
|
658 |
11%|β | 261/2480 [01:14<10:07, 3.65it/s]
|
659 |
11%|β | 262/2480 [01:14<09:25, 3.92it/s]
|
660 |
11%|β | 263/2480 [01:14<09:55, 3.72it/s]
|
661 |
11%|β | 264/2480 [01:15<10:10, 3.63it/s]
|
662 |
11%|β | 265/2480 [01:15<10:24, 3.55it/s]
|
663 |
11%|β | 266/2480 [01:15<09:56, 3.71it/s]
|
664 |
11%|β | 267/2480 [01:15<09:49, 3.75it/s]
|
665 |
11%|β | 268/2480 [01:16<09:37, 3.83it/s]
|
666 |
11%|β | 269/2480 [01:16<09:43, 3.79it/s]
|
667 |
11%|β | 270/2480 [01:16<09:21, 3.94it/s]
|
668 |
11%|β | 271/2480 [01:16<09:51, 3.74it/s]
|
669 |
11%|β | 272/2480 [01:17<09:45, 3.77it/s]
|
670 |
11%|β | 273/2480 [01:17<09:37, 3.82it/s]
|
671 |
11%|β | 274/2480 [01:17<09:15, 3.97it/s]
|
672 |
11%|β | 275/2480 [01:18<09:48, 3.75it/s]
|
673 |
11%|β | 276/2480 [01:18<09:30, 3.86it/s]
|
674 |
11%|β | 277/2480 [01:18<08:50, 4.15it/s]
|
675 |
11%|β | 278/2480 [01:18<08:05, 4.53it/s]
|
676 |
11%|ββ | 279/2480 [01:18<09:13, 3.98it/s]
|
677 |
11%|ββ | 280/2480 [01:19<08:55, 4.11it/s]
|
678 |
11%|ββ | 281/2480 [01:19<08:30, 4.31it/s]
|
679 |
11%|ββ | 282/2480 [01:19<08:31, 4.29it/s]
|
680 |
11%|ββ | 283/2480 [01:19<08:42, 4.21it/s]
|
681 |
11%|ββ | 284/2480 [01:20<08:57, 4.09it/s]
|
682 |
11%|ββ | 285/2480 [01:20<08:46, 4.17it/s]
|
683 |
12%|ββ | 286/2480 [01:20<08:15, 4.43it/s]
|
684 |
12%|ββ | 287/2480 [01:20<07:45, 4.71it/s]
|
685 |
12%|ββ | 288/2480 [01:21<08:39, 4.22it/s]
|
686 |
12%|ββ | 289/2480 [01:21<09:57, 3.67it/s]
|
687 |
12%|ββ | 290/2480 [01:21<09:34, 3.81it/s]
|
688 |
12%|ββ | 291/2480 [01:21<09:32, 3.82it/s]
|
689 |
12%|ββ | 292/2480 [01:22<09:35, 3.80it/s]
|
690 |
12%|ββ | 293/2480 [01:22<09:23, 3.88it/s]
|
691 |
12%|ββ | 294/2480 [01:22<08:56, 4.08it/s]
|
692 |
12%|ββ | 295/2480 [01:22<09:27, 3.85it/s]
|
693 |
12%|ββ | 296/2480 [01:23<08:49, 4.12it/s]
|
694 |
12%|ββ | 297/2480 [01:23<08:55, 4.08it/s]
|
695 |
12%|ββ | 298/2480 [01:23<08:33, 4.25it/s]
|
696 |
12%|ββ | 299/2480 [01:23<09:39, 3.76it/s]
|
697 |
12%|ββ | 300/2480 [01:24<09:25, 3.86it/s]
|
698 |
12%|ββ | 301/2480 [01:24<08:34, 4.23it/s]
|
699 |
12%|ββ | 302/2480 [01:24<07:59, 4.54it/s]
|
700 |
12%|ββ | 303/2480 [01:25<11:53, 3.05it/s]
|
701 |
12%|ββ | 304/2480 [01:25<11:51, 3.06it/s]
|
702 |
12%|ββ | 305/2480 [01:25<10:15, 3.54it/s]
|
703 |
12%|ββ | 306/2480 [01:25<09:49, 3.69it/s]
|
704 |
12%|ββ | 307/2480 [01:26<09:18, 3.89it/s]
|
705 |
12%|ββ | 308/2480 [01:26<09:14, 3.92it/s]
|
706 |
12%|ββ | 309/2480 [01:26<08:48, 4.11it/s]
|
707 |
12%|ββ | 310/2480 [01:26<08:11, 4.42it/s]
|
708 |
13%|ββ | 311/2480 [01:26<07:49, 4.62it/s]
|
709 |
13%|ββ | 312/2480 [01:27<07:32, 4.79it/s]
|
710 |
13%|ββ | 313/2480 [01:27<07:56, 4.55it/s]
|
711 |
13%|ββ | 314/2480 [01:27<07:45, 4.65it/s]
|
712 |
13%|ββ | 315/2480 [01:27<09:12, 3.92it/s]
|
713 |
13%|ββ | 316/2480 [01:28<09:02, 3.99it/s]
|
714 |
13%|ββ | 317/2480 [01:28<08:47, 4.10it/s]
|
715 |
13%|ββ | 318/2480 [01:28<09:01, 3.99it/s]
|
716 |
13%|ββ | 319/2480 [01:28<08:19, 4.32it/s]
|
717 |
13%|ββ | 320/2480 [01:29<08:30, 4.23it/s]
|
718 |
13%|ββ | 321/2480 [01:29<08:24, 4.28it/s]
|
719 |
13%|ββ | 322/2480 [01:29<09:15, 3.89it/s]
|
720 |
13%|ββ | 323/2480 [01:29<09:26, 3.81it/s]
|
721 |
13%|ββ | 324/2480 [01:30<10:55, 3.29it/s]
|
722 |
13%|ββ | 325/2480 [01:30<09:29, 3.78it/s]
|
723 |
13%|ββ | 326/2480 [01:30<11:22, 3.16it/s]
|
724 |
13%|ββ | 327/2480 [01:31<10:10, 3.52it/s]
|
725 |
13%|ββ | 328/2480 [01:31<09:23, 3.82it/s]
|
726 |
13%|ββ | 329/2480 [01:31<09:28, 3.79it/s]
|
727 |
13%|ββ | 330/2480 [01:31<09:19, 3.84it/s]
|
728 |
13%|ββ | 331/2480 [01:32<08:50, 4.05it/s]
|
729 |
13%|ββ | 332/2480 [01:32<08:49, 4.05it/s]
|
730 |
13%|ββ | 333/2480 [01:32<09:10, 3.90it/s]
|
731 |
13%|ββ | 334/2480 [01:32<08:54, 4.02it/s]
|
732 |
14%|ββ | 335/2480 [01:33<09:15, 3.86it/s]
|
733 |
14%|ββ | 336/2480 [01:33<08:58, 3.98it/s]
|
734 |
14%|ββ | 337/2480 [01:33<08:27, 4.22it/s]
|
735 |
14%|ββ | 338/2480 [01:33<09:11, 3.89it/s]
|
736 |
14%|ββ | 339/2480 [01:34<08:56, 3.99it/s]
|
737 |
14%|ββ | 340/2480 [01:34<08:26, 4.22it/s]
|
738 |
14%|ββ | 341/2480 [01:34<08:03, 4.42it/s]
|
739 |
14%|ββ | 342/2480 [01:34<08:36, 4.14it/s]
|
740 |
14%|ββ | 343/2480 [01:35<08:30, 4.18it/s]
|
741 |
14%|ββ | 344/2480 [01:35<09:19, 3.81it/s]
|
742 |
14%|ββ | 345/2480 [01:35<08:34, 4.15it/s]
|
743 |
14%|ββ | 346/2480 [01:35<08:08, 4.36it/s]
|
744 |
14%|ββ | 347/2480 [01:35<07:56, 4.48it/s]
|
745 |
14%|ββ | 348/2480 [01:36<07:23, 4.81it/s]
|
746 |
14%|ββ | 349/2480 [01:36<07:13, 4.91it/s]
|
747 |
14%|ββ | 350/2480 [01:36<07:20, 4.84it/s]
|
748 |
14%|ββ | 351/2480 [01:36<07:24, 4.78it/s]
|
749 |
14%|ββ | 352/2480 [01:36<08:01, 4.42it/s]
|
750 |
14%|ββ | 353/2480 [01:37<08:36, 4.11it/s]
|
751 |
14%|ββ | 354/2480 [01:37<08:28, 4.18it/s]
|
752 |
14%|ββ | 355/2480 [01:37<08:44, 4.05it/s]
|
753 |
14%|ββ | 356/2480 [01:37<07:58, 4.44it/s]
|
754 |
14%|ββ | 357/2480 [01:38<08:08, 4.35it/s]
|
755 |
14%|ββ | 358/2480 [01:38<07:42, 4.59it/s]
|
756 |
14%|ββ | 359/2480 [01:38<07:46, 4.55it/s]
|
757 |
15%|ββ | 360/2480 [01:38<07:13, 4.89it/s]
|
758 |
15%|ββ | 361/2480 [01:39<07:37, 4.63it/s]
|
759 |
15%|ββ | 362/2480 [01:39<08:03, 4.38it/s]
|
760 |
15%|ββ | 363/2480 [01:39<08:08, 4.34it/s]
|
761 |
15%|ββ | 364/2480 [01:39<08:10, 4.31it/s]
|
762 |
15%|ββ | 365/2480 [01:39<07:53, 4.46it/s]
|
763 |
15%|ββ | 366/2480 [01:40<08:12, 4.29it/s]
|
764 |
15%|ββ | 367/2480 [01:40<07:50, 4.49it/s]
|
765 |
15%|ββ | 368/2480 [01:40<09:52, 3.56it/s]
|
766 |
15%|ββ | 369/2480 [01:41<09:27, 3.72it/s]
|
767 |
15%|ββ | 370/2480 [01:41<09:19, 3.77it/s]
|
768 |
15%|ββ | 371/2480 [01:41<09:02, 3.89it/s]
|
769 |
15%|ββ | 372/2480 [01:41<08:39, 4.06it/s]
|
770 |
15%|ββ | 373/2480 [01:42<09:22, 3.75it/s]
|
771 |
15%|ββ | 374/2480 [01:42<08:36, 4.08it/s]
|
772 |
15%|ββ | 375/2480 [01:42<09:23, 3.74it/s]
|
773 |
15%|ββ | 376/2480 [01:42<08:49, 3.97it/s]
|
774 |
15%|ββ | 377/2480 [01:43<08:18, 4.22it/s]
|
775 |
15%|ββ | 378/2480 [01:43<07:47, 4.49it/s]
|
776 |
15%|ββ | 379/2480 [01:43<08:04, 4.34it/s]
|
777 |
15%|ββ | 380/2480 [01:43<08:11, 4.27it/s]
|
778 |
15%|ββ | 381/2480 [01:43<08:26, 4.14it/s]
|
779 |
15%|ββ | 382/2480 [01:44<08:41, 4.02it/s]
|
780 |
15%|ββ | 383/2480 [01:44<08:20, 4.19it/s]
|
781 |
15%|ββ | 384/2480 [01:44<08:18, 4.21it/s]
|
782 |
16%|ββ | 385/2480 [01:44<08:01, 4.35it/s]
|
783 |
16%|ββ | 386/2480 [01:45<08:05, 4.32it/s]
|
784 |
16%|ββ | 387/2480 [01:45<08:09, 4.28it/s]
|
785 |
16%|ββ | 388/2480 [01:45<07:40, 4.54it/s]
|
786 |
16%|ββ | 389/2480 [01:45<07:19, 4.76it/s]
|
787 |
16%|ββ | 390/2480 [01:46<08:15, 4.21it/s]
|
788 |
16%|ββ | 391/2480 [01:46<08:16, 4.21it/s]
|
789 |
16%|ββ | 392/2480 [01:46<08:03, 4.32it/s]
|
790 |
16%|ββ | 393/2480 [01:46<07:37, 4.56it/s]
|
791 |
16%|ββ | 394/2480 [01:46<07:51, 4.42it/s]
|
792 |
16%|ββ | 395/2480 [01:47<07:30, 4.63it/s]
|
793 |
16%|ββ | 396/2480 [01:47<07:22, 4.71it/s]
|
794 |
16%|ββ | 397/2480 [01:47<07:09, 4.85it/s]
|
795 |
16%|ββ | 398/2480 [01:47<07:15, 4.78it/s]
|
796 |
16%|ββ | 399/2480 [01:47<07:29, 4.63it/s]
|
797 |
16%|ββ | 400/2480 [01:48<08:38, 4.01it/s]
|
798 |
16%|ββ | 401/2480 [01:48<08:26, 4.11it/s]
|
799 |
16%|ββ | 402/2480 [01:48<08:07, 4.26it/s]
|
800 |
16%|ββ | 403/2480 [01:49<09:51, 3.51it/s]
|
801 |
16%|ββ | 404/2480 [01:49<09:03, 3.82it/s]
|
802 |
16%|ββ | 405/2480 [01:49<09:01, 3.83it/s]
|
803 |
16%|ββ | 406/2480 [01:49<08:35, 4.02it/s]
|
804 |
16%|ββ | 407/2480 [01:50<09:48, 3.52it/s]
|
805 |
16%|ββ | 408/2480 [01:50<10:05, 3.42it/s]
|
806 |
16%|ββ | 409/2480 [01:50<09:18, 3.71it/s]
|
807 |
17%|ββ | 410/2480 [01:50<08:44, 3.95it/s]
|
808 |
17%|ββ | 411/2480 [01:51<09:09, 3.76it/s]
|
809 |
17%|ββ | 412/2480 [01:51<09:02, 3.81it/s]
|
810 |
17%|ββ | 413/2480 [01:51<08:49, 3.90it/s]
|
811 |
17%|ββ | 414/2480 [01:51<08:50, 3.89it/s]
|
812 |
17%|ββ | 415/2480 [01:52<08:39, 3.97it/s]
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:13556e6c97b2f39e25d5830ab0bc61ce81f807bcf643d150d23dd97c2f606c57
|
3 |
+
size 5240
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|