nguyenanh2803 committed on
Commit
ed73e92
·
verified ·
1 Parent(s): 7c35164

Training in progress, step 500

Browse files
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: FacebookAI/roberta-large
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - accuracy
8
+ - precision
9
+ - recall
10
+ - f1
11
+ model-index:
12
+ - name: absa-train-service-roberta-large
13
+ results: []
14
+ ---
15
+
16
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
+ should probably proofread and complete it, then remove this comment. -->
18
+
19
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/cunho2803032003/absa-1721959498.2993438/runs/tad25dun)
20
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/cunho2803032003/absa-1721959940.7872202/runs/bsprskdy)
21
+ # absa-train-service-roberta-large
22
+
23
+ This model is a fine-tuned version of [FacebookAI/roberta-large](https://huggingface.co/FacebookAI/roberta-large) on an unspecified dataset.
24
+ It achieves the following results on the evaluation set:
25
+ - Loss: 0.8683
26
+ - Accuracy: 0.7424
27
+ - Precision: 0.7345
28
+ - Recall: 0.7367
29
+ - F1: 0.7302
30
+
31
+ ## Model description
32
+
33
+ More information needed
34
+
35
+ ## Intended uses & limitations
36
+
37
+ More information needed
38
+
39
+ ## Training and evaluation data
40
+
41
+ More information needed
42
+
43
+ ## Training procedure
44
+
45
+ ### Training hyperparameters
46
+
47
+ The following hyperparameters were used during training:
48
+ - learning_rate: 0.0002
49
+ - train_batch_size: 8
50
+ - eval_batch_size: 8
51
+ - seed: 42
52
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
53
+ - lr_scheduler_type: linear
54
+ - lr_scheduler_warmup_steps: 500
55
+ - num_epochs: 20
56
+
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
60
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
61
+ | 2.2255 | 1.0 | 469 | 2.0677 | 0.3296 | 0.1937 | 0.3250 | 0.2297 |
62
+ | 1.8236 | 2.0 | 938 | 1.7061 | 0.504 | 0.5413 | 0.4914 | 0.4567 |
63
+ | 1.5384 | 3.0 | 1407 | 1.4381 | 0.552 | 0.5944 | 0.5549 | 0.5196 |
64
+ | 1.4301 | 4.0 | 1876 | 1.3316 | 0.5984 | 0.6000 | 0.5990 | 0.5618 |
65
+ | 1.3776 | 5.0 | 2345 | 1.1645 | 0.6576 | 0.6817 | 0.6491 | 0.6332 |
66
+ | 1.2078 | 6.0 | 2814 | 1.0967 | 0.6448 | 0.7035 | 0.6348 | 0.6110 |
67
+ | 1.2535 | 7.0 | 3283 | 1.0565 | 0.7008 | 0.7467 | 0.6967 | 0.7066 |
68
+ | 1.2921 | 8.0 | 3752 | 1.0049 | 0.6976 | 0.7013 | 0.6884 | 0.6813 |
69
+ | 1.178 | 9.0 | 4221 | 1.0438 | 0.648 | 0.7746 | 0.6423 | 0.6387 |
70
+ | 1.2324 | 10.0 | 4690 | 1.0203 | 0.6896 | 0.7096 | 0.6831 | 0.6704 |
71
+ | 1.1899 | 11.0 | 5159 | 1.0193 | 0.6864 | 0.7391 | 0.6819 | 0.6834 |
72
+ | 1.1515 | 12.0 | 5628 | 0.9722 | 0.6944 | 0.7164 | 0.6924 | 0.6860 |
73
+ | 1.1604 | 13.0 | 6097 | 0.9372 | 0.7312 | 0.7543 | 0.7311 | 0.7259 |
74
+ | 1.1229 | 14.0 | 6566 | 0.9265 | 0.72 | 0.7278 | 0.7139 | 0.7147 |
75
+ | 1.1459 | 15.0 | 7035 | 0.8896 | 0.7376 | 0.7264 | 0.7323 | 0.7183 |
76
+ | 1.1281 | 16.0 | 7504 | 0.9074 | 0.7152 | 0.7107 | 0.7087 | 0.7012 |
77
+ | 1.1794 | 17.0 | 7973 | 0.8914 | 0.7424 | 0.7293 | 0.7354 | 0.7266 |
78
+ | 1.1101 | 18.0 | 8442 | 0.8707 | 0.7216 | 0.7161 | 0.7141 | 0.7059 |
79
+ | 1.1215 | 19.0 | 8911 | 0.8656 | 0.7408 | 0.7322 | 0.7348 | 0.7274 |
80
+ | 1.0483 | 20.0 | 9380 | 0.8683 | 0.7424 | 0.7345 | 0.7367 | 0.7302 |
81
+
82
+
83
+ ### Framework versions
84
+
85
+ - Transformers 4.43.2
86
+ - Pytorch 2.3.1+cu121
87
+ - Datasets 2.20.0
88
+ - Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google-bert/bert-base-uncased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "LABEL_0",
14
+ "1": "LABEL_1",
15
+ "2": "LABEL_2",
16
+ "3": "LABEL_3",
17
+ "4": "LABEL_4",
18
+ "5": "LABEL_5",
19
+ "6": "LABEL_6",
20
+ "7": "LABEL_7",
21
+ "8": "LABEL_8",
22
+ "9": "LABEL_9",
23
+ "10": "LABEL_10"
24
+ },
25
+ "initializer_range": 0.02,
26
+ "intermediate_size": 3072,
27
+ "label2id": {
28
+ "LABEL_0": 0,
29
+ "LABEL_1": 1,
30
+ "LABEL_10": 10,
31
+ "LABEL_2": 2,
32
+ "LABEL_3": 3,
33
+ "LABEL_4": 4,
34
+ "LABEL_5": 5,
35
+ "LABEL_6": 6,
36
+ "LABEL_7": 7,
37
+ "LABEL_8": 8,
38
+ "LABEL_9": 9
39
+ },
40
+ "layer_norm_eps": 1e-12,
41
+ "max_position_embeddings": 512,
42
+ "model_type": "bert",
43
+ "num_attention_heads": 12,
44
+ "num_hidden_layers": 12,
45
+ "pad_token_id": 0,
46
+ "position_embedding_type": "absolute",
47
+ "problem_type": "single_label_classification",
48
+ "torch_dtype": "float32",
49
+ "transformers_version": "4.43.3",
50
+ "type_vocab_size": 2,
51
+ "use_cache": true,
52
+ "vocab_size": 30522
53
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa5fa450964384897e4e1f3951412531de915750de9708d2a5752ef6b22b19bb
3
+ size 437986332
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "BertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8698313c9987e0f0f82120c4351eab3d2efa49862c5da38da872974451d727af
3
+ size 5304
vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff