josejointriple committed on
Commit
3cb1186
·
verified ·
1 Parent(s): 5864528

jointriple/brand_classification_2_20240628_model_1

Browse files
Files changed (4) hide show
  1. README.md +18 -18
  2. config.json +10 -9
  3. model.safetensors +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
- license: apache-2.0
3
- base_model: bert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # results
17
 
18
- This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.7750
21
- - Accuracy: 0.9257
22
 
23
  ## Model description
24
 
@@ -37,26 +37,26 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 5e-05
41
- - train_batch_size: 512
42
- - eval_batch_size: 512
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - lr_scheduler_warmup_steps: 500
47
  - num_epochs: 7
48
 
49
  ### Training results
50
 
51
- | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
- |:-------------:|:-----:|:----:|:---------------:|:--------:|
53
- | 6.4334 | 1.0 | 593 | 6.1799 | 0.3660 |
54
- | 4.3096 | 2.0 | 1186 | 3.9473 | 0.8412 |
55
- | 2.6773 | 3.0 | 1779 | 2.3788 | 0.9079 |
56
- | 1.7389 | 4.0 | 2372 | 1.5072 | 0.9197 |
57
- | 1.1692 | 5.0 | 2965 | 1.0537 | 0.9236 |
58
- | 0.9072 | 6.0 | 3558 | 0.8410 | 0.9254 |
59
- | 0.7699 | 7.0 | 4151 | 0.7750 | 0.9257 |
60
 
61
 
62
  ### Framework versions
 
1
  ---
2
+ license: mit
3
+ base_model: roberta-base
4
  tags:
5
  - generated_from_trainer
6
  metrics:
 
15
 
16
  # results
17
 
18
+ This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unspecified dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.3028
21
+ - Accuracy: 0.9288
22
 
23
  ## Model description
24
 
 
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
+ - learning_rate: 2e-05
41
+ - train_batch_size: 128
42
+ - eval_batch_size: 128
43
  - seed: 42
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
+ - lr_scheduler_warmup_steps: 1000
47
  - num_epochs: 7
48
 
49
  ### Training results
50
 
51
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
52
+ |:-------------:|:-----:|:-----:|:---------------:|:--------:|
53
+ | 5.8594 | 1.0 | 2371 | 5.3310 | 0.8132 |
54
+ | 3.1041 | 2.0 | 4742 | 2.7193 | 0.8936 |
55
+ | 1.9352 | 3.0 | 7113 | 1.3843 | 0.9116 |
56
+ | 0.9755 | 4.0 | 9484 | 0.7382 | 0.9176 |
57
+ | 0.5370 | 5.0 | 11855 | 0.4559 | 0.9198 |
58
+ | 0.5705 | 6.0 | 14226 | 0.3554 | 0.9236 |
59
+ | 0.2824 | 7.0 | 16597 | 0.3028 | 0.9288 |
60
 
61
 
62
  ### Framework versions
config.json CHANGED
@@ -1,11 +1,12 @@
1
  {
2
- "_name_or_path": "bert-base-uncased",
3
  "architectures": [
4
- "BertForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
 
7
  "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
@@ -4187,17 +4188,17 @@
4187
  "\u0394\u03b9\u03bf\u03b4\u03b9\u03b1": 2084,
4188
  "\u0395\u03bb\u03af\u03bd": 2085
4189
  },
4190
- "layer_norm_eps": 1e-12,
4191
- "max_position_embeddings": 512,
4192
- "model_type": "bert",
4193
  "num_attention_heads": 12,
4194
  "num_hidden_layers": 12,
4195
- "pad_token_id": 0,
4196
  "position_embedding_type": "absolute",
4197
  "problem_type": "single_label_classification",
4198
  "torch_dtype": "float32",
4199
  "transformers_version": "4.39.3",
4200
- "type_vocab_size": 2,
4201
  "use_cache": true,
4202
- "vocab_size": 30522
4203
  }
 
1
  {
2
+ "_name_or_path": "roberta-base",
3
  "architectures": [
4
+ "RobertaForSequenceClassification"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
  "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 768,
 
4188
  "\u0394\u03b9\u03bf\u03b4\u03b9\u03b1": 2084,
4189
  "\u0395\u03bb\u03af\u03bd": 2085
4190
  },
4191
+ "layer_norm_eps": 1e-05,
4192
+ "max_position_embeddings": 514,
4193
+ "model_type": "roberta",
4194
  "num_attention_heads": 12,
4195
  "num_hidden_layers": 12,
4196
+ "pad_token_id": 1,
4197
  "position_embedding_type": "absolute",
4198
  "problem_type": "single_label_classification",
4199
  "torch_dtype": "float32",
4200
  "transformers_version": "4.39.3",
4201
+ "type_vocab_size": 1,
4202
  "use_cache": true,
4203
+ "vocab_size": 50265
4204
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16d7a19a8227ddd87e6e3b53f02dd08fd910ce3bc82c9c5a75589343514b08cb
3
- size 444369032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2094f4135f79892f0b142cf7b392dc7f37a00be5444a7fc941c9bdd83deae718
3
+ size 505023216
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ead15358f313b14eaa0fa2c68ea7494eb00a2343e4786eb4d5f55e17a5c99701
3
  size 4856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:271d7a2b9f13f45967aac087fb6d36fb65cb899900c01193e090d0f923ecf37e
3
  size 4856