albertmartinez committed on
Commit
b181cc5
·
verified ·
1 Parent(s): b39a986

Model save

Browse files
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  license: mit
3
  base_model: FacebookAI/xlm-roberta-large
4
  tags:
@@ -17,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
17
 
18
  This model is a fine-tuned version of [FacebookAI/xlm-roberta-large](https://huggingface.co/FacebookAI/xlm-roberta-large) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.6748
21
- - F1: 0.8262
22
 
23
  ## Model description
24
 
@@ -37,29 +38,30 @@ More information needed
37
  ### Training hyperparameters
38
 
39
  The following hyperparameters were used during training:
40
- - learning_rate: 2e-05
41
- - train_batch_size: 32
42
- - eval_batch_size: 32
43
  - seed: 42
44
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
  - lr_scheduler_warmup_steps: 600
47
  - num_epochs: 5.0
 
48
 
49
  ### Training results
50
 
51
- | Training Loss | Epoch | Step | Validation Loss | F1 |
52
- |:-------------:|:-----:|:----:|:---------------:|:------:|
53
- | 1.2213 | 1.0 | 1076 | 0.7617 | 0.7825 |
54
- | 0.6795 | 2.0 | 2152 | 0.6679 | 0.8069 |
55
- | 0.4878 | 3.0 | 3228 | 0.6416 | 0.8185 |
56
- | 0.339 | 4.0 | 4304 | 0.6522 | 0.8263 |
57
- | 0.226 | 5.0 | 5380 | 0.6748 | 0.8262 |
58
 
59
 
60
  ### Framework versions
61
 
62
- - Transformers 4.43.1
63
- - Pytorch 2.3.1+cu121
64
- - Datasets 2.20.0
65
- - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: transformers
3
  license: mit
4
  base_model: FacebookAI/xlm-roberta-large
5
  tags:
 
18
 
19
  This model is a fine-tuned version of [FacebookAI/xlm-roberta-large](https://huggingface.co/FacebookAI/xlm-roberta-large) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - F1: 0.8154
22
+ - Loss: 0.6651
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 1e-05
42
+ - train_batch_size: 64
43
+ - eval_batch_size: 64
44
  - seed: 42
45
+ - optimizer: adamw_torch with betas=(0.9,0.999) and epsilon=1e-08 (no additional optimizer arguments)
46
  - lr_scheduler_type: linear
47
  - lr_scheduler_warmup_steps: 600
48
  - num_epochs: 5.0
49
+ - mixed_precision_training: Native AMP
50
 
51
  ### Training results
52
 
53
+ | Training Loss | Epoch | Step | F1 | Validation Loss |
54
+ |:-------------:|:-----:|:----:|:------:|:---------------:|
55
+ | 1.7444 | 1.0 | 538 | 0.7686 | 0.8148 |
56
+ | 0.7749 | 2.0 | 1076 | 0.8000 | 0.7104 |
57
+ | 0.6165 | 3.0 | 1614 | 0.8114 | 0.6536 |
58
+ | 0.5044 | 4.0 | 2152 | 0.8140 | 0.6571 |
59
+ | 0.4217 | 5.0 | 2690 | 0.8154 | 0.6651 |
60
 
61
 
62
  ### Framework versions
63
 
64
+ - Transformers 4.49.0.dev0
65
+ - Pytorch 2.1.2.post304
66
+ - Datasets 3.2.0
67
+ - Tokenizers 0.21.0
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_f1": 0.8262395269824653,
4
- "eval_loss": 0.6748126149177551,
5
- "eval_runtime": 1120.7242,
6
  "eval_samples": 8605,
7
- "eval_samples_per_second": 7.678,
8
- "eval_steps_per_second": 0.24,
9
  "total_flos": 1.603929743474688e+17,
10
- "train_loss": 0.5906805073904725,
11
- "train_runtime": 93341.3352,
12
  "train_samples": 34420,
13
- "train_samples_per_second": 1.844,
14
- "train_steps_per_second": 0.058
15
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_f1": 0.8113637968918935,
4
+ "eval_loss": 0.6536160707473755,
5
+ "eval_runtime": 21.9128,
6
  "eval_samples": 8605,
7
+ "eval_samples_per_second": 392.692,
8
+ "eval_steps_per_second": 6.161,
9
  "total_flos": 1.603929743474688e+17,
10
+ "train_loss": 0.7819821552716223,
11
+ "train_runtime": 1776.2036,
12
  "train_samples": 34420,
13
+ "train_samples_per_second": 96.892,
14
+ "train_steps_per_second": 1.514
15
  }
config.json CHANGED
@@ -59,7 +59,7 @@
59
  "position_embedding_type": "absolute",
60
  "problem_type": "single_label_classification",
61
  "torch_dtype": "float32",
62
- "transformers_version": "4.43.1",
63
  "type_vocab_size": 1,
64
  "use_cache": true,
65
  "vocab_size": 250002
 
59
  "position_embedding_type": "absolute",
60
  "problem_type": "single_label_classification",
61
  "torch_dtype": "float32",
62
+ "transformers_version": "4.49.0.dev0",
63
  "type_vocab_size": 1,
64
  "use_cache": true,
65
  "vocab_size": 250002
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_f1": 0.8262395269824653,
4
- "eval_loss": 0.6748126149177551,
5
- "eval_runtime": 1120.7242,
6
  "eval_samples": 8605,
7
- "eval_samples_per_second": 7.678,
8
- "eval_steps_per_second": 0.24
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_f1": 0.8113637968918935,
4
+ "eval_loss": 0.6536160707473755,
5
+ "eval_runtime": 21.9128,
6
  "eval_samples": 8605,
7
+ "eval_samples_per_second": 392.692,
8
+ "eval_steps_per_second": 6.161
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:387b5a1e432a9111c78c7a8ee75c63b13aafb3fb3ba0562e0e15334df97732c0
3
  size 2239676072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5cb6f50bd2fb5332a437a955993eaf0bec472eb9b113867fb1081317267532a
3
  size 2239676072
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ffb37461c391f096759f4a9bbbc329da0f36952f88bab061fcf84940c022e98
3
- size 17082999
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
+ size 17082734
tokenizer_config.json CHANGED
@@ -42,9 +42,10 @@
42
  }
43
  },
44
  "bos_token": "<s>",
45
- "clean_up_tokenization_spaces": true,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
 
48
  "mask_token": "<mask>",
49
  "model_max_length": 512,
50
  "pad_token": "<pad>",
 
42
  }
43
  },
44
  "bos_token": "<s>",
45
+ "clean_up_tokenization_spaces": false,
46
  "cls_token": "<s>",
47
  "eos_token": "</s>",
48
+ "extra_special_tokens": {},
49
  "mask_token": "<mask>",
50
  "model_max_length": 512,
51
  "pad_token": "<pad>",
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.603929743474688e+17,
4
- "train_loss": 0.5906805073904725,
5
- "train_runtime": 93341.3352,
6
  "train_samples": 34420,
7
- "train_samples_per_second": 1.844,
8
- "train_steps_per_second": 0.058
9
  }
 
1
  {
2
  "epoch": 5.0,
3
  "total_flos": 1.603929743474688e+17,
4
+ "train_loss": 0.7819821552716223,
5
+ "train_runtime": 1776.2036,
6
  "train_samples": 34420,
7
+ "train_samples_per_second": 96.892,
8
+ "train_steps_per_second": 1.514
9
  }
trainer_state.json CHANGED
@@ -1,105 +1,105 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
- "eval_steps": 500.0,
6
- "global_step": 5380,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 1.0,
13
- "grad_norm": 17.019458770751953,
14
- "learning_rate": 1.8008368200836822e-05,
15
- "loss": 1.2213,
16
- "step": 1076
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_f1": 0.7824958430254769,
21
- "eval_loss": 0.7617404460906982,
22
- "eval_runtime": 1102.1746,
23
- "eval_samples_per_second": 7.807,
24
- "eval_steps_per_second": 0.244,
25
- "step": 1076
26
  },
27
  {
28
- "epoch": 2.0,
29
- "grad_norm": 21.141799926757812,
30
- "learning_rate": 1.3506276150627616e-05,
31
- "loss": 0.6795,
32
- "step": 2152
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_f1": 0.8069274438342103,
37
- "eval_loss": 0.6678793430328369,
38
- "eval_runtime": 1131.1127,
39
- "eval_samples_per_second": 7.608,
40
- "eval_steps_per_second": 0.238,
41
- "step": 2152
42
  },
43
  {
44
- "epoch": 3.0,
45
- "grad_norm": 20.788061141967773,
46
- "learning_rate": 9.004184100418411e-06,
47
- "loss": 0.4878,
48
- "step": 3228
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_f1": 0.8184543003319226,
53
- "eval_loss": 0.6415576338768005,
54
- "eval_runtime": 1137.7492,
55
- "eval_samples_per_second": 7.563,
56
- "eval_steps_per_second": 0.236,
57
- "step": 3228
58
  },
59
  {
60
- "epoch": 4.0,
61
- "grad_norm": 31.943090438842773,
62
- "learning_rate": 4.5020920502092055e-06,
63
- "loss": 0.339,
64
- "step": 4304
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_f1": 0.8262643018421877,
69
- "eval_loss": 0.6522042155265808,
70
- "eval_runtime": 1113.9155,
71
- "eval_samples_per_second": 7.725,
72
- "eval_steps_per_second": 0.241,
73
- "step": 4304
74
  },
75
  {
76
- "epoch": 5.0,
77
- "grad_norm": 10.032992362976074,
78
- "learning_rate": 0.0,
79
- "loss": 0.226,
80
- "step": 5380
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_f1": 0.8262395269824653,
85
- "eval_loss": 0.6748126149177551,
86
- "eval_runtime": 1135.4492,
87
- "eval_samples_per_second": 7.578,
88
- "eval_steps_per_second": 0.237,
89
- "step": 5380
90
  },
91
  {
92
  "epoch": 5.0,
93
- "step": 5380,
94
  "total_flos": 1.603929743474688e+17,
95
- "train_loss": 0.5906805073904725,
96
- "train_runtime": 93341.3352,
97
- "train_samples_per_second": 1.844,
98
- "train_steps_per_second": 0.058
99
  }
100
  ],
101
  "logging_steps": 500,
102
- "max_steps": 5380,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
@@ -116,7 +116,7 @@
116
  }
117
  },
118
  "total_flos": 1.603929743474688e+17,
119
- "train_batch_size": 32,
120
  "trial_name": null,
121
  "trial_params": null
122
  }
 
1
  {
2
+ "best_metric": 0.6536160707473755,
3
+ "best_model_checkpoint": "./SDG/checkpoint-1614",
4
  "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2690,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.929368029739777,
13
+ "grad_norm": 26.18483543395996,
14
+ "learning_rate": 8.266666666666667e-06,
15
+ "loss": 1.7444,
16
+ "step": 500
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_f1": 0.7685770625858339,
21
+ "eval_loss": 0.8148266077041626,
22
+ "eval_runtime": 22.0031,
23
+ "eval_samples_per_second": 391.081,
24
+ "eval_steps_per_second": 6.136,
25
+ "step": 538
26
  },
27
  {
28
+ "epoch": 1.858736059479554,
29
+ "grad_norm": 14.726641654968262,
30
+ "learning_rate": 8.105263157894736e-06,
31
+ "loss": 0.7749,
32
+ "step": 1000
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_f1": 0.7999964392111495,
37
+ "eval_loss": 0.7103919982910156,
38
+ "eval_runtime": 22.0431,
39
+ "eval_samples_per_second": 390.371,
40
+ "eval_steps_per_second": 6.124,
41
+ "step": 1076
42
  },
43
  {
44
+ "epoch": 2.7881040892193307,
45
+ "grad_norm": 18.681787490844727,
46
+ "learning_rate": 5.717703349282297e-06,
47
+ "loss": 0.6165,
48
+ "step": 1500
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_f1": 0.8113637968918935,
53
+ "eval_loss": 0.6536160707473755,
54
+ "eval_runtime": 22.0131,
55
+ "eval_samples_per_second": 390.904,
56
+ "eval_steps_per_second": 6.133,
57
+ "step": 1614
58
  },
59
  {
60
+ "epoch": 3.717472118959108,
61
+ "grad_norm": 15.741679191589355,
62
+ "learning_rate": 3.3253588516746417e-06,
63
+ "loss": 0.5044,
64
+ "step": 2000
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_f1": 0.8140234226099499,
69
+ "eval_loss": 0.6571179628372192,
70
+ "eval_runtime": 22.0192,
71
+ "eval_samples_per_second": 390.795,
72
+ "eval_steps_per_second": 6.131,
73
+ "step": 2152
74
  },
75
  {
76
+ "epoch": 4.646840148698884,
77
+ "grad_norm": 12.586956024169922,
78
+ "learning_rate": 9.330143540669858e-07,
79
+ "loss": 0.4217,
80
+ "step": 2500
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_f1": 0.8154414212199751,
85
+ "eval_loss": 0.6651344299316406,
86
+ "eval_runtime": 21.8841,
87
+ "eval_samples_per_second": 393.209,
88
+ "eval_steps_per_second": 6.169,
89
+ "step": 2690
90
  },
91
  {
92
  "epoch": 5.0,
93
+ "step": 2690,
94
  "total_flos": 1.603929743474688e+17,
95
+ "train_loss": 0.7819821552716223,
96
+ "train_runtime": 1776.2036,
97
+ "train_samples_per_second": 96.892,
98
+ "train_steps_per_second": 1.514
99
  }
100
  ],
101
  "logging_steps": 500,
102
+ "max_steps": 2690,
103
  "num_input_tokens_seen": 0,
104
  "num_train_epochs": 5,
105
  "save_steps": 500,
 
116
  }
117
  },
118
  "total_flos": 1.603929743474688e+17,
119
+ "train_batch_size": 64,
120
  "trial_name": null,
121
  "trial_params": null
122
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dcd7112676da5ad151517cb6328fce426a8d218cf6913d3798f4f6f7aaf5cf68
3
- size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4199969cd5b4951b886fe0bcbeb67c79f95f400afb8f77bdfd4e0aa0554a1896
3
+ size 5368