hungnm committed on
Commit
ee81b15
·
verified ·
1 Parent(s): c0c625a

Model save

Browse files
README.md CHANGED
@@ -9,21 +9,21 @@ metrics:
9
  - precision
10
  - recall
11
  model-index:
12
- - name: clapAI/modernBERT-base-multilingual-sentiment
13
  results: []
14
  ---
15
 
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
  should probably proofread and complete it, then remove this comment. -->
18
 
19
- # clapAI/modernBERT-base-multilingual-sentiment
20
 
21
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 1.8330
24
- - F1: 0.1291
25
- - Precision: 0.1650
26
- - Recall: 0.1890
27
 
28
  ## Model description
29
 
@@ -42,26 +42,30 @@ More information needed
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
- - learning_rate: 6e-05
46
- - train_batch_size: 1024
47
- - eval_batch_size: 1024
48
  - seed: 42
49
  - distributed_type: multi-GPU
50
  - num_devices: 2
 
51
  - total_train_batch_size: 2048
52
- - total_eval_batch_size: 2048
53
  - optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
54
  - lr_scheduler_type: cosine
55
  - lr_scheduler_warmup_ratio: 0.01
56
- - num_epochs: 2.0
57
  - mixed_precision_training: Native AMP
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall |
62
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
63
- | 1.8373 | 1.0 | 8 | 1.8330 | 0.1291 | 0.1650 | 0.1890 |
64
- | 1.8364 | 2.0 | 16 | 1.8330 | 0.1291 | 0.1650 | 0.1890 |
 
 
 
65
 
66
 
67
  ### Framework versions
 
9
  - precision
10
  - recall
11
  model-index:
12
+ - name: modernBERT-base-multilingual-sentiment
13
  results: []
14
  ---
15
 
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
  should probably proofread and complete it, then remove this comment. -->
18
 
19
+ # modernBERT-base-multilingual-sentiment
20
 
21
  This model is a fine-tuned version of [answerdotai/ModernBERT-base](https://huggingface.co/answerdotai/ModernBERT-base) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
+ - Loss: 0.5464
24
+ - F1: 0.7944
25
+ - Precision: 0.7945
26
+ - Recall: 0.7944
27
 
28
  ## Model description
29
 
 
42
  ### Training hyperparameters
43
 
44
  The following hyperparameters were used during training:
45
+ - learning_rate: 5e-05
46
+ - train_batch_size: 512
47
+ - eval_batch_size: 512
48
  - seed: 42
49
  - distributed_type: multi-GPU
50
  - num_devices: 2
51
+ - gradient_accumulation_steps: 2
52
  - total_train_batch_size: 2048
53
+ - total_eval_batch_size: 1024
54
  - optimizer: Use adamw_torch_fused with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
55
  - lr_scheduler_type: cosine
56
  - lr_scheduler_warmup_ratio: 0.01
57
+ - num_epochs: 5.0
58
  - mixed_precision_training: Native AMP
59
 
60
  ### Training results
61
 
62
  | Training Loss | Epoch | Step | Validation Loss | F1 | Precision | Recall |
63
  |:-------------:|:-----:|:----:|:---------------:|:------:|:---------:|:------:|
64
+ | 0.9287 | 1.0 | 1537 | 0.4626 | 0.7910 | 0.7940 | 0.7897 |
65
+ | 0.8356 | 2.0 | 3074 | 0.4441 | 0.8011 | 0.8009 | 0.8015 |
66
+ | 0.7488 | 3.0 | 4611 | 0.4517 | 0.8012 | 0.8020 | 0.8007 |
67
+ | 0.6177 | 4.0 | 6148 | 0.4915 | 0.7990 | 0.7989 | 0.7991 |
68
+ | 0.5174 | 5.0 | 7685 | 0.5464 | 0.7944 | 0.7945 | 0.7944 |
69
 
70
 
71
  ### Framework versions
all_results.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
- "epoch": 2.0,
3
- "eval_f1": 0.12910686958067819,
4
- "eval_loss": 1.8330078125,
5
- "eval_precision": 0.16504066117321736,
6
- "eval_recall": 0.1890018282051825,
7
- "eval_runtime": 0.2271,
8
- "eval_samples_per_second": 8807.622,
9
- "eval_steps_per_second": 4.404,
10
- "test_f1": 0.12457335796698589,
11
- "test_loss": 1.833984375,
12
- "test_precision": 0.16755594823291797,
13
- "test_recall": 0.1749254997504109,
14
- "test_runtime": 0.3221,
15
- "test_samples_per_second": 6208.711,
16
- "test_steps_per_second": 3.104,
17
- "train_loss": 1.836273193359375,
18
- "train_runtime": 55.8529,
19
- "train_samples_per_second": 572.934,
20
- "train_steps_per_second": 0.286
21
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.7729351929743412,
4
+ "train_runtime": 35402.7725,
5
+ "train_samples_per_second": 444.524,
6
+ "train_steps_per_second": 0.217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a54a318fd1bb50cb88a677bf2ff027f4de21277d4ec560034f5043ee00b5c474
3
  size 299228486
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f152c2ee66c141d5e3c8db7cb2e5f370cdd43111bd53550090a854bd26ff1a04
3
  size 299228486
runs/Jan01_01-03-55_hn-fornix-testing-gpu-platform-2/events.out.tfevents.1735693897.hn-fornix-testing-gpu-platform-2.1050019.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19bf24c9129f79411bdf64da387be27e16721b61fb93731ca2320dfa393100cf
3
- size 331926
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f637f923a9e8194e170c3f0a35eb85270b3448100dd12145be613bdeb655932
3
+ size 332700
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 2.0,
3
- "train_loss": 1.836273193359375,
4
- "train_runtime": 55.8529,
5
- "train_samples_per_second": 572.934,
6
- "train_steps_per_second": 0.286
7
  }
 
1
  {
2
+ "epoch": 5.0,
3
+ "train_loss": 0.7729351929743412,
4
+ "train_runtime": 35402.7725,
5
+ "train_samples_per_second": 444.524,
6
+ "train_steps_per_second": 0.217
7
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff