update the model
Browse files- README.md +47 -16
- all_results.json +14 -0
- config.json +1 -1
- eval_results.json +9 -0
- log_mozilla-foundation_common_voice_8_0_ar_test_predictions.txt +0 -0
- mozilla-foundation_common_voice_8_0_ar_test_eval_results.txt +2 -2
- optimizer.pt +0 -3
- pytorch_model.bin +1 -1
- rng_state_0.pth +0 -3
- rng_state_1.pth +0 -3
- rng_state_2.pth +0 -3
- rng_state_3.pth +0 -3
- rng_state_4.pth +0 -3
- rng_state_5.pth +0 -3
- rng_state_6.pth +0 -3
- rng_state_7.pth +0 -3
- scaler.pt +0 -3
- scheduler.pt +0 -3
- train_results.json +8 -0
- trainer_state.json +62 -5
README.md
CHANGED
@@ -22,16 +22,13 @@ model-index:
|
|
22 |
name: Common Voice ar
|
23 |
args: ar
|
24 |
metrics:
|
25 |
-
- type: wer
|
26 |
-
value: 0.
|
27 |
-
name: Test WER
|
28 |
|
29 |
-
- type: cer
|
30 |
-
value: 0.
|
31 |
-
name: Test CER
|
32 |
-
|
33 |
-
WER: 0.18855042016806722
|
34 |
-
CER: 0.05138746531806014
|
35 |
|
36 |
---
|
37 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
@@ -41,12 +38,16 @@ should probably proofread and complete it, then remove this comment. -->
|
|
41 |
|
42 |
# نموذج **صوت سيناء** للتعرف على الأصوات العربية الفصحى و تحويلها إلى نصوص
|
43 |
|
44 |
-
This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the
|
|
|
|
|
|
|
45 |
|
46 |
It achieves the following results on the evaluation set:
|
47 |
-
-
|
48 |
-
-
|
49 |
-
-
|
|
|
50 |
|
51 |
#### Evaluation Commands
|
52 |
1. To evaluate on `mozilla-foundation/common_voice_8_0` with split `test`
|
@@ -95,11 +96,41 @@ The following hyperparameters were used during training:
|
|
95 |
- train_batch_size: 32
|
96 |
- eval_batch_size: 10
|
97 |
- seed: 42
|
98 |
-
-
|
99 |
-
-
|
|
|
|
|
100 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
101 |
- lr_scheduler_type: linear
|
102 |
- lr_scheduler_warmup_steps: 1000
|
103 |
-
- num_epochs:
|
104 |
- mixed_precision_training: Native AMP
|
105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
name: Common Voice ar
|
23 |
args: ar
|
24 |
metrics:
|
25 |
+
- type: wer
|
26 |
+
value: 0.181
|
27 |
+
name: Test WER
|
28 |
|
29 |
+
- type: cer
|
30 |
+
value: 0.049
|
31 |
+
name: Test CER
|
|
|
|
|
|
|
32 |
|
33 |
---
|
34 |
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
|
|
38 |
|
39 |
# نموذج **صوت سيناء** للتعرف على الأصوات العربية الفصحى و تحويلها إلى نصوص
|
40 |
|
41 |
+
This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the MOZILLA-FOUNDATION/COMMON_VOICE_8_0 - AR dataset.
|
42 |
+
It achieves the following results on the evaluation set:
|
43 |
+
- Loss: 0.2141
|
44 |
+
- Wer: 0.1808
|
45 |
|
46 |
It achieves the following results on the evaluation set:
|
47 |
+
- eval_loss = 0.2141
|
48 |
+
- eval_samples = 10388
|
49 |
+
- eval_wer = 0.181
|
50 |
+
- eval_cer = 0.049
|
51 |
|
52 |
#### Evaluation Commands
|
53 |
1. To evaluate on `mozilla-foundation/common_voice_8_0` with split `test`
|
|
|
96 |
- train_batch_size: 32
|
97 |
- eval_batch_size: 10
|
98 |
- seed: 42
|
99 |
+
- distributed_type: multi-GPU
|
100 |
+
- num_devices: 8
|
101 |
+
- total_train_batch_size: 256
|
102 |
+
- total_eval_batch_size: 80
|
103 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
104 |
- lr_scheduler_type: linear
|
105 |
- lr_scheduler_warmup_steps: 1000
|
106 |
+
- num_epochs: 10
|
107 |
- mixed_precision_training: Native AMP
|
108 |
|
109 |
+
|
110 |
+
### Training results
|
111 |
+
|
112 |
+
| Training Loss | Epoch | Step | Validation Loss | Wer |
|
113 |
+
|:-------------:|:-----:|:-----:|:---------------:|:------:|
|
114 |
+
| 1.354 | 0.64 | 1000 | 0.4109 | 0.4493 |
|
115 |
+
| 0.5886 | 1.28 | 2000 | 0.2798 | 0.3099 |
|
116 |
+
| 0.4977 | 1.92 | 3000 | 0.2387 | 0.2673 |
|
117 |
+
| 0.4253 | 2.56 | 4000 | 0.2266 | 0.2523 |
|
118 |
+
| 0.3942 | 3.2 | 5000 | 0.2171 | 0.2437 |
|
119 |
+
| 0.3619 | 3.84 | 6000 | 0.2076 | 0.2253 |
|
120 |
+
| 0.3245 | 4.48 | 7000 | 0.2088 | 0.2186 |
|
121 |
+
| 0.308 | 5.12 | 8000 | 0.2086 | 0.2206 |
|
122 |
+
| 0.2881 | 5.76 | 9000 | 0.2089 | 0.2105 |
|
123 |
+
| 0.2557 | 6.4 | 10000 | 0.2015 | 0.2004 |
|
124 |
+
| 0.248 | 7.04 | 11000 | 0.2044 | 0.1953 |
|
125 |
+
| 0.2251 | 7.68 | 12000 | 0.2058 | 0.1932 |
|
126 |
+
| 0.2052 | 8.32 | 13000 | 0.2117 | 0.1878 |
|
127 |
+
| 0.1976 | 8.96 | 14000 | 0.2104 | 0.1825 |
|
128 |
+
| 0.1845 | 9.6 | 15000 | 0.2156 | 0.1821 |
|
129 |
+
|
130 |
+
|
131 |
+
### Framework versions
|
132 |
+
|
133 |
+
- Transformers 4.16.2
|
134 |
+
- Pytorch 1.10.2+cu113
|
135 |
+
- Datasets 1.18.3
|
136 |
+
- Tokenizers 0.11.0
|
all_results.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_loss": 0.21412786841392517,
|
4 |
+
"eval_runtime": 70.9089,
|
5 |
+
"eval_samples": 10388,
|
6 |
+
"eval_samples_per_second": 146.498,
|
7 |
+
"eval_steps_per_second": 1.833,
|
8 |
+
"eval_wer": 0.18078979457836977,
|
9 |
+
"train_loss": 0.1316310991176183,
|
10 |
+
"train_runtime": 23113.6031,
|
11 |
+
"train_samples": 399991,
|
12 |
+
"train_samples_per_second": 173.054,
|
13 |
+
"train_steps_per_second": 0.676
|
14 |
+
}
|
config.json
CHANGED
@@ -6,7 +6,7 @@
|
|
6 |
"add_adapter": false,
|
7 |
"apply_spec_augment": true,
|
8 |
"architectures": [
|
9 |
-
"
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"bos_token_id": 1,
|
|
|
6 |
"add_adapter": false,
|
7 |
"apply_spec_augment": true,
|
8 |
"architectures": [
|
9 |
+
"Wav2Vec2ForCTC"
|
10 |
],
|
11 |
"attention_dropout": 0.0,
|
12 |
"bos_token_id": 1,
|
eval_results.json
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"eval_loss": 0.21412786841392517,
|
4 |
+
"eval_runtime": 70.9089,
|
5 |
+
"eval_samples": 10388,
|
6 |
+
"eval_samples_per_second": 146.498,
|
7 |
+
"eval_steps_per_second": 1.833,
|
8 |
+
"eval_wer": 0.18078979457836977
|
9 |
+
}
|
log_mozilla-foundation_common_voice_8_0_ar_test_predictions.txt
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
mozilla-foundation_common_voice_8_0_ar_test_eval_results.txt
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
WER: 0.
|
2 |
-
CER: 0.
|
|
|
1 |
+
WER: 0.18172268907563024
|
2 |
+
CER: 0.04875182561226061
|
optimizer.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a2720401989ca4dbdc57ad7199cdc4116d02c36c374567e9a1295b01cd6c45b3
|
3 |
-
size 625461417
|
|
|
|
|
|
|
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1262112241
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:588e6341d51008b353be1115b1e1e34d86bad4f676b32277cba57e5f7cff526a
|
3 |
size 1262112241
|
rng_state_0.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:39223200ea0fd8350b41518c6492d3d14d96d823ca3f6e9a374864389e6dbb1b
|
3 |
-
size 14503
|
|
|
|
|
|
|
|
rng_state_1.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5091f67e4a1caff5227497b7c37712739cd8baf0783971573e46569913fdfff3
|
3 |
-
size 14567
|
|
|
|
|
|
|
|
rng_state_2.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:16de60f262d42506479df2d553aa073b64cf621ae9eebfc35fbc2b23021cee2d
|
3 |
-
size 14503
|
|
|
|
|
|
|
|
rng_state_3.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a641931222bcf2b19d6a073bd70c446b59abafb3a4077c2d6a5aea1f4001e06a
|
3 |
-
size 14503
|
|
|
|
|
|
|
|
rng_state_4.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:da3f79cf3a4b4d9379e47a4353e6555235b22515af826d448849df6d7a5f04f0
|
3 |
-
size 14567
|
|
|
|
|
|
|
|
rng_state_5.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:16352761baa415d3af2ff3cea0ef555419a76b51449f27bcec56bcdba9a15ff5
|
3 |
-
size 14567
|
|
|
|
|
|
|
|
rng_state_6.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8ea93d555065f78f93a91b315329c1c392289d935801de6b732e124d18b1586a
|
3 |
-
size 14567
|
|
|
|
|
|
|
|
rng_state_7.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:46629c3fa6cb14bd2274cb93b2b9a613c84794d3912a7fb43cf8af9b51524544
|
3 |
-
size 14503
|
|
|
|
|
|
|
|
scaler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:431319fd21daa87636d8253400f763a0a1a5400306ce1db9e67b38942c76551d
|
3 |
-
size 559
|
|
|
|
|
|
|
|
scheduler.pt
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6f90f0da980bf8cd346163915335c996f7bbdbcfe93a2e624a8b02f902aa6d01
|
3 |
-
size 623
|
|
|
|
|
|
|
|
train_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 10.0,
|
3 |
+
"train_loss": 0.1316310991176183,
|
4 |
+
"train_runtime": 23113.6031,
|
5 |
+
"train_samples": 399991,
|
6 |
+
"train_samples_per_second": 173.054,
|
7 |
+
"train_steps_per_second": 0.676
|
8 |
+
}
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "/workspace/cv-corpus-8.0-2022-01-19/output/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -279,11 +279,68 @@
|
|
279 |
"eval_steps_per_second": 1.836,
|
280 |
"eval_wer": 0.18776850201669637,
|
281 |
"step": 13000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
282 |
}
|
283 |
],
|
284 |
"max_steps": 15630,
|
285 |
"num_train_epochs": 10,
|
286 |
-
"total_flos":
|
287 |
"trial_name": null,
|
288 |
"trial_params": null
|
289 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4493387111903199,
|
3 |
+
"best_model_checkpoint": "/workspace/cv-corpus-8.0-2022-01-19/output/checkpoint-1000",
|
4 |
+
"epoch": 10.0,
|
5 |
+
"global_step": 15630,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
279 |
"eval_steps_per_second": 1.836,
|
280 |
"eval_wer": 0.18776850201669637,
|
281 |
"step": 13000
|
282 |
+
},
|
283 |
+
{
|
284 |
+
"epoch": 8.64,
|
285 |
+
"learning_rate": 2.9241285030758714e-05,
|
286 |
+
"loss": 0.2026,
|
287 |
+
"step": 13500
|
288 |
+
},
|
289 |
+
{
|
290 |
+
"epoch": 8.96,
|
291 |
+
"learning_rate": 2.2406015037593985e-05,
|
292 |
+
"loss": 0.1976,
|
293 |
+
"step": 14000
|
294 |
+
},
|
295 |
+
{
|
296 |
+
"epoch": 8.96,
|
297 |
+
"eval_loss": 0.21043309569358826,
|
298 |
+
"eval_runtime": 71.1895,
|
299 |
+
"eval_samples_per_second": 145.92,
|
300 |
+
"eval_steps_per_second": 1.826,
|
301 |
+
"eval_wer": 0.18249695150548728,
|
302 |
+
"step": 14000
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 9.28,
|
306 |
+
"learning_rate": 1.5570745044429256e-05,
|
307 |
+
"loss": 0.1875,
|
308 |
+
"step": 14500
|
309 |
+
},
|
310 |
+
{
|
311 |
+
"epoch": 9.6,
|
312 |
+
"learning_rate": 8.735475051264526e-06,
|
313 |
+
"loss": 0.1845,
|
314 |
+
"step": 15000
|
315 |
+
},
|
316 |
+
{
|
317 |
+
"epoch": 9.6,
|
318 |
+
"eval_loss": 0.21563756465911865,
|
319 |
+
"eval_runtime": 71.0722,
|
320 |
+
"eval_samples_per_second": 146.161,
|
321 |
+
"eval_steps_per_second": 1.829,
|
322 |
+
"eval_wer": 0.18212175218084609,
|
323 |
+
"step": 15000
|
324 |
+
},
|
325 |
+
{
|
326 |
+
"epoch": 9.92,
|
327 |
+
"learning_rate": 1.9138755980861244e-06,
|
328 |
+
"loss": 0.1837,
|
329 |
+
"step": 15500
|
330 |
+
},
|
331 |
+
{
|
332 |
+
"epoch": 10.0,
|
333 |
+
"step": 15630,
|
334 |
+
"total_flos": 9.942412569719006e+20,
|
335 |
+
"train_loss": 0.1316310991176183,
|
336 |
+
"train_runtime": 23113.6031,
|
337 |
+
"train_samples_per_second": 173.054,
|
338 |
+
"train_steps_per_second": 0.676
|
339 |
}
|
340 |
],
|
341 |
"max_steps": 15630,
|
342 |
"num_train_epochs": 10,
|
343 |
+
"total_flos": 9.942412569719006e+20,
|
344 |
"trial_name": null,
|
345 |
"trial_params": null
|
346 |
}
|