Titouan
committed on
Commit
•
3fec198
1
Parent(s):
586c789
update model
Browse files
- README.md +4 -4
- asr.ckpt +1 -1
- hyperparams.yaml +2 -3
- normalizer.ckpt +1 -1
- pretrained_models/asr-transformer-transformerlm-librispeech/asr.ckpt +1 -0
- pretrained_models/asr-transformer-transformerlm-librispeech/custom.py +1 -0
- pretrained_models/asr-transformer-transformerlm-librispeech/hyperparams.yaml +1 -0
- pretrained_models/asr-transformer-transformerlm-librispeech/lm.ckpt +1 -0
- pretrained_models/asr-transformer-transformerlm-librispeech/normalizer.ckpt +1 -0
- pretrained_models/asr-transformer-transformerlm-librispeech/tokenizer.ckpt +1 -0
README.md
CHANGED
@@ -32,7 +32,7 @@ model-index:
|
|
32 |
metrics:
|
33 |
- name: Test WER
|
34 |
type: wer
|
35 |
-
value: 2.
|
36 |
- task:
|
37 |
name: Automatic Speech Recognition
|
38 |
type: automatic-speech-recognition
|
@@ -46,7 +46,7 @@ model-index:
|
|
46 |
metrics:
|
47 |
- name: Test WER
|
48 |
type: wer
|
49 |
-
value: 5.
|
50 |
---
|
51 |
|
52 |
<iframe src="https://ghbtns.com/github-btn.html?user=speechbrain&repo=speechbrain&type=star&count=true&size=large&v=2" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
|
@@ -62,7 +62,7 @@ The performance of the model is the following:
|
|
62 |
|
63 |
| Release | Test clean WER | Test other WER | GPUs |
|
64 |
|:-------------:|:--------------:|:--------------:|:--------:|
|
65 |
-
| 24-03-22 | 2.
|
66 |
|
67 |
## Pipeline description
|
68 |
|
@@ -122,7 +122,7 @@ cd recipes/LibriSpeech/ASR/transformer
|
|
122 |
python train.py hparams/transformer.yaml --data_folder=your_data_folder
|
123 |
```
|
124 |
|
125 |
-
You can find our training results (models, logs, etc) [here](https://drive.google.com/drive/folders/
|
126 |
|
127 |
### Limitations
|
128 |
The SpeechBrain team does not provide any warranty on the performance achieved by this model when used on other datasets.
|
|
|
32 |
metrics:
|
33 |
- name: Test WER
|
34 |
type: wer
|
35 |
+
value: 2.27
|
36 |
- task:
|
37 |
name: Automatic Speech Recognition
|
38 |
type: automatic-speech-recognition
|
|
|
46 |
metrics:
|
47 |
- name: Test WER
|
48 |
type: wer
|
49 |
+
value: 5.53
|
50 |
---
|
51 |
|
52 |
<iframe src="https://ghbtns.com/github-btn.html?user=speechbrain&repo=speechbrain&type=star&count=true&size=large&v=2" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>
|
|
|
62 |
|
63 |
| Release | Test clean WER | Test other WER | GPUs |
|
64 |
|:-------------:|:--------------:|:--------------:|:--------:|
|
65 |
+
| 24-03-22 | 2.27 | 5.53 | 4xV100 32GB |
|
66 |
|
67 |
## Pipeline description
|
68 |
|
|
|
122 |
python train.py hparams/transformer.yaml --data_folder=your_data_folder
|
123 |
```
|
124 |
|
125 |
+
You can find our training results (models, logs, etc) [here](https://drive.google.com/drive/folders/1Nv1OLbHLqVeShyZ8LY9gjhYGE1DBFzFf?usp=sharing).
|
126 |
|
127 |
### Limitations
|
128 |
The SpeechBrain team does not provide any warranty on the performance achieved by this model when used on other datasets.
|
asr.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 291335121
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a04a1b364fe37769ed47904e0237c412e874321efeda55986f08a06822fe6ea
|
3 |
size 291335121
|
hyperparams.yaml
CHANGED
@@ -20,18 +20,17 @@ nhead: 4
|
|
20 |
num_encoder_layers: 12
|
21 |
num_decoder_layers: 6
|
22 |
d_ffn: 2048
|
23 |
-
transformer_dropout: 0.
|
24 |
activation: !name:torch.nn.GELU
|
25 |
output_neurons: 5000
|
26 |
vocab_size: 5000
|
27 |
|
28 |
# Outputs
|
29 |
blank_index: 0
|
30 |
-
label_smoothing: 0.
|
31 |
pad_index: 0
|
32 |
bos_index: 1
|
33 |
eos_index: 2
|
34 |
-
unk_index: 0
|
35 |
|
36 |
# Decoding parameters
|
37 |
min_decode_ratio: 0.0
|
|
|
20 |
num_encoder_layers: 12
|
21 |
num_decoder_layers: 6
|
22 |
d_ffn: 2048
|
23 |
+
transformer_dropout: 0.1
|
24 |
activation: !name:torch.nn.GELU
|
25 |
output_neurons: 5000
|
26 |
vocab_size: 5000
|
27 |
|
28 |
# Outputs
|
29 |
blank_index: 0
|
30 |
+
label_smoothing: 0.0
|
31 |
pad_index: 0
|
32 |
bos_index: 1
|
33 |
eos_index: 2
|
|
|
34 |
|
35 |
# Decoding parameters
|
36 |
min_decode_ratio: 0.0
|
normalizer.ckpt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1703
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:956e9c1751beac2f75bfff8253f9811b76a1d57bb52f28587d3b6d45bbce6315
|
3 |
size 1703
|
pretrained_models/asr-transformer-transformerlm-librispeech/asr.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/asr.ckpt
|
pretrained_models/asr-transformer-transformerlm-librispeech/custom.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/custom.py
|
pretrained_models/asr-transformer-transformerlm-librispeech/hyperparams.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/hyperparams.yaml
|
pretrained_models/asr-transformer-transformerlm-librispeech/lm.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/lm.ckpt
|
pretrained_models/asr-transformer-transformerlm-librispeech/normalizer.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/normalizer.ckpt
|
pretrained_models/asr-transformer-transformerlm-librispeech/tokenizer.ckpt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
/Users/titlet/CloudStation/work/speechbrain/workspace/huggingface_hub/asr-transformer-transformerlm-librispeech/tokenizer.ckpt
|