Update README.md
Browse files
README.md
CHANGED
@@ -33,14 +33,36 @@ pip install speechbrain transformers
|
|
33 |
Please notice that we encourage you to read our tutorials and learn more about
|
34 |
[SpeechBrain](https://speechbrain.github.io).
|
35 |
|
|
|
36 |
|
37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
```python
|
40 |
import torch
|
41 |
from speechbrain.inference.vocoders import UnitHIFIGAN
|
42 |
|
43 |
-
hifi_gan_unit = UnitHIFIGAN.from_hparams(source="speechbrain/hifigan-wavlm-k1000-LibriTTS", savedir="pretrained_models/vocoder")
|
44 |
codes = torch.randint(0, 99, (100, 1))
|
45 |
waveform = hifi_gan_unit.decode_unit(codes)
|
46 |
|
|
|
33 |
Please notice that we encourage you to read our tutorials and learn more about
|
34 |
[SpeechBrain](https://speechbrain.github.io).
|
35 |
|
36 |
+
### Using the Vocoder with DiscreteSSL
|
37 |
|
38 |
+
```python
|
39 |
+
import torch
|
40 |
+
from speechbrain.lobes.models.huggingface_transformers.wavlm import (WavLM)
|
41 |
+
|
42 |
+
inputs = torch.rand([3, 2000])
|
43 |
+
model_hub = "microsoft/wavlm-large"
|
44 |
+
save_path = "savedir"
|
45 |
+
ssl_layer_num = [7,23]
|
46 |
+
deduplicate = [False, True]
|
47 |
+
bpe_tokenizers = [None, None]
|
48 |
+
vocoder_repo_id = "speechbrain/hifigan-wavlm-k1000-LibriTTS"
|
49 |
+
kmeans_dataset = "LibriSpeech"
|
50 |
+
num_clusters = 1000
|
51 |
+
ssl_model = WavLM(model_hub, save_path, output_all_hiddens=True)
|
52 |
+
model = DiscreteSSL(save_path, ssl_model, vocoder_repo_id=vocoder_repo_id, kmeans_dataset=kmeans_dataset, num_clusters=num_clusters)
|
53 |
+
tokens, _, _ = model.encode(inputs, SSL_layers=ssl_layer_num, deduplicates=deduplicate, bpe_tokenizers=bpe_tokenizers)
|
54 |
+
sig = model.decode(tokens, ssl_layer_num)
|
55 |
+
```
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
### Standalone Vocoder Usage
|
60 |
|
61 |
```python
|
62 |
import torch
|
63 |
from speechbrain.inference.vocoders import UnitHIFIGAN
|
64 |
|
65 |
+
hifi_gan_unit = UnitHIFIGAN.from_hparams(source="speechbrain/hifigan-wavlm-k1000-LibriTTS", savedir="pretrained_models/vocoder")
|
66 |
codes = torch.randint(0, 99, (100, 1))
|
67 |
waveform = hifi_gan_unit.decode_unit(codes)
|
68 |
|