speechbrain
English
Vocoder
HiFIGAN
speech-synthesis
chaanks committed on
Commit
10f4826
·
verified ·
1 Parent(s): 05f1ea8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +24 -2
README.md CHANGED
@@ -33,14 +33,36 @@ pip install speechbrain transformers
33
  Please note that we encourage you to read our tutorials and learn more about
34
  [SpeechBrain](https://speechbrain.github.io).
35
 
 
36
 
37
- ### Using the Vocoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  ```python
40
  import torch
41
  from speechbrain.inference.vocoders import UnitHIFIGAN
42
 
43
- hifi_gan_unit = UnitHIFIGAN.from_hparams(source="speechbrain/hifigan-wavlm-l1-3-7-12-18-23-k1000-LibriTTS", savedir="pretrained_models/vocoder")
44
  codes = torch.randint(0, 99, (100, 1))
45
  waveform = hifi_gan_unit.decode_unit(codes)
46
 
 
33
  Please note that we encourage you to read our tutorials and learn more about
34
  [SpeechBrain](https://speechbrain.github.io).
35
 
36
+ ### Using the Vocoder with DiscreteSSL
37
 
38
+ ```python
39
+ import torch
40
+ from speechbrain.lobes.models.huggingface_transformers.wavlm import (WavLM)
41
+ from speechbrain.lobes.models.huggingface_transformers.discrete_ssl import DiscreteSSL
42
+ inputs = torch.rand([3, 2000])
43
+ model_hub = "microsoft/wavlm-large"
44
+ save_path = "savedir"
45
+ ssl_layer_num = [7,23]
46
+ deduplicate =[False, True]
47
+ bpe_tokenizers=[None, None]
48
+ vocoder_repo_id = "speechbrain/hifigan-wavlm-k1000-LibriTTS"
49
+ kmeans_dataset = "LibriSpeech"
50
+ num_clusters = 1000
51
+ ssl_model = WavLM(model_hub, save_path,output_all_hiddens=True)
52
+ model = DiscreteSSL(save_path, ssl_model, vocoder_repo_id=vocoder_repo_id, kmeans_dataset=kmeans_dataset,num_clusters=num_clusters)
53
+ tokens, _, _ = model.encode(inputs,SSL_layers=ssl_layer_num, deduplicates=deduplicate, bpe_tokenizers=bpe_tokenizers)
54
+ sig = model.decode(tokens, ssl_layer_num)
55
+ ```
56
+
57
+
58
+
59
+ ### Standalone Vocoder Usage
60
 
61
  ```python
62
  import torch
63
  from speechbrain.inference.vocoders import UnitHIFIGAN
64
 
65
+ hifi_gan_unit = UnitHIFIGAN.from_hparams(source="speechbrain/hifigan-wavlm-k1000-LibriTTS", savedir="pretrained_models/vocoder")
66
  codes = torch.randint(0, 99, (100, 1))
67
  waveform = hifi_gan_unit.decode_unit(codes)
68