---
language:
- ms
datasets:
- mesolitica/TTS
---

# Malay VITS Shafiqah Idayu

**This model is intended to be used only with [malaya-speech](https://github.com/mesolitica/malaya-speech). It is possible to skip the library, but make sure the character vocabulary is correct.**

## requirements

You need to install a specific malaya-speech version to get better generation:

```bash
pip3 install git+https://github.com/mesolitica/malaya-speech@1d5a33dd119f32e793d539ce782f1fe37818af75 malaya
```

## how to

```python
from huggingface_hub import snapshot_download
from malaya_speech.torch_model.vits.model_infer import SynthesizerTrn
from malaya_speech.torch_model.vits.commons import intersperse
from malaya_speech.utils.text import TTS_SYMBOLS
from malaya_speech.tts import load_text_ids
import torch
import os
import json

try:
    from malaya_boilerplate.hparams import HParams
except BaseException:
    from malaya_boilerplate.train.config import HParams

# download the checkpoint and config from the Hugging Face Hub
folder = snapshot_download(repo_id="mesolitica/VITS-shafiqah-idayu")

with open(os.path.join(folder, 'config.json')) as fopen:
    hps = HParams(**json.load(fopen))

# build the VITS synthesizer and load the pretrained weights on CPU
model = SynthesizerTrn(
    len(TTS_SYMBOLS),
    hps.data.filter_length // 2 + 1,
    hps.train.segment_size // hps.data.hop_length,
    n_speakers=hps.data.n_speakers,
    **hps.model,
).eval()
model.load_state_dict(torch.load(os.path.join(folder, 'model.pth'), map_location='cpu'))

# text normalizer converts raw text into character ids
normalizer = load_text_ids(pad_to = None, understand_punct = True, is_lower = False)

t, ids = normalizer.normalize('saya nak makan nasi ayam yang sedap, lagi lazat, dan hidup sangatlah susah kan.', add_fullstop = False)
if hps.data.add_blank:
    ids = intersperse(ids, 0)
ids = torch.LongTensor(ids)
ids_lengths = torch.LongTensor([ids.size(0)])
ids = ids.unsqueeze(0)

# deterministic inference: zero noise scales, default speaking rate
with torch.no_grad():
    audio = model.infer(
        ids,
        ids_lengths,
        noise_scale=0.0,
        noise_scale_w=0.0,
        length_scale=1.0,
    )
y_ = audio[0].numpy()
```
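
To listen to the result, you can write the waveform to a WAV file. This is a minimal sketch, not part of the original card: it assumes the `soundfile` package is installed (`pip3 install soundfile`) and that the sampling rate is stored under `hps.data.sampling_rate` in `config.json`, which is the usual VITS config layout; adjust if your config differs.

```python
import numpy as np
import soundfile as sf

# `y_` from the snippet above may carry batch/channel dimensions, so squeeze to 1-D
waveform = np.squeeze(y_)

# assumption: the standard VITS config layout, where the sampling rate lives at hps.data.sampling_rate
sf.write('shafiqah-idayu.wav', waveform, hps.data.sampling_rate)
```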