Spaces:
Running
on
Zero
Running
on
Zero
Upload app.py
Browse files
app.py
CHANGED
@@ -111,7 +111,7 @@ def tokenize(ps):
|
|
111 |
return [i for i in map(VOCAB.get, ps) if i is not None]
|
112 |
|
113 |
CHOICES = {
|
114 |
-
'🇺🇸 🚺 American Female 0': '
|
115 |
'🇺🇸 🚺 Bella': 'af_bella',
|
116 |
'🇺🇸 🚺 Nicole': 'af_nicole',
|
117 |
'🇺🇸 🚹 Michael': 'am_michael',
|
@@ -126,9 +126,10 @@ CHOICES = {
|
|
126 |
'🇯🇵 🚺 Japanese Female 0': 'jf_0',
|
127 |
}
|
128 |
VOICES = {k: torch.load(os.path.join(snapshot, 'voices', f'{k}.pt'), weights_only=True).to(device) for k in CHOICES.values()}
|
129 |
-
def
|
130 |
-
#
|
131 |
-
|
|
|
132 |
|
133 |
np_log_99 = np.log(99)
|
134 |
def s_curve(p):
|
@@ -168,9 +169,9 @@ def forward(tokens, voice, speed):
|
|
168 |
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
169 |
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
170 |
|
171 |
-
def generate(text, voice
|
172 |
if voice not in VOICES:
|
173 |
-
voice =
|
174 |
ps = ps or phonemize(text, voice)
|
175 |
tokens = tokenize(ps)
|
176 |
if not tokens:
|
|
|
111 |
return [i for i in map(VOCAB.get, ps) if i is not None]
|
112 |
|
113 |
CHOICES = {
|
114 |
+
'🇺🇸 🚺 American Female 0': 'af_zero',
|
115 |
'🇺🇸 🚺 Bella': 'af_bella',
|
116 |
'🇺🇸 🚺 Nicole': 'af_nicole',
|
117 |
'🇺🇸 🚹 Michael': 'am_michael',
|
|
|
126 |
'🇯🇵 🚺 Japanese Female 0': 'jf_0',
|
127 |
}
|
128 |
VOICES = {k: torch.load(os.path.join(snapshot, 'voices', f'{k}.pt'), weights_only=True).to(device) for k in CHOICES.values()}
|
129 |
+
def get_random_af_voice():
|
130 |
+
# Must be an American female voice to maintain compatibility with
|
131 |
+
# https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
|
132 |
+
return random.choice(['af_zero', 'af_bella'])
|
133 |
|
134 |
np_log_99 = np.log(99)
|
135 |
def s_curve(p):
|
|
|
169 |
asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
|
170 |
return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
|
171 |
|
172 |
+
def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000, closing_cut=2000, ease_in=3000, ease_out=1000, pad_before=5000, pad_after=5000):
|
173 |
if voice not in VOICES:
|
174 |
+
voice = get_random_af_voice()
|
175 |
ps = ps or phonemize(text, voice)
|
176 |
tokens = tokenize(ps)
|
177 |
if not tokens:
|