Spaces: Running on Zero
Upload app.py
app.py CHANGED
@@ -115,14 +115,7 @@ SAMPLE_RATE = 24000
 
 @spaces.GPU(duration=10)
 @torch.no_grad()
-def forward(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000, closing_cut=0, ease_in=3000, ease_out=0):
-    ps = ps or phonemize(text, voice)
-    tokens = [i for i in map(VOCAB.get, ps) if i is not None]
-    if not tokens:
-        return (None, '')
-    elif len(tokens) > 510:
-        tokens = tokens[:510]
-        ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
+def forward(tokens, speed):
     tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
     input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
     text_mask = length_to_mask(input_lengths).to(device)
@@ -145,7 +138,17 @@ def forward(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000,
     t_en = model.text_encoder(tokens, input_lengths, text_mask)
     asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))
     out = model.decoder(asr, F0_pred, N_pred, ref_s[:, :128])
-    out = out.squeeze().cpu().numpy()
+    return out.squeeze().cpu().numpy()
+
+def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000, closing_cut=0, ease_in=3000, ease_out=0):
+    ps = ps or phonemize(text, voice)
+    tokens = [i for i in map(VOCAB.get, ps) if i is not None]
+    if not tokens:
+        return (None, '')
+    elif len(tokens) > 510:
+        tokens = tokens[:510]
+        ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
+    out = forward(tokens, speed)
     if reduce_noise > 0:
         out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
     opening_cut = max(0, int(opening_cut / speed))
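
The change splits the old forward() into a minimal forward(tokens, speed) that stays under the @spaces.GPU(duration=10) decorator and a new generate() wrapper that handles phonemization, the 510-token truncation, and noise reduction on the CPU. For context, below is a hedged sketch of how generate() might be wired into the Space's Gradio UI. The UI code sits outside this hunk, so the component names are illustrative only, and the sketch assumes generate() ultimately returns an (sample_rate, waveform) audio tuple plus the phoneme string, consistent with its (None, '') early return.

import gradio as gr

# Hypothetical wiring, not part of this commit: generate() does the CPU-side
# pre- and post-processing, and only the short forward() call runs inside the
# @spaces.GPU(duration=10) window.
with gr.Blocks() as demo:
    text = gr.Textbox(label='Text')
    voice = gr.Dropdown(choices=['af'], value='af', label='Voice')  # placeholder voice list
    speed = gr.Slider(0.5, 2.0, value=1.0, label='Speed')
    audio = gr.Audio(label='Output audio')
    phonemes = gr.Textbox(label='Phonemes')
    gr.Button('Generate').click(
        fn=lambda t, v, s: generate(t, v, speed=s),
        inputs=[text, voice, speed],
        outputs=[audio, phonemes],
    )

if __name__ == '__main__':
    demo.launch()

With this split, the ZeroGPU allocation only has to cover the model calls visible inside forward() (text_encoder, the alignment matmul, and decoder); phonemize(), the token truncation, and nr.reduce_noise() all run outside the GPU window, which appears to be the motivation for moving them into generate().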