hexgrad committed on
Commit
6766b80
·
verified ·
1 Parent(s): 8ab445f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -115,14 +115,7 @@ SAMPLE_RATE = 24000
115
 
116
  @spaces.GPU(duration=10)
117
  @torch.no_grad()
118
- def forward(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000, closing_cut=0, ease_in=3000, ease_out=0):
119
- ps = ps or phonemize(text, voice)
120
- tokens = [i for i in map(VOCAB.get, ps) if i is not None]
121
- if not tokens:
122
- return (None, '')
123
- elif len(tokens) > 510:
124
- tokens = tokens[:510]
125
- ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
126
  tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
127
  input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
128
  text_mask = length_to_mask(input_lengths).to(device)
@@ -145,7 +138,17 @@ def forward(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000,
145
  t_en = model.text_encoder(tokens, input_lengths, text_mask)
146
  asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))
147
  out = model.decoder(asr, F0_pred, N_pred, ref_s[:, :128])
148
- out = out.squeeze().cpu().numpy()
 
 
 
 
 
 
 
 
 
 
149
  if reduce_noise > 0:
150
  out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
151
  opening_cut = max(0, int(opening_cut / speed))
 
115
 
116
  @spaces.GPU(duration=10)
117
  @torch.no_grad()
118
+ def forward(tokens, speed):
 
 
 
 
 
 
 
119
  tokens = torch.LongTensor([[0, *tokens, 0]]).to(device)
120
  input_lengths = torch.LongTensor([tokens.shape[-1]]).to(device)
121
  text_mask = length_to_mask(input_lengths).to(device)
 
138
  t_en = model.text_encoder(tokens, input_lengths, text_mask)
139
  asr = (t_en @ pred_aln_trg.unsqueeze(0).to(device))
140
  out = model.decoder(asr, F0_pred, N_pred, ref_s[:, :128])
141
+ return out.squeeze().cpu().numpy()
142
+
143
+ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=5000, closing_cut=0, ease_in=3000, ease_out=0):
144
+ ps = ps or phonemize(text, voice)
145
+ tokens = [i for i in map(VOCAB.get, ps) if i is not None]
146
+ if not tokens:
147
+ return (None, '')
148
+ elif len(tokens) > 510:
149
+ tokens = tokens[:510]
150
+ ps = ''.join(next(k for k, v in VOCAB.items() if i == v) for i in tokens)
151
+ out = forward(tokens, speed)
152
  if reduce_noise > 0:
153
  out = nr.reduce_noise(y=out, sr=SAMPLE_RATE, prop_decrease=reduce_noise, n_fft=512)
154
  opening_cut = max(0, int(opening_cut / speed))