hexgrad committed on
Commit
42f9149
·
verified ·
1 Parent(s): 7562a6f

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -111,7 +111,7 @@ def tokenize(ps):
111
  return [i for i in map(VOCAB.get, ps) if i is not None]
112
 
113
  CHOICES = {
114
- '🇺🇸 🚺 American Female 0': 'af_0',
115
  '🇺🇸 🚺 Bella': 'af_bella',
116
  '🇺🇸 🚺 Nicole': 'af_nicole',
117
  '🇺🇸 🚹 Michael': 'am_michael',
@@ -126,9 +126,10 @@ CHOICES = {
126
  '🇯🇵 🚺 Japanese Female 0': 'jf_0',
127
  }
128
  VOICES = {k: torch.load(os.path.join(snapshot, 'voices', f'{k}.pt'), weights_only=True).to(device) for k in CHOICES.values()}
129
- def get_random_en_voice():
130
- # Top 2 most stable voices in each category of (US/GB) and (F/M)
131
- return random.choice(['af_0', 'af_bella', 'am_michael', 'am_adam', 'bf_0', 'bf_2', 'bm_0', 'bm_1'])
 
132
 
133
  np_log_99 = np.log(99)
134
  def s_curve(p):
@@ -168,9 +169,9 @@ def forward(tokens, voice, speed):
168
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
169
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
170
 
171
- def generate(text, voice=None, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000, closing_cut=2000, ease_in=3000, ease_out=1000, pad_before=5000, pad_after=5000):
172
  if voice not in VOICES:
173
- voice = get_random_en_voice()
174
  ps = ps or phonemize(text, voice)
175
  tokens = tokenize(ps)
176
  if not tokens:
 
111
  return [i for i in map(VOCAB.get, ps) if i is not None]
112
 
113
  CHOICES = {
114
+ '🇺🇸 🚺 American Female 0': 'af_zero',
115
  '🇺🇸 🚺 Bella': 'af_bella',
116
  '🇺🇸 🚺 Nicole': 'af_nicole',
117
  '🇺🇸 🚹 Michael': 'am_michael',
 
126
  '🇯🇵 🚺 Japanese Female 0': 'jf_0',
127
  }
128
  VOICES = {k: torch.load(os.path.join(snapshot, 'voices', f'{k}.pt'), weights_only=True).to(device) for k in CHOICES.values()}
129
+ def get_random_af_voice():
130
+ # Must be an American female voice to maintain compatibility with
131
+ # https://huggingface.co/spaces/Pendrokar/TTS-Spaces-Arena
132
+ return random.choice(['af_zero', 'af_bella'])
133
 
134
  np_log_99 = np.log(99)
135
  def s_curve(p):
 
169
  asr = t_en @ pred_aln_trg.unsqueeze(0).to(device)
170
  return model.decoder(asr, F0_pred, N_pred, ref_s[:, :128]).squeeze().cpu().numpy()
171
 
172
+ def generate(text, voice, ps=None, speed=1.0, reduce_noise=0.5, opening_cut=4000, closing_cut=2000, ease_in=3000, ease_out=1000, pad_before=5000, pad_after=5000):
173
  if voice not in VOICES:
174
+ voice = get_random_af_voice()
175
  ps = ps or phonemize(text, voice)
176
  tokens = tokenize(ps)
177
  if not tokens: