Pendrokar committed on
Commit
bba8af9
·
1 Parent(s): 2250a5a

spaces cleanup

Browse files
Files changed (1) hide show
  1. app.py +52 -43
app.py CHANGED
@@ -45,18 +45,19 @@ AVAILABLE_MODELS = {
45
  # 'VoiceCraft 2.0': 'voicecraft',
46
  # 'Parler TTS': 'parler'
47
 
 
48
  'coqui/xtts': 'coqui/xtts',
49
- 'collabora/WhisperSpeech': 'collabora/WhisperSpeech',
50
- # 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice',
51
- 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2',
52
- 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1',
53
- 'Pendrokar/xVASynth': 'Pendrokar/xVASynth',
54
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
55
- 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS',
56
 
57
  # Parler
58
- 'parler-tts/parler_tts': 'parler-tts/parler_tts',
59
- 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso',
60
 
61
  # TTS w issues
62
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -66,7 +67,7 @@ AVAILABLE_MODELS = {
66
  # 'styletts2/styletts2': '0#0', # API disabled
67
  # 'Manmay/tortoise-tts': '/predict#0', # Cannot skip text-from-file parameter
68
  # 'pytorch/Tacotron2': '0#0', # old gradio
69
- # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # old gradio - ValueError: Unsupported protocol: sse_v3
70
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # old gradio - ValueError: Unsupported protocol: sse_v3
71
  }
72
 
@@ -131,7 +132,7 @@ HF_SPACES = {
131
  'mrfakename/MeloTTS': {
132
  'name': 'mrfakename/MeloTTS',
133
  'function': '/synthesize',
134
- 'text_param_index': 1,
135
  'return_audio_index': 0,
136
  },
137
 
@@ -199,14 +200,14 @@ OVERRIDE_INPUTS = {
199
  3: 0.7, #Tempo - Gradio Slider issue: takes min. rather than value
200
  },
201
  'Pendrokar/xVASynth': {
202
- 1: 'ccby_nvidia_hifi_92_F', #fine-tuned voice model name
203
  3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
204
  },
205
  'suno/bark': {
206
- 1: 'Speaker 3 (en)',
207
  },
208
  'amphion/Text-to-Speech': {
209
- 1: 'LikeManyWaters',
210
  },
211
  'LeeSangHoon/HierSpeech_TTS': {
212
  1: DEFAULT_VOICE_SAMPLE, # voice sample
@@ -218,24 +219,27 @@ OVERRIDE_INPUTS = {
218
  7: 1111,
219
  },
220
  'Manmay/tortoise-tts': {
221
- 1: None, # text-from-file; FIXME: cannot skip and doesn't work without
222
- 2: 'angie',
223
  3: None,
224
  4: 'No',
225
  },
226
  'mrfakename/MeloTTS': {
227
- 0: 'EN-US', # speaker
228
- 2: 1,
 
229
  3: 'EN', # language
230
  },
231
  'parler-tts/parler_tts': {
232
- 1: 'Laura\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
233
  },
234
  'parler-tts/parler-tts-expresso': {
235
- 1: 'Laura\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
236
  },
237
  }
238
 
 
 
239
  SPACE_ID = os.getenv('SPACE_ID')
240
  MAX_SAMPLE_TXT_LENGTH = 300
241
  MIN_SAMPLE_TXT_LENGTH = 10
@@ -334,6 +338,7 @@ scheduler = CommitScheduler(
334
  # Router API
335
  ####################################
336
  # router = Client("TTS-AGI/tts-router", hf_token=hf_token)
 
337
  ####################################
338
  # Gradio app
339
  ####################################
@@ -792,15 +797,15 @@ def synthandreturn(text):
792
  pass
793
  # Get two random models
794
  # forced model: your TTS model versus The World!!!
795
- mdl1 = 'Pendrokar/xVASynth'
796
  vsModels = dict(AVAILABLE_MODELS)
797
- del vsModels[mdl1]
798
  # randomize position of the forced model
799
  mdl2 = random.sample(list(vsModels.keys()), 1)
800
  # forced random
801
- mdl1, mdl2 = random.sample(list([mdl1, mdl2[0]]), 2)
802
  # actual random
803
- # mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)
804
  log_text(text)
805
  print("[debug] Using", mdl1, mdl2)
806
  def predict_and_update_result(text, model, result_storage):
@@ -812,7 +817,11 @@ def synthandreturn(text):
812
  if model in AVAILABLE_MODELS:
813
  if '/' in model:
814
  # Use public HF Space
815
- mdl_space = Client(model, hf_token=hf_token)
 
 
 
 
816
  # assume the index is one of the first 9 return params
817
  return_audio_index = int(HF_SPACES[model]['return_audio_index'])
818
  endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
@@ -841,9 +850,10 @@ def synthandreturn(text):
841
 
842
  # force text
843
  space_inputs[HF_SPACES[model]['text_param_index']] = text
844
-
 
845
  results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
846
-
847
  # return path to audio
848
  result = results[return_audio_index] if (not isinstance(results, str)) else results
849
  else:
@@ -852,39 +862,37 @@ def synthandreturn(text):
852
  else:
853
  result = router.predict(text, model.lower(), api_name="/synthesize")
854
  break
855
- except:
 
856
  attempt_count += 1
857
- raise gr.Error('Unable to call API, please try again')
858
- print('Done with', model)
859
- # try:
860
- # doresample(result)
861
- # except:
862
- # pass
 
 
 
863
  try:
864
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
865
  audio = AudioSegment.from_file(result)
866
  current_sr = audio.frame_rate
867
  if current_sr > 24000:
868
- print('Resampling', model)
869
  audio = audio.set_frame_rate(24000)
870
  try:
871
- print('Trying to normalize audio', model)
872
  audio = match_target_amplitude(audio, -20)
873
  except:
874
- print('[WARN] Unable to normalize audio')
875
  audio.export(f.name, format="wav")
876
  os.unlink(result)
877
  result = f.name
878
  except:
879
  pass
880
  if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
881
- print(model)
882
- print(f"Running model {model}")
883
  result_storage[model] = result
884
- # try:
885
- # doloudnorm(result)
886
- # except:
887
- # pass
888
 
889
  def _get_param_examples(parameters):
890
  example_inputs = []
@@ -913,7 +921,7 @@ def synthandreturn(text):
913
  try:
914
  for key,value in OVERRIDE_INPUTS[modelname].items():
915
  inputs[key] = value
916
- print(f"Default inputs overridden for {modelname}")
917
  except:
918
  pass
919
 
@@ -1104,3 +1112,4 @@ with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}
1104
 
1105
 
1106
  demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
 
 
45
  # 'VoiceCraft 2.0': 'voicecraft',
46
  # 'Parler TTS': 'parler'
47
 
48
+ # HF Gradio Spaces:
49
  'coqui/xtts': 'coqui/xtts',
50
+ # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # old gradio?
51
+ 'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # 4.29.0
52
+ 'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # 4.29.0
53
+ 'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29.0
54
+ 'Pendrokar/xVASynth': 'Pendrokar/xVASynth', # EN-GB 4.29.0 4.42.0
55
  # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
56
+ 'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # 4.29.0
57
 
58
  # Parler
59
+ 'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
60
+ 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29.0 4.42.0
61
 
62
  # TTS w issues
63
  # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
 
67
  # 'styletts2/styletts2': '0#0', # API disabled
68
  # 'Manmay/tortoise-tts': '/predict#0', # Cannot skip text-from-file parameter
69
  # 'pytorch/Tacotron2': '0#0', # old gradio
70
+ # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # Error with EN # 4.29.0
71
  # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # old gradio - ValueError: Unsupported protocol: sse_v3
72
  }
73
 
 
132
  'mrfakename/MeloTTS': {
133
  'name': 'mrfakename/MeloTTS',
134
  'function': '/synthesize',
135
+ 'text_param_index': 0,
136
  'return_audio_index': 0,
137
  },
138
 
 
200
  3: 0.7, #Tempo - Gradio Slider issue: takes min. rather than value
201
  },
202
  'Pendrokar/xVASynth': {
203
+ 1: 'ccby_nvidia_hifi_92_F', #fine-tuned voice model name; #92 BRITISH
204
  3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
205
  },
206
  'suno/bark': {
207
+ 1: 'Speaker 3 (en)', # voice
208
  },
209
  'amphion/Text-to-Speech': {
210
+ 1: 'LikeManyWaters', # voice
211
  },
212
  'LeeSangHoon/HierSpeech_TTS': {
213
  1: DEFAULT_VOICE_SAMPLE, # voice sample
 
219
  7: 1111,
220
  },
221
  'Manmay/tortoise-tts': {
222
+ 1: None, # text-from-file; cannot skip and doesn't work without
223
+ 2: 'angie', # voice
224
  3: None,
225
  4: 'No',
226
  },
227
  'mrfakename/MeloTTS': {
228
+ 1: 'EN', # speaker
229
+ # 1: 'EN-US', # speaker
230
+ 2: 1, # speed
231
  3: 'EN', # language
232
  },
233
  'parler-tts/parler_tts': {
234
+ 1: 'Elisabeth\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
235
  },
236
  'parler-tts/parler-tts-expresso': {
237
+ 1: 'Elisabeth\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
238
  },
239
  }
240
 
241
+ hf_clients = {}
242
+
243
  SPACE_ID = os.getenv('SPACE_ID')
244
  MAX_SAMPLE_TXT_LENGTH = 300
245
  MIN_SAMPLE_TXT_LENGTH = 10
 
338
  # Router API
339
  ####################################
340
  # router = Client("TTS-AGI/tts-router", hf_token=hf_token)
341
+ router = {}
342
  ####################################
343
  # Gradio app
344
  ####################################
 
797
  pass
798
  # Get two random models
799
  # forced model: your TTS model versus The World!!!
800
+ # mdl1 = 'Pendrokar/xVASynth'
801
  vsModels = dict(AVAILABLE_MODELS)
802
+ # del vsModels[mdl1]
803
  # randomize position of the forced model
804
  mdl2 = random.sample(list(vsModels.keys()), 1)
805
  # forced random
806
+ # mdl1, mdl2 = random.sample(list([mdl1, mdl2[0]]), 2)
807
  # actual random
808
+ mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)
809
  log_text(text)
810
  print("[debug] Using", mdl1, mdl2)
811
  def predict_and_update_result(text, model, result_storage):
 
817
  if model in AVAILABLE_MODELS:
818
  if '/' in model:
819
  # Use public HF Space
820
+ if (model not in hf_clients):
821
+ hf_clients[model] = Client(model, hf_token=hf_token)
822
+ mdl_space = hf_clients[model]
823
+
824
+ print(f"{model}: Fetching endpoints of HF Space")
825
  # assume the index is one of the first 9 return params
826
  return_audio_index = int(HF_SPACES[model]['return_audio_index'])
827
  endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
 
850
 
851
  # force text
852
  space_inputs[HF_SPACES[model]['text_param_index']] = text
853
+
854
+ print(f"{model}: Sending request to HF Space")
855
  results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
856
+
857
  # return path to audio
858
  result = results[return_audio_index] if (not isinstance(results, str)) else results
859
  else:
 
862
  else:
863
  result = router.predict(text, model.lower(), api_name="/synthesize")
864
  break
865
+ except Exception:
866
+ raise Exception
867
  attempt_count += 1
868
+ print(f"{model}: Unable to call API (attempt: {attempt_count})")
869
+ # sleep for one second before trying again
870
+ time.sleep(1)
871
+
872
+ if attempt_count > 2:
873
+ raise gr.Error(f"{model}: Failed to call model")
874
+ else:
875
+ print('Done with', model)
876
+
877
  try:
878
  with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
879
  audio = AudioSegment.from_file(result)
880
  current_sr = audio.frame_rate
881
  if current_sr > 24000:
882
+ print(f"{model}: Resampling")
883
  audio = audio.set_frame_rate(24000)
884
  try:
885
+ print(f"{model}: Trying to normalize audio")
886
  audio = match_target_amplitude(audio, -20)
887
  except:
888
+ print(f"{model}: [WARN] Unable to normalize audio")
889
  audio.export(f.name, format="wav")
890
  os.unlink(result)
891
  result = f.name
892
  except:
893
  pass
894
  if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
 
 
895
  result_storage[model] = result
 
 
 
 
896
 
897
  def _get_param_examples(parameters):
898
  example_inputs = []
 
921
  try:
922
  for key,value in OVERRIDE_INPUTS[modelname].items():
923
  inputs[key] = value
924
+ print(f"{modelname}: Default inputs overridden")
925
  except:
926
  pass
927
 
 
1112
 
1113
 
1114
  demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
1115
+ demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False, show_error=True)