spaces cleanup
app.py CHANGED

@@ -45,18 +45,19 @@ AVAILABLE_MODELS = {
     # 'VoiceCraft 2.0': 'voicecraft',
     # 'Parler TTS': 'parler'
 
+    # HF Gradio Spaces:
     'coqui/xtts': 'coqui/xtts',
-    'collabora/WhisperSpeech': 'collabora/WhisperSpeech',
-
-    'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2',
-    'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1',
-    'Pendrokar/xVASynth': 'Pendrokar/xVASynth',
+    # 'collabora/WhisperSpeech': 'collabora/WhisperSpeech', # old gradio?
+    'myshell-ai/OpenVoice': 'myshell-ai/OpenVoice', # 4.29.0
+    'myshell-ai/OpenVoiceV2': 'myshell-ai/OpenVoiceV2', # 4.29.0
+    'mrfakename/MetaVoice-1B-v0.1': 'mrfakename/MetaVoice-1B-v0.1', # 4.29.0
+    'Pendrokar/xVASynth': 'Pendrokar/xVASynth', # EN-GB 4.29.0 4.42.0
     # 'coqui/CoquiTTS': 'coqui/CoquiTTS',
-    'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS',
+    'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # 4.29.0
 
     # Parler
-    'parler-tts/parler_tts': 'parler-tts/parler_tts',
-    'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso',
+    'parler-tts/parler_tts': 'parler-tts/parler_tts', # 4.29.0 4.42.0
+    'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29.0 4.42.0
 
     # TTS w issues
     # 'PolyAI/pheme': '/predict#0', # sleepy HF Space
@@ -66,7 +67,7 @@ AVAILABLE_MODELS = {
     # 'styletts2/styletts2': '0#0', # API disabled
     # 'Manmay/tortoise-tts': '/predict#0', # Cannot skip text-from-file parameter
     # 'pytorch/Tacotron2': '0#0', # old gradio
-    # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', #
+    # 'mrfakename/MeloTTS': 'mrfakename/MeloTTS', # Error with EN # 4.29.0
     # 'parler-tts/parler_tts_mini': 'parler-tts/parler_tts_mini', # old gradio - ValueError: Unsupported protocol: sse_v3
 }
 
@@ -131,7 +132,7 @@ HF_SPACES = {
     'mrfakename/MeloTTS': {
         'name': 'mrfakename/MeloTTS',
         'function': '/synthesize',
-        'text_param_index':
+        'text_param_index': 0,
         'return_audio_index': 0,
     },
 
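
Each HF_SPACES entry describes how to call a Space: 'function' is the Gradio api_name, 'text_param_index' is the position of the text argument in the endpoint's positional inputs (the missing value above is what this commit fixes), and 'return_audio_index' picks the audio file out of the returned values. A minimal sketch of a call driven by such an entry, assuming gradio_client is installed; call_space is an illustrative helper, not a function in app.py:

    from gradio_client import Client

    def call_space(entry: dict, text: str, hf_token: str = None) -> str:
        # hypothetical helper: build positional inputs, slot the text into
        # place, and pull the audio path out of the returned values
        client = Client(entry['name'], hf_token=hf_token)
        inputs = [None] * (entry['text_param_index'] + 1)
        inputs[entry['text_param_index']] = text
        results = client.predict(*inputs, api_name=entry['function'])
        return results if isinstance(results, str) else results[entry['return_audio_index']]
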
@@ -199,14 +200,14 @@ OVERRIDE_INPUTS = {
         3: 0.7, #Tempo - Gradio Slider issue: takes min. rather than value
     },
     'Pendrokar/xVASynth': {
-        1: 'ccby_nvidia_hifi_92_F', #fine-tuned voice model name
+        1: 'ccby_nvidia_hifi_92_F', #fine-tuned voice model name; #92 BRITISH
         3: 1.0, #pacing/duration - Gradio Slider issue: takes min. rather than value
     },
     'suno/bark': {
-        1: 'Speaker 3 (en)',
+        1: 'Speaker 3 (en)', # voice
     },
     'amphion/Text-to-Speech': {
-        1: 'LikeManyWaters',
+        1: 'LikeManyWaters', # voice
     },
     'LeeSangHoon/HierSpeech_TTS': {
         1: DEFAULT_VOICE_SAMPLE, # voice sample
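
The integer keys in OVERRIDE_INPUTS are positional parameter indices for each Space's endpoint; the values replace the endpoint's example defaults before a request is sent. A minimal sketch of how they are applied, mirroring the loop that appears further down in this diff; apply_overrides is an illustrative name, not from app.py:

    def apply_overrides(modelname: str, inputs: list) -> list:
        # overwrite the example/default values at the curated positional indices
        for key, value in OVERRIDE_INPUTS.get(modelname, {}).items():
            inputs[key] = value
        return inputs
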
@@ -218,24 +219,27 @@ OVERRIDE_INPUTS = {
         7: 1111,
     },
     'Manmay/tortoise-tts': {
-        1: None, # text-from-file;
-        2: 'angie',
+        1: None, # text-from-file; cannot skip and doesn't work without
+        2: 'angie', # voice
         3: None,
         4: 'No',
     },
     'mrfakename/MeloTTS': {
-
-
+        1: 'EN', # speaker
+        # 1: 'EN-US', # speaker
+        2: 1, # speed
         3: 'EN', # language
     },
     'parler-tts/parler_tts': {
-        1: '
+        1: 'Elisabeth\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
     },
     'parler-tts/parler-tts-expresso': {
-        1: '
+        1: 'Elisabeth\'s voice is monotone yet slightly fast in delivery, with a very close recording that almost has no background noise.', # description/prompt
     },
 }
 
+hf_clients = {}
+
 SPACE_ID = os.getenv('SPACE_ID')
 MAX_SAMPLE_TXT_LENGTH = 300
 MIN_SAMPLE_TXT_LENGTH = 10
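
The new module-level hf_clients dict caches one gradio_client.Client per Space, since constructing a Client fetches the Space's API config over the network. A minimal sketch of the get-or-create pattern this commit adopts; get_client is an illustrative wrapper, not a function in app.py:

    from gradio_client import Client

    hf_clients = {}

    def get_client(model: str, hf_token: str = None) -> Client:
        # reuse a cached client so the API config is fetched only once per model
        if model not in hf_clients:
            hf_clients[model] = Client(model, hf_token=hf_token)
        return hf_clients[model]
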
@@ -334,6 +338,7 @@ scheduler = CommitScheduler(
 # Router API
 ####################################
 # router = Client("TTS-AGI/tts-router", hf_token=hf_token)
+router = {}
 ####################################
 # Gradio app
 ####################################
@@ -792,15 +797,15 @@ def synthandreturn(text):
         pass
     # Get two random models
     # forced model: your TTS model versus The World!!!
-    mdl1 = 'Pendrokar/xVASynth'
+    # mdl1 = 'Pendrokar/xVASynth'
     vsModels = dict(AVAILABLE_MODELS)
-    del vsModels[mdl1]
+    # del vsModels[mdl1]
     # randomize position of the forced model
     mdl2 = random.sample(list(vsModels.keys()), 1)
     # forced random
-    mdl1, mdl2 = random.sample(list([mdl1, mdl2[0]]), 2)
+    # mdl1, mdl2 = random.sample(list([mdl1, mdl2[0]]), 2)
     # actual random
-
+    mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)
     log_text(text)
     print("[debug] Using", mdl1, mdl2)
     def predict_and_update_result(text, model, result_storage):
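
Since random.sample draws without replacement, the single new line always yields two distinct model keys, replacing the commented-out forced-model setup. A quick illustration with placeholder model names:

    import random

    models = ['coqui/xtts', 'Pendrokar/xVASynth', 'parler-tts/parler_tts']
    mdl1, mdl2 = random.sample(models, 2)
    assert mdl1 != mdl2  # sample() never picks the same element twice
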
@@ -812,7 +817,11 @@ def synthandreturn(text):
             if model in AVAILABLE_MODELS:
                 if '/' in model:
                     # Use public HF Space
-                    mdl_space = Client(model, hf_token=hf_token)
+                    if (model not in hf_clients):
+                        hf_clients[model] = Client(model, hf_token=hf_token)
+                    mdl_space = hf_clients[model]
+
+                    print(f"{model}: Fetching endpoints of HF Space")
                     # assume the index is one of the first 9 return params
                     return_audio_index = int(HF_SPACES[model]['return_audio_index'])
                     endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
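
view_api(return_format='dict') returns the Space's endpoint schema, which the surrounding code walks to find the endpoint and its example parameters. A minimal sketch of inspecting that schema, assuming gradio_client; the keys ('named_endpoints', 'parameters', 'label') follow gradio_client's documented dict format:

    from gradio_client import Client

    client = Client('coqui/xtts')
    api = client.view_api(all_endpoints=True, print_info=False, return_format='dict')
    # list each named endpoint and the labels of its positional parameters
    for api_name, info in api['named_endpoints'].items():
        print(api_name, [p['label'] for p in info['parameters']])
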
@@ -841,9 +850,10 @@ def synthandreturn(text):
 
                     # force text
                     space_inputs[HF_SPACES[model]['text_param_index']] = text
-
+
+                    print(f"{model}: Sending request to HF Space")
                     results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)
-
+
                     # return path to audio
                     result = results[return_audio_index] if (not isinstance(results, str)) else results
                 else:
@@ -852,39 +862,37 @@ def synthandreturn(text):
                 else:
                     result = router.predict(text, model.lower(), api_name="/synthesize")
                 break
-            except:
+            except Exception:
+                raise Exception
                 attempt_count += 1
-
-
-
-
-
-
+                print(f"{model}: Unable to call API (attempt: {attempt_count})")
+                # sleep for one second before trying again
+                time.sleep(1)
+
+        if attempt_count > 2:
+            raise gr.Error(f"{model}: Failed to call model")
+        else:
+            print('Done with', model)
+
         try:
             with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                 audio = AudioSegment.from_file(result)
                 current_sr = audio.frame_rate
                 if current_sr > 24000:
-                    print(
+                    print(f"{model}: Resampling")
                     audio = audio.set_frame_rate(24000)
                 try:
-                    print(
+                    print(f"{model}: Trying to normalize audio")
                     audio = match_target_amplitude(audio, -20)
                 except:
-                    print(
+                    print(f"{model}: [WARN] Unable to normalize audio")
                 audio.export(f.name, format="wav")
                 os.unlink(result)
                 result = f.name
         except:
             pass
         if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
-        print(model)
-        print(f"Running model {model}")
         result_storage[model] = result
-        # try:
-        # doloudnorm(result)
-        # except:
-        # pass
 
     def _get_param_examples(parameters):
         example_inputs = []
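
match_target_amplitude is the standard pydub gain-matching recipe; together with set_frame_rate it keeps every sample at a comparable loudness and sample rate before the vote. A self-contained sketch of the post-processing step, assuming pydub (with ffmpeg) is available and 'in.wav' stands in for the model's output path:

    from pydub import AudioSegment

    def match_target_amplitude(sound: AudioSegment, target_dBFS: float) -> AudioSegment:
        # shift the clip's average loudness (dBFS) to the requested target
        return sound.apply_gain(target_dBFS - sound.dBFS)

    audio = AudioSegment.from_file('in.wav')
    if audio.frame_rate > 24000:
        audio = audio.set_frame_rate(24000)  # cap the sample rate for fair comparison
    audio = match_target_amplitude(audio, -20)
    audio.export('out.wav', format='wav')
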
@@ -913,7 +921,7 @@ def synthandreturn(text):
        try:
            for key,value in OVERRIDE_INPUTS[modelname].items():
                inputs[key] = value
-           print(f"Default inputs overridden")
+           print(f"{modelname}: Default inputs overridden")
        except:
            pass
 
@@ -1104,3 +1112,4 @@ with gr.Blocks(theme=theme, css="footer {visibility: hidden}textbox{resize:none}
 
 
 demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
+demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False, show_error=True)
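
launch(show_error=True) displays uncaught handler exceptions as an alert in the browser; gr.Error (used above for the retry failure) produces a user-facing alert with a custom message either way. A minimal sketch, assuming Gradio 4.x:

    import gradio as gr

    def handler(text):
        raise gr.Error('Failed to call model')  # shown to the user as an alert

    with gr.Blocks() as demo:
        box = gr.Textbox()
        box.submit(handler, box, box)

    demo.queue(api_open=False).launch(show_api=False, show_error=True)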