Pendrokar committed on
Commit
dea73c9
·
1 Parent(s): 481d982

Added TTS: MaskGCT & StyleTTS kokoro; Edge space fixed

Browse files
Files changed (1) hide show
  1. app.py +64 -3
app.py CHANGED
@@ -98,16 +98,24 @@ AVAILABLE_MODELS = {
98
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0 # overly jolly
99
 
100
  # # Microsoft Edge TTS
101
- #'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
102
 
103
  # IMS-Toucan
104
- # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
105
 
106
  # IMS-Toucan English non-artificial
107
  'Flux9665/EnglishToucan': 'Flux9665/EnglishToucan', # 5.1
108
 
109
  # StyleTTS v2
110
- 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2',
 
 
 
 
 
 
 
 
111
 
112
  # HF TTS w issues
113
  'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
@@ -276,10 +284,38 @@ HF_SPACES = {
276
  'function': '/synthesize',
277
  'text_param_index': 0,
278
  'return_audio_index': 0,
 
 
 
 
 
 
 
 
 
 
279
  'is_zero_gpu_space': True,
280
  'series': 'StyleTTS',
281
  },
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  # TTS w issues
284
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
285
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
@@ -411,6 +447,31 @@ OVERRIDE_INPUTS = {
411
  3: 8, # lngsteps
412
  },
413
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
414
  }
415
 
416
  hf_clients: Tuple[Client] = {}
 
98
  # 'parler-tts/parler-tts-expresso': 'parler-tts/parler-tts-expresso', # 4.29 4.32 4.36.1 4.42.0 # overly jolly
99
 
100
  # # Microsoft Edge TTS
101
+ 'innoai/Edge-TTS-Text-to-Speech': 'innoai/Edge-TTS-Text-to-Speech', # 4.29
102
 
103
  # IMS-Toucan
104
+ # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1 # randomly changes pitch
105
 
106
  # IMS-Toucan English non-artificial
107
  'Flux9665/EnglishToucan': 'Flux9665/EnglishToucan', # 5.1
108
 
109
  # StyleTTS v2
110
+ # 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2',
111
+ # StyleTTS kokoro
112
+ 'hexgrad/kokoro': 'hexgrad/kokoro',
113
+
114
+ # MaskGCT (by Amphion)
115
+ # DEMANDS 300 seconds of ZeroGPU
116
+ # 'amphion/maskgct': 'amphion/maskgct',
117
+ # default ZeroGPU borrow time
118
+ # 'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab',
119
 
120
  # HF TTS w issues
121
  'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
 
284
  'function': '/synthesize',
285
  'text_param_index': 0,
286
  'return_audio_index': 0,
287
+ # 'is_zero_gpu_space': True,
288
+ 'series': 'StyleTTS',
289
+ },
290
+
291
+ # StyleTTS v2 kokoro fine tune
292
+ 'hexgrad/kokoro': {
293
+ 'name': 'StyleTTS kokoro',
294
+ 'function': '/generate',
295
+ 'text_param_index': 0,
296
+ 'return_audio_index': 0,
297
  'is_zero_gpu_space': True,
298
  'series': 'StyleTTS',
299
  },
300
 
301
+ # MaskGCT (by Amphion)
302
+ 'amphion/maskgct': {
303
+ 'name': 'MaskGCT',
304
+ 'function': '/predict',
305
+ 'text_param_index': 1,
306
+ 'return_audio_index': 0,
307
+ 'is_zero_gpu_space': True,
308
+ 'series': 'MaskGCT',
309
+ },
310
+ 'Svngoku/maskgct-audio-lab': {
311
+ 'name': 'MaskGCT',
312
+ 'function': '/predict',
313
+ 'text_param_index': 1,
314
+ 'return_audio_index': 0,
315
+ 'is_zero_gpu_space': True,
316
+ 'series': 'MaskGCT',
317
+ },
318
+
319
  # TTS w issues
320
  # 'PolyAI/pheme': '/predict#0', #sleepy HF Space
321
  # 'amphion/Text-to-Speech': '/predict#0', #takes a whole minute to synthesize
 
447
  3: 8, # lngsteps
448
  },
449
 
450
+ # StyleTTS 2 kokoro
451
+ 'hexgrad/kokoro': {
452
+ 1: "af_0", #voice
453
+ 2: None, #ps
454
+ 3: 1, #speed
455
+ 4: 0.5, #reduce_noise
456
+ 5: 4000, #opening_cut
457
+ 6: 2000, #closing_cut
458
+ 7: 3000, #ease_in
459
+ 8: 1000, #ease_out
460
+ 9: 5000, #pad_before
461
+ 10: 5000, #pad_after
462
+ },
463
+
464
+ # MaskGCT (by Amphion)
465
+ 'amphion/maskgct': {
466
+ 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
467
+ 2: -1, #target_len
468
+ 3: 25, #n_timesteps
469
+ },
470
+ 'Svngoku/maskgct-audio-lab': {
471
+ 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
472
+ 2: -1, #target_len
473
+ 3: 25, #n_timesteps
474
+ },
475
  }
476
 
477
  hf_clients: Tuple[Client] = {}