fffiloni committed on
Commit
f634467
·
verified ·
1 Parent(s): 6bc49fc

add maskGCT api option

Browse files
Files changed (1) hide show
  1. app.py +46 -2
app.py CHANGED
@@ -190,6 +190,22 @@ def get_whisperspeech(prompt_audio_whisperspeech, audio_to_clone):
190
  print(result)
191
  return result, gr.update(value=result, visible=True)
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  ########################
195
  # TALKING PORTRAIT GEN #
@@ -264,7 +280,7 @@ css = '''
264
  #video-block {
265
  flex: 9;
266
  }
267
- #audio-block, #audio-clone-elm {
268
  flex: 1;
269
  }
270
  div#audio-clone-elm > .audio-container > button {
@@ -273,6 +289,12 @@ div#audio-clone-elm > .audio-container > button {
273
  div#audio-clone-elm > .audio-container > button > .wrap {
274
  font-size: 0.9em;
275
  }
 
 
 
 
 
 
276
  #text-synth, #voice-desc{
277
  height: 130px;
278
  }
@@ -285,7 +307,7 @@ div#audio-clone-elm > .audio-container > button > .wrap {
285
  #gen-voice-btn {
286
  flex: 1;
287
  }
288
- #parler-tab, #whisperspeech-tab {
289
  padding: 0;
290
  }
291
  #main-submit{
@@ -405,6 +427,20 @@ with gr.Blocks(css=css) as demo:
405
  elem_id = "audio-clone-elm"
406
  )
407
  gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
408
 
409
  with gr.Column(elem_id="result-column"):
410
 
@@ -501,6 +537,14 @@ with gr.Blocks(css=css) as demo:
501
  show_api = False
502
  )
503
 
 
 
 
 
 
 
 
 
504
  submit_btn.click(
505
  fn = generate_talking_portrait,
506
  inputs = [portrait, voice],
 
190
  print(result)
191
  return result, gr.update(value=result, visible=True)
192
 
193
def get_maskGCT_TTS(prompt_audio_maskGCT, audio_to_clone):
    """Generate a voice clone through the ``amphion/maskgct`` Space API.

    Args:
        prompt_audio_maskGCT: Text to synthesize; forwarded to the remote
            endpoint as ``target_text``.
        audio_to_clone: File path of the reference voice recording;
            forwarded to the remote endpoint as ``prompt_wav``.

    Returns:
        A ``(result, update)`` pair: the value returned by the remote
        ``/predict`` endpoint, and a ``gr.update`` that sets that same value
        on the target component and makes it visible.

    Raises:
        gr.Error: If no reference audio was provided, or if the client for
            the remote Space cannot be created.
    """
    # Fail early with a readable message instead of letting handle_file()
    # choke on a missing path.
    if not audio_to_clone:
        raise gr.Error("Please provide a voice sample to clone first.")

    try:
        client = Client("amphion/maskgct")
    except Exception as err:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt/SystemExit still propagate; keep the cause chained.
        raise gr.Error(
            "amphion/maskgct space's api might not be ready, please wait, or upload an audio instead."
        ) from err

    result = client.predict(
        prompt_wav=handle_file(audio_to_clone),
        target_text=prompt_audio_maskGCT,
        target_len=-1,
        n_timesteps=25,
        api_name="/predict",
    )
    print(result)
    return result, gr.update(value=result, visible=True)
208
+
209
 
210
  ########################
211
  # TALKING PORTRAIT GEN #
 
280
  #video-block {
281
  flex: 9;
282
  }
283
+ #audio-block, #audio-clone-elm, #audio-clone-elm-maskGCT {
284
  flex: 1;
285
  }
286
  div#audio-clone-elm > .audio-container > button {
 
289
  div#audio-clone-elm > .audio-container > button > .wrap {
290
  font-size: 0.9em;
291
  }
292
+ div#audio-clone-elm-maskGCT > .audio-container > button {
293
+ height: 180px!important;
294
+ }
295
+ div#audio-clone-elm-maskGCT > .audio-container > button > .wrap {
296
+ font-size: 0.9em;
297
+ }
298
  #text-synth, #voice-desc{
299
  height: 130px;
300
  }
 
307
  #gen-voice-btn {
308
  flex: 1;
309
  }
310
+ #parler-tab, #whisperspeech-tab, #maskGCT-tab {
311
  padding: 0;
312
  }
313
  #main-submit{
 
427
  elem_id = "audio-clone-elm"
428
  )
429
  gen_wsp_voice_btn = gr.Button("Generate voice clone (optional)")
430
+
431
+ with gr.Tab("MaskGCT TTS", elem_id="maskGCT-tab"):
432
+ prompt_audio_maskGCT = gr.Textbox(
433
+ label = "Text to synthetize",
434
+ lines = 2,
435
+ max_lines = 2,
436
+ elem_id = "text-synth-maskGCT"
437
+ )
438
+ audio_to_clone_maskGCT = gr.Audio(
439
+ label = "Voice to clone",
440
+ type = "filepath",
441
+ elem_id = "audio-clone-elm-maskGCT"
442
+ )
443
+ gen_maskGCT_voice_btn = gr.Button("Generate voice clone (optional)")
444
 
445
  with gr.Column(elem_id="result-column"):
446
 
 
537
  show_api = False
538
  )
539
 
540
+ gen_maskGCT_voice_btn.click(
541
+ fn = get_maskGCT_TTS,
542
+ inputs = [prompt_audio_maskGCT, audio_to_clone_maskGCT],
543
+ outputs = [voice, preprocess_audio_file],
544
+ queue = False,
545
+ show_api = False
546
+ )
547
+
548
  submit_btn.click(
549
  fn = generate_talking_portrait,
550
  inputs = [portrait, voice],