Spaces:
Running
Running
File size: 1,876 Bytes
9434e0e 18ab0a5 9434e0e 18ab0a5 9434e0e 18ab0a5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import base64
import os
import sys
from io import BytesIO

import gradio as gr
import torch
from fastapi import Request
from TTS.api import TTS
from TTS.utils.manage import ModelManager
# Startup diagnostics: ask the TTS package for its model listing and dump the
# returned object's attributes and member names to stdout.
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

# NOTE(review): presumably this pre-accepts the Coqui terms of service so the
# model load below does not wait for an interactive prompt -- confirm.
os.environ["COQUI_TOS_AGREED"] = "1"

# Identifier of the model served by this app; predict() also inspects this
# string when it normalises the language code.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name, gpu=False)

# Move the model to the GPU only when one is actually usable. The previous
# unconditional tts.to("cuda") fails at import time on CPU-only hosts even
# though the model had just been constructed with gpu=False.
tts.to("cuda" if torch.cuda.is_available() else "cpu")
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesize ``prompt`` using a reference recording as the speaker voice.

    Args:
        prompt: Text to speak; must be between 2 and 10000 characters long.
        language: Language code forwarded to the TTS model (it may be
            rewritten below depending on the loaded model's name).
        audio_file_pth: Path to an uploaded reference audio file.
        mic_file_path: Path to a microphone recording of the reference voice.
        use_mic: When true and ``mic_file_path`` is non-empty, the microphone
            recording is used instead of ``audio_file_pth``.
        agree: Whether the user accepted the Terms & Conditions.

    Returns:
        A 2-tuple. On success: the result of ``gr.make_waveform`` for
        ``output.wav`` and an HTML ``<audio>`` tag embedding the same audio as
        base64. On a validation failure or a "device-assert" runtime error:
        ``(None, message)``.

    Raises:
        RuntimeError: Any runtime error from synthesis whose message does not
            contain "device-assert" is propagated unchanged.
    """
    if not agree:
        return None, "Please accept the Terms & Conditions."

    # The microphone take wins only when it was requested and actually exists.
    if use_mic and mic_file_path:
        reference_audio = mic_file_path
    else:
        reference_audio = audio_file_pth
    if not reference_audio:
        return None, "Please provide a reference audio."

    prompt_length = len(prompt)
    if prompt_length < 2:
        return None, "Please provide a longer text prompt."
    if prompt_length > 10000:
        return None, "Text length is limited to 10000 characters. Please try a shorter text."

    # Adjust the language code according to the loaded model's name.
    if language == "fr" and "your" in model_name:
        language = "fr-fr"
    if "/fr/" in model_name:
        language = None

    try:
        tts.tts_to_file(
            text=prompt,
            file_path="output.wav",
            speaker_wav=reference_audio,
            language=language,
        )
    except RuntimeError as err:
        if "device-assert" not in str(err):
            raise
        return None, "Runtime error encountered. Please try again later."

    # Embed the generated file inline so the page can play it without a
    # second request.
    with open("output.wav", "rb") as wav_file:
        audio = base64.b64encode(wav_file.read()).decode("utf-8")
    audio_player = f'<audio src="data:audio/wav;base64,{audio}" controls autoplay></audio>'
    return gr.make_waveform(audio="output.wav"), audio_player
|