EyeSee_chi / tts.py
Niki Zhang
Update tts.py
18ab0a5 verified
raw
history blame
1.88 kB
import base64
import os
import sys
from io import BytesIO

import gradio as gr
import torch
from fastapi import Request
from TTS.api import TTS
from TTS.utils.manage import ModelManager
# Debug output: dump what the installed TTS package reports as available
# models (attributes and member names of the returned object).
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

# Pre-set the Coqui terms-of-service flag before the XTTS model is loaded.
# NOTE(review): presumably this suppresses an interactive licence prompt
# during model download -- confirm against the TTS package.
os.environ["COQUI_TOS_AGREED"] = "1"

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name, gpu=False)

# Move the model to the GPU only when one is actually usable; the previous
# unconditional ``tts.to("cuda")`` failed at import time on CPU-only hosts.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts.to(device)
def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesize ``prompt`` using a reference recording as the speaker voice.

    Args:
        prompt: Text to synthesize. Must be between 2 and 10000 characters;
            ``None`` is treated as an empty prompt.
        language: Language code forwarded to ``tts.tts_to_file``. It may be
            rewritten below depending on the loaded ``model_name``.
        audio_file_pth: Path of the reference audio file.
        mic_file_path: Path of a microphone recording. Used as the reference
            instead of ``audio_file_pth`` when ``use_mic`` is truthy and this
            path is non-empty.
        use_mic: Whether the microphone recording should be preferred.
        agree: Whether the user accepted the Terms & Conditions.

    Returns:
        A ``(waveform, message)`` pair. On success ``waveform`` is the result
        of ``gr.make_waveform`` for the generated ``output.wav`` and
        ``message`` is an HTML ``<audio>`` tag embedding the same audio as a
        base64 data URI. When the input is rejected, or synthesis fails with
        a "device-assert" runtime error, ``waveform`` is ``None`` and
        ``message`` is a user-facing explanation.

    Raises:
        RuntimeError: Re-raised from synthesis when the error text does not
            contain "device-assert".
    """
    if not agree:
        return None, "Please accept the Terms & Conditions."

    speaker_wav = mic_file_path if use_mic and mic_file_path else audio_file_pth
    if not speaker_wav:
        return None, "Please provide a reference audio."

    # A missing prompt (None) is handled like an empty one instead of
    # crashing in len().
    prompt = prompt or ""
    if len(prompt) < 2:
        return None, "Please provide a longer text prompt."
    if len(prompt) > 10000:
        return None, "Text length is limited to 10000 characters. Please try a shorter text."

    # Model-specific language handling, keyed off the configured model name.
    # NOTE(review): "your" presumably targets YourTTS-style models that expect
    # "fr-fr", and "/fr/" single-language French models that take no language
    # argument -- confirm against the TTS model list.
    if language == "fr" and "your" in model_name:
        language = "fr-fr"
    if "/fr/" in model_name:
        language = None

    # NOTE: the result is always written to the same file in the working
    # directory, so overlapping calls overwrite each other's output.
    output_path = "output.wav"
    try:
        tts.tts_to_file(
            text=prompt,
            file_path=output_path,
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as err:
        if "device-assert" in str(err):
            return None, "Runtime error encountered. Please try again later."
        raise

    # Embed the generated audio directly in the returned HTML player.
    with open(output_path, "rb") as audio_file:
        audio = base64.b64encode(audio_file.read()).decode("utf-8")
    audio_player = f'<audio src="data:audio/wav;base64,{audio}" controls autoplay></audio>'
    return gr.make_waveform(audio=output_path), audio_player