EyeSee_chi

Running

File size: 1,876 Bytes

import base64
import os
import sys
from io import BytesIO

import gradio as gr
import torch
from fastapi import Request
from TTS.api import TTS
from TTS.utils.manage import ModelManager

# Debug output at start-up: print what the installed TTS release exposes as
# its model listing object.
model_names = TTS().list_models()
print(model_names.__dict__)
print(model_names.__dir__())

# Must be set before the XTTS model is loaded below.
# NOTE(review): Coqui's model manager is understood to read this variable to
# skip its interactive licence prompt — confirm against the TTS release in use.
os.environ["COQUI_TOS_AGREED"] = "1"

model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# Pick the device at run time: moving the model to "cuda" unconditionally
# fails on hosts without a CUDA device, so fall back to the CPU there.
device = "cuda" if torch.cuda.is_available() else "cpu"
tts = TTS(model_name, gpu=False)
tts.to(device)

# Prompt-length bounds (in characters) and the scratch file predict() writes.
_MIN_PROMPT_CHARS = 2
_MAX_PROMPT_CHARS = 10000
_OUTPUT_WAV = "output.wav"


def predict(prompt, language, audio_file_pth, mic_file_path, use_mic, agree):
    """Synthesize ``prompt`` in the voice of a reference recording.

    Args:
        prompt: Text to speak. Must be 2 to 10000 characters long; ``None``
            or an empty string is treated as too short.
        language: Language code handed to the TTS model. It may be rewritten
            depending on the module-level ``model_name`` (see below).
        audio_file_pth: Path of an uploaded reference audio file.
        mic_file_path: Path of a microphone recording.
        use_mic: When truthy and ``mic_file_path`` is set, the microphone
            recording is used as the reference instead of the upload.
        agree: Whether the user accepted the Terms & Conditions.

    Returns:
        A 2-tuple. On success: the result of ``gr.make_waveform`` for the
        generated file, and an HTML ``<audio>`` tag embedding the same audio
        as a base64 data URI. On any validation failure or a "device-assert"
        runtime error: ``(None, message)``.

    Raises:
        RuntimeError: Re-raised from the TTS call unless its message contains
            "device-assert".

    Note:
        The audio is always written to ``output.wav`` in the current working
        directory, so overlapping calls share that file.
    """
    if not agree:
        return None, "Please accept the Terms & Conditions."

    speaker_wav = mic_file_path if use_mic and mic_file_path else audio_file_pth
    if not speaker_wav:
        return None, "Please provide a reference audio."

    # A missing prompt (None) counts as length 0 rather than raising TypeError.
    prompt_length = len(prompt) if prompt else 0
    if prompt_length < _MIN_PROMPT_CHARS:
        return None, "Please provide a longer text prompt."
    if prompt_length > _MAX_PROMPT_CHARS:
        return None, (
            f"Text length is limited to {_MAX_PROMPT_CHARS} characters. "
            "Please try a shorter text."
        )

    # Model-specific language tweaks.
    # NOTE(review): presumably "your" matches YourTTS models (which want
    # "fr-fr") and "/fr/" matches single-language French models that take no
    # language argument — confirm against the models actually deployed.
    if language == "fr" and "your" in model_name:
        language = "fr-fr"
    if "/fr/" in model_name:
        language = None

    try:
        tts.tts_to_file(
            text=prompt,
            file_path=_OUTPUT_WAV,
            speaker_wav=speaker_wav,
            language=language,
        )
    except RuntimeError as e:
        if "device-assert" in str(e):
            return None, "Runtime error encountered. Please try again later."
        raise

    # Encode the file contents directly; no intermediate buffer is needed.
    with open(_OUTPUT_WAV, "rb") as audio_file:
        audio = base64.b64encode(audio_file.read()).decode("utf-8")
    audio_player = f'<audio src="data:audio/wav;base64,{audio}" controls autoplay></audio>'
    return gr.make_waveform(audio=_OUTPUT_WAV), audio_player