# Voice-clone / app.py
# Author: Abhaykoul
# Last commit: "Update app.py" (01dfdfb, verified)
import os
import gradio as gr
import torch
from TTS.api import TTS
import spaces # Ensure this is the correct module for your environment
# Record agreement to the Coqui terms of service via the environment.
os.environ.update({"COQUI_TOS_AGREED": "1"})

# Use the GPU when torch reports one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The model is constructed without a device argument and moved afterwards.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
tts.to(device)
@spaces.GPU()  # enable_queue=True was dropped: queueing is now always on
def clone(text, audio):
    """Synthesize ``text`` in the voice of the reference recording.

    Args:
        text: The text to speak, as entered in the UI textbox.
        audio: Path to the reference voice sample, or ``None`` when the
            user has not provided one.

    Returns:
        Path to the generated WAV file. A fresh file is created for every
        call so that simultaneous requests never overwrite each other.

    Raises:
        gr.Error: If the text is blank or no reference audio was supplied.
    """
    import tempfile

    # Fail early with a readable message instead of an opaque model error.
    if not text or not text.strip():
        raise gr.Error("Please enter some text to synthesize.")
    if not audio:
        raise gr.Error("Please upload a reference voice sample.")

    # A fixed "./output.wav" would be shared by every request; reserve a
    # unique path instead. The file is intentionally left on disk because
    # the caller still has to read it after this function returns.
    fd, output_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    tts.tts_to_file(
        text=text,
        speaker_wav=audio,
        language="en",
        file_path=output_path,
    )
    return output_path
# Styling for the reference-audio upload label. It is passed to gr.Blocks via
# its ``css`` argument rather than embedded as a <style> tag in a Markdown
# component, where HTML sanitisation may strip it.
CUSTOM_CSS = """
#audio_upload > label {
    background-color: #14b8a6;
    color: white;
    padding: 10px;
    border-radius: 5px;
    cursor: pointer;
}
#audio_upload > label:hover {
    background-color: #0d9488;
}
"""

# Define the UI using Gradio Blocks with enhanced styling
with gr.Blocks(
    title="Advanced Voice Clone",
    theme=gr.themes.Soft(primary_hue="teal"),
    css=CUSTOM_CSS,
) as demo:
    # Header: title, attribution and licence notice.
    gr.Markdown(
        """
        # 🎤 Voice Clone
        **by Vortex**
        This application uses the **xtts_v2** model for voice cloning.
        *Non-commercial use only.*
        [Coqui Public Model License](https://coqui.ai/cpml) |
        ---
        """
    )

    with gr.Row():
        # Left column: the inputs and the trigger button.
        with gr.Column(scale=1):
            text_input = gr.Textbox(
                label="Enter Text",
                placeholder="Type the text you want to clone...",
                lines=4,
            )
            audio_input = gr.Audio(
                type="filepath",
                label="Upload Reference Voice",
                elem_id="audio_upload",  # targeted by CUSTOM_CSS
            )
            clone_button = gr.Button("Clone Voice", variant="primary")

        # Right column: read-only player for the generated audio.
        with gr.Column(scale=1):
            output_audio = gr.Audio(
                type="filepath",
                label="Cloned Voice Output",
                interactive=False,
            )

    # Footer.
    gr.Markdown(
        """
        ---
        ❤️ If you find this tool useful, please consider giving it a thumbs up!
        """
    )

    # Connect the button to the function
    clone_button.click(
        clone,
        inputs=[text_input, audio_input],
        outputs=output_audio,
        queue=True,
    )
# Start the server, binding to all interfaces on port 7860.
launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
demo.launch(**launch_options)