Spaces:
Sleeping
Sleeping
import os | |
from io import BytesIO | |
from typing import IO, Optional | |
import time | |
import uuid | |
from pathlib import Path | |
from pydub import AudioSegment | |
import gradio as gr | |
from elevenlabs import Voice, VoiceSettings, save | |
from elevenlabs.client import ElevenLabs | |
def generate_random_filename(parent, extension="txt"):
    """
    Build a collision-resistant file path inside ``parent``.

    The file name combines a random UUID4 with the current Unix timestamp
    (whole seconds). Nothing is created on disk; only the path is returned.

    Args:
        parent (str): Directory the generated file name is joined onto.
        extension (str): File extension, with or without a leading dot.
            Default is 'txt'.

    Returns:
        str: ``parent`` joined with ``<uuid4>_<timestamp>.<extension>``.
    """
    # Accept ".mp3" as well as "mp3" so the result never contains "..".
    suffix = extension.lstrip(".")
    unique_id = uuid.uuid4()
    timestamp = int(time.time())
    return os.path.join(parent, f"{unique_id}_{timestamp}.{suffix}")
# Model identifier used for speech generation; can be overridden through the
# ELEVEN_LABS_MODEL environment variable.
ELEVEN_LABS_MODEL = os.getenv(
    "ELEVEN_LABS_MODEL",
    "eleven_multilingual_v2",
)

# Display names of the languages offered by this module, in presentation order.
ELEVEN_LABS_LANGUAGE_SUPPORTS = [
    "English", "Chinese", "Spanish", "Hindi", "Portuguese",
    "French", "German", "Japanese", "Arabic", "Korean",
    "Indonesian", "Italian", "Dutch", "Turkish", "Polish",
    "Swedish", "Filipino", "Malay", "Russian", "Romanian",
    "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
    "Bulgarian", "Croatian", "Slovak", "Tamil",
]
class ElevenLabsPipeline:
    """Wrapper around the ElevenLabs client for voice cloning and text-to-speech.

    Generated audio is written as MP3 files under ``./tmp``.
    """

    def __init__(self):
        """Create the ElevenLabs client and ensure the ``./tmp`` directory exists.

        Raises:
            RuntimeError: If the ``ELEVENLABS_API_KEY`` environment variable
                is unset or empty.
        """
        # The key is read from the environment only. Embedding a fallback key
        # in source would leak the credential and make this check unreachable.
        eleven_labs_api_key = os.getenv("ELEVENLABS_API_KEY")
        if not eleven_labs_api_key:
            # NOTE(review): the message text looks mis-encoded (mojibake);
            # kept verbatim here -- confirm against the original file.
            raise RuntimeError("ELEVENLABS_API_KEY ํ๊ฒฝ๋ณ์๋ฅผ ์ค์ ํด์ฃผ์ธ์.")
        self.client = ElevenLabs(api_key=eleven_labs_api_key)
        os.makedirs("./tmp", exist_ok=True)

    def clone_voice(self, audio, name, description=None):
        """Clone a voice from ``audio`` unless a voice named ``name`` exists.

        Args:
            audio: Audio sample passed to the client's ``clone`` call as the
                single entry of ``files``.
            name: Name of the voice to look up and, if absent, create.
            description: Optional description forwarded to the clone call.

        Returns:
            str: A status message. One fixed message when a voice with this
            name is already listed, another when cloning succeeded, or the
            text of the exception raised by the clone call.
        """
        # Reuse the shared lookup instead of re-implementing the scan here.
        if self._get_voice(name) is not None:
            return "์กด์ฌํ๋ ์์ฑ์ ๋๋ค. ์์ฑ ์์ฑ์ ์์ํด์ฃผ์ธ์."
        try:
            self.client.clone(
                name=name,
                description=description,
                files=[audio],
            )
        except Exception as e:
            # Deliberately best-effort: the failure is reported to the caller
            # as the returned status text rather than raised.
            return str(e)
        return "Voice Clone์ ์ฑ๊ณต์ ์ผ๋ก ์์ฑํ์ต๋๋ค."

    def _get_voice(self, name: str):
        """Return the first listed voice whose ``name`` matches, else ``None``."""
        listing = self.client.voices.get_all()
        return next(
            (voice for voice in listing.voices if voice.name == name),
            None,
        )

    def generate_voice(
        self,
        text: str,
        audio: Optional[str] = None,
        language: str = "ko",
        mute_before_ms: Optional[int] = 0,
        mute_after_ms: Optional[int] = 0,
        stability: float = 0.5,
        similarity_boost: float = 0.75,
        style: float = 0.0,
        use_speaker_boost=True,
    ) -> str:
        """Synthesize ``text`` to an MP3 file and return the file's path.

        Args:
            text: Text to synthesize.
            audio: Path to a reference audio file. When given, a voice named
                after the file stem is cloned (if not already present) and
                used; when ``None`` the voice named "Laura" is used.
            language: Forwarded as ``language`` in the voice settings.
            mute_before_ms: Milliseconds of silence prepended to the speech.
                ``None`` is treated as 0.
            mute_after_ms: Milliseconds of silence appended to the speech.
                ``None`` is treated as 0.
            stability: Forwarded to the voice settings.
            similarity_boost: Forwarded to the voice settings.
            style: Forwarded to the voice settings.
            use_speaker_boost: Forwarded to the voice settings.

        Returns:
            str: Path of the generated MP3 (128k bitrate) under ``./tmp``.

        Raises:
            ValueError: If the selected voice cannot be found in the account's
                voice list.
        """
        clone_status = None
        if audio is not None:
            name = Path(audio).stem
            clone_status = self.clone_voice(audio, name)
        else:
            gr.Info("์์ฑ์ด ์์ฃผ์ด์ก์ต๋๋ค. ๊ธฐ๋ณธ ์์ฑ์ผ๋ก ์์ฑํ๊ฒ ์ต๋๋ค.", duration=2)
            name = "Laura"

        current_voice = self._get_voice(name)
        if current_voice is None:
            # Second lookup kept from the original flow; presumably covers a
            # freshly cloned voice not being listed yet -- TODO confirm.
            current_voice = self._get_voice(name)
        if current_voice is None:
            # Fail with a clear error instead of an AttributeError on None,
            # and surface the clone status, which would otherwise be lost.
            detail = f" (clone attempt reported: {clone_status})" if clone_status else ""
            raise ValueError(f"Voice {name!r} is not available{detail}")

        # NOTE(review): ``language`` is passed as a VoiceSettings field;
        # confirm the installed SDK version accepts it.
        response = self.client.generate(
            text=text,
            model=ELEVEN_LABS_MODEL,
            voice=Voice(
                voice_id=current_voice.voice_id,
                settings=VoiceSettings(
                    stability=stability,
                    similarity_boost=similarity_boost,
                    style=style,
                    use_speaker_boost=use_speaker_boost,
                    language=language,
                ),
            ),
        )

        # Collect the streamed chunks in memory, skipping empty ones.
        audio_stream = BytesIO(b"".join(chunk for chunk in response if chunk))
        audio_segment = AudioSegment.from_file(audio_stream, format="mp3")

        # The padding parameters are Optional, so treat None as "no silence".
        mute_before = AudioSegment.silent(duration=mute_before_ms or 0)
        mute_after = AudioSegment.silent(duration=mute_after_ms or 0)
        combined_segment = mute_before + audio_segment + mute_after

        tmp_file = generate_random_filename("./tmp", "mp3")
        combined_segment.export(tmp_file, format="mp3", bitrate="128k")
        return tmp_file