yerang committed on
Commit
d98c79a
·
1 Parent(s): a531730
Files changed (1) hide show
  1. elevenlabs_utils.py +175 -0
elevenlabs_utils.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from io import BytesIO
3
+ from typing import IO, Optional
4
+ import time
5
+ import uuid
6
+ from pathlib import Path
7
+
8
+ from pydub import AudioSegment
9
+ import gradio as gr
10
+ from elevenlabs import Voice, VoiceSettings, save
11
+ from elevenlabs.client import ElevenLabs
12
+
13
+
14
+
15
+
16
def generate_random_filename(parent, extension="txt"):
    """
    Build a unique file path inside ``parent``.

    The file name has the form ``<uuid4>_<unix-seconds>.<extension>``.
    Nothing is created on disk; only the path string is computed.

    Args:
        parent (str): Directory the generated file name is joined onto.
        extension (str): The file extension for the generated filename, with
            or without a leading dot. Default is 'txt'.

    Returns:
        str: ``parent`` joined with the generated file name.
    """
    # Accept both "mp3" and ".mp3" so callers never end up with "name..mp3".
    suffix = extension.lstrip(".")

    # The UUID alone makes the name unique; the timestamp is kept so files
    # can be ordered / aged by name.
    filename = f"{uuid.uuid4()}_{int(time.time())}.{suffix}"
    return os.path.join(parent, filename)
36
+
37
# Model identifier passed to the synthesis call; can be overridden through
# the ELEVEN_LABS_MODEL environment variable.
ELEVEN_LABS_MODEL = os.environ.get("ELEVEN_LABS_MODEL", "eleven_multilingual_v2")

# Language display names. NOTE(review): presumably the languages supported by
# the default model above -- nothing in this module checks that.
ELEVEN_LABS_LANGUAGE_SUPPORTS = [
    "English", "Chinese", "Spanish", "Hindi", "Portuguese",
    "French", "German", "Japanese", "Arabic", "Korean",
    "Indonesian", "Italian", "Dutch", "Turkish", "Polish",
    "Swedish", "Filipino", "Malay", "Russian", "Romanian",
    "Ukrainian", "Greek", "Czech", "Danish", "Finnish",
    "Bulgarian", "Croatian", "Slovak", "Tamil",
]
69
+
70
+
71
+ class ElevenLabsPipeline:
72
+ def __init__(self):
73
+
74
+ eleven_labs_api_key = os.getenv("ELEVENLABS_API_KEY", "sk_f4f7d77bc8065b15824cf52ea46c7d99e0e5db2a0f93b673")
75
+ if eleven_labs_api_key is None:
76
+ raise Exception("ELEVENLABS_API_KEY ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•ด์ฃผ์„ธ์š”.")
77
+ self.client = ElevenLabs(
78
+ api_key=eleven_labs_api_key, # Defaults to ELEVEN_API_KEY
79
+ )
80
+ os.makedirs("./tmp", exist_ok=True)
81
+
82
+ def clone_voice(self, audio, name, description=None):
83
+ response = self.client.voices.get_all()
84
+ for voice in response.voices:
85
+ if voice.name == name:
86
+ return "์กด์žฌํ•˜๋Š” ์Œ์„ฑ์ž…๋‹ˆ๋‹ค. ์Œ์„ฑ ์ƒ์„ฑ์„ ์‹œ์ž‘ํ•ด์ฃผ์„ธ์š”."
87
+
88
+ try:
89
+ voice = self.client.clone(
90
+ name=name,
91
+ description=description, # Optional
92
+ files=[audio],
93
+ )
94
+ return "Voice Clone์„ ์„ฑ๊ณต์ ์œผ๋กœ ์ƒ์„ฑํ–ˆ์Šต๋‹ˆ๋‹ค."
95
+ except Exception as e:
96
+ return str(e)
97
+
98
+ def _get_voice(self, name: str):
99
+ response = self.client.voices.get_all()
100
+ current_voice = None
101
+ for voice in response.voices:
102
+ if voice.name == name:
103
+ current_voice = voice
104
+ break
105
+
106
+ return current_voice
107
+
108
+ def generate_voice(
109
+ self,
110
+ text: str,
111
+ audio: str = None,
112
+ language: str = "ko",
113
+ mute_before_ms: Optional[int] = 0,
114
+ mute_after_ms: Optional[int] = 0,
115
+ stability: float = 0.5,
116
+ similarity_boost: float = 0.75,
117
+ style: float = 0.0,
118
+ use_speaker_boost=True,
119
+ ) -> str:
120
+
121
+ if audio is not None:
122
+ name = Path(audio).stem
123
+ self.clone_voice(audio, name)
124
+ else:
125
+ gr.Info("์Œ์„ฑ์ด ์•ˆ์ฃผ์–ด์กŒ์Šต๋‹ˆ๋‹ค. ๊ธฐ๋ณธ ์Œ์„ฑ์œผ๋กœ ์ƒ์„ฑํ•˜๊ฒ ์Šต๋‹ˆ๋‹ค.", duration=2)
126
+ name = "Laura"
127
+
128
+ current_voice = self._get_voice(name)
129
+
130
+ if current_voice is None:
131
+ current_voice = self._get_voice(name)
132
+
133
+
134
+ response = self.client.generate(
135
+ text=text,
136
+ model=ELEVEN_LABS_MODEL,
137
+ voice=Voice(
138
+ voice_id=current_voice.voice_id,
139
+ settings=VoiceSettings(
140
+ stability=stability,
141
+ similarity_boost=similarity_boost,
142
+ style=style,
143
+ use_speaker_boost=use_speaker_boost,
144
+ language=language,
145
+ ),
146
+ ),
147
+ )
148
+
149
+ # Create a BytesIO object to hold the audio data in memory
150
+ audio_stream = BytesIO()
151
+
152
+ # Write each chunk of audio data to the stream
153
+ for chunk in response:
154
+ if chunk:
155
+ audio_stream.write(chunk)
156
+
157
+ # Reset stream position to the beginning
158
+ audio_stream.seek(0)
159
+
160
+ # Load the audio stream into an AudioSegment
161
+ audio_segment = AudioSegment.from_file(audio_stream, format="mp3")
162
+
163
+ # Create silent segments for before and after
164
+ mute_before = AudioSegment.silent(duration=mute_before_ms)
165
+ mute_after = AudioSegment.silent(duration=mute_after_ms)
166
+
167
+ # Concatenate the segments
168
+ combined_segment = mute_before + audio_segment + mute_after
169
+
170
+ tmp_file = generate_random_filename("./tmp", "mp3")
171
+
172
+ # Export the combined audio to the specified file
173
+ combined_segment.export(tmp_file, format="mp3", bitrate="128k")
174
+
175
+ return tmp_file