ysharma HF staff committed on
Commit
c845ca2
·
verified ·
1 Parent(s): b2fcaf7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +128 -92
app.py CHANGED
@@ -24,15 +24,79 @@ voices = {
24
  }
25
 
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  custom_css = """
28
  .container-wrap {
29
  display: flex !important;
30
  gap: 5px !important;
 
 
 
31
  }
32
 
33
  .vert-group {
34
- min-width: 80px !important;
35
- width: 90px !important;
36
  flex: 0 0 auto !important;
37
  }
38
 
@@ -40,7 +104,7 @@ custom_css = """
40
  white-space: nowrap !important;
41
  overflow: visible !important;
42
  width: auto !important;
43
- font-size: 0.8em !important;
44
  transform-origin: left center !important;
45
  transform: rotate(0deg) translateX(-50%) !important;
46
  position: relative !important;
@@ -48,6 +112,7 @@ custom_css = """
48
  display: inline-block !important;
49
  text-align: center !important;
50
  margin-bottom: 5px !important;
 
51
  }
52
 
53
  .vert-group .wrap label {
@@ -59,24 +124,15 @@ custom_css = """
59
  .slider_input_container {
60
  height: 200px !important;
61
  position: relative !important;
62
- width: 40px !important;
63
  margin: 0 auto !important;
64
  overflow: hidden !important;
65
  }
66
 
67
- ::-webkit-scrollbar {
68
- display: none !important;
69
- }
70
-
71
- * {
72
- -ms-overflow-style: none !important;
73
- scrollbar-width: none !important;
74
- }
75
-
76
  .slider_input_container input[type="range"] {
77
  position: absolute !important;
78
  width: 200px !important;
79
- left: -80px !important;
80
  top: 100px !important;
81
  transform: rotate(90deg) !important;
82
  }
@@ -102,97 +158,73 @@ custom_css = """
102
  border: none !important;
103
  min-width: unset !important;
104
  }
105
- """
106
-
107
-
108
- def parse_voice_formula(formula):
109
- """Parse the voice formula string and return the combined voice tensor."""
110
- if not formula.strip():
111
- raise ValueError("Empty voice formula")
112
-
113
- # Initialize the weighted sum
114
- weighted_sum = None
115
-
116
- # Split the formula into terms
117
- terms = formula.split('+')
118
-
119
- for term in terms:
120
- # Parse each term (format: "0.333 * voice_name")
121
- weight, voice_name = term.strip().split('*')
122
- weight = float(weight.strip())
123
- voice_name = voice_name.strip()
124
-
125
- # Get the voice tensor
126
- if voice_name not in voices:
127
- raise ValueError(f"Unknown voice: {voice_name}")
128
-
129
- voice_tensor = voices[voice_name]
130
-
131
- # Add to weighted sum
132
- if weighted_sum is None:
133
- weighted_sum = weight * voice_tensor
134
- else:
135
- weighted_sum += weight * voice_tensor
136
-
137
- return weighted_sum
138
-
139
- def get_new_voice(formula):
140
- try:
141
- # Parse the formula and get the combined voice tensor
142
- weighted_voices = parse_voice_formula(formula)
143
-
144
- # Save and load the combined voice
145
- torch.save(weighted_voices, "weighted_normalised_voices.pt")
146
- VOICEPACK = torch.load("weighted_normalised_voices.pt", weights_only=False).to(device)
147
- return VOICEPACK
148
- except Exception as e:
149
- raise gr.Error(f"Failed to create voice: {str(e)}")
150
 
151
- def text_to_speech(text, formula):
152
- try:
153
- if not text.strip():
154
- raise gr.Error("Please enter some text")
155
-
156
- if not formula.strip():
157
- raise gr.Error("Please select at least one voice")
158
-
159
- # Get the combined voice
160
- VOICEPACK = get_new_voice(formula)
161
-
162
- # Generate audio
163
- audio, phonemes = generate(MODEL, text, VOICEPACK, lang='a')
164
- return (24000, audio)
165
- except Exception as e:
166
- raise gr.Error(f"Failed to generate speech: {str(e)}")
167
 
 
 
 
 
 
 
168
 
169
  with gr.Blocks(css=custom_css, theme="ocean") as demo:
 
 
 
 
 
 
 
 
 
 
170
  with gr.Row(variant="default", equal_height=True, elem_classes="container-wrap"):
171
  checkboxes = []
172
  sliders = []
173
-
174
- # Define slider configurations
175
  slider_configs = [
176
- ("af", "af"), ("af_bella", "af_bella"), ("af_sarah", "af_sarah"),
177
- ("af_nicole", "af_nicole"), ("af_sky", "af_sky"), ("am_adam", "am_adam"),
178
- ("am_michael", "am_michael"), ("bf_emma", "bf_emma"),
179
- ("bf_isabella", "bf_isabella"), ("bm_george", "bm_george"),
180
- ("bm_lewis", "bm_lewis")
 
 
 
 
 
 
181
  ]
182
 
183
  # Create columns for each slider
184
- for label, name in slider_configs:
185
  with gr.Column(min_width=70, scale=1, variant="default", elem_classes="vert-group"):
186
  checkbox = gr.Checkbox(label='')
187
- slider = gr.Slider(label=name, minimum=0, maximum=1, interactive=False, value=0, step=0.01)
188
  checkboxes.append(checkbox)
189
  sliders.append(slider)
190
 
191
  # Add voice combination formula display
192
  with gr.Row(equal_height=True):
193
- formula_display = gr.Textbox(label="Voice Combination Formula", value="", lines=2, scale=4)
194
- input_text = gr.Textbox(label="Input Text", placeholder="Enter text to convert to speech", lines=2, scale=4)
195
- button_tts = gr.Button("Generate Voice", scale=2, min_width=100)
 
 
 
 
 
 
 
 
 
 
 
196
 
197
  # Generate speech from the selected custom voice
198
  with gr.Row(equal_height=True):
@@ -208,7 +240,7 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
208
  slider_values = list(values[n:])
209
 
210
  # Get active sliders and their names
211
- active_pairs = [(slider_values[i], slider_configs[i][1])
212
  for i in range(len(slider_configs))
213
  if checkbox_values[i] and slider_values[i] > 0]
214
 
@@ -221,7 +253,11 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
221
  if total_sum == 0:
222
  return ""
223
 
224
- # Generate normalized formula
 
 
 
 
225
  terms = []
226
  for value, name in active_pairs:
227
  normalized_value = value / total_sum
@@ -232,7 +268,7 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
232
  def check_box(checkbox):
233
  """Handle checkbox changes."""
234
  if checkbox:
235
- return gr.Slider(interactive=True, value=0.5)
236
  else:
237
  return gr.Slider(interactive=False, value=0)
238
 
@@ -246,7 +282,6 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
246
  inputs=[checkbox],
247
  outputs=[slider]
248
  )
249
-
250
  # Update formula on checkbox changes
251
  checkbox.change(
252
  fn=generate_voice_formula,
@@ -264,9 +299,10 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
264
 
265
  button_tts.click(
266
  fn=text_to_speech,
267
- inputs=[input_text, formula_display,],
268
  outputs=[kokoro_tts]
269
  )
270
 
 
271
  if __name__ == "__main__":
272
  demo.launch()
 
24
  }
25
 
26
 
27
+ def parse_voice_formula(formula):
28
+ """Parse the voice formula string and return the combined voice tensor."""
29
+ if not formula.strip():
30
+ raise ValueError("Empty voice formula")
31
+
32
+ # Initialize the weighted sum
33
+ weighted_sum = None
34
+
35
+ # Split the formula into terms
36
+ terms = formula.split('+')
37
+
38
+ for term in terms:
39
+ # Parse each term (format: "0.333 * voice_name")
40
+ weight, voice_name = term.strip().split('*')
41
+ weight = float(weight.strip())
42
+ voice_name = voice_name.strip()
43
+
44
+ # Get the voice tensor
45
+ if voice_name not in voices:
46
+ raise ValueError(f"Unknown voice: {voice_name}")
47
+
48
+ voice_tensor = voices[voice_name]
49
+
50
+ # Add to weighted sum
51
+ if weighted_sum is None:
52
+ weighted_sum = weight * voice_tensor
53
+ else:
54
+ weighted_sum += weight * voice_tensor
55
+
56
+ return weighted_sum
57
+
58
+ def get_new_voice(formula):
59
+ try:
60
+ # Parse the formula and get the combined voice tensor
61
+ weighted_voices = parse_voice_formula(formula)
62
+
63
+ # Save and load the combined voice
64
+ torch.save(weighted_voices, "weighted_normalised_voices.pt")
65
+ VOICEPACK = torch.load("weighted_normalised_voices.pt", weights_only=False).to(device)
66
+ return VOICEPACK
67
+ except Exception as e:
68
+ raise gr.Error(f"Failed to create voice: {str(e)}")
69
+
70
+ def text_to_speech(text, formula):
71
+ try:
72
+ if not text.strip():
73
+ raise gr.Error("Please enter some text")
74
+
75
+ if not formula.strip():
76
+ raise gr.Error("Please select at least one voice")
77
+
78
+ # Get the combined voice
79
+ VOICEPACK = get_new_voice(formula)
80
+
81
+ # Generate audio
82
+ audio, phonemes = generate(MODEL, text, VOICEPACK, lang='a')
83
+ return (24000, audio)
84
+ except Exception as e:
85
+ raise gr.Error(f"Failed to generate speech: {str(e)}")
86
+
87
+
88
  custom_css = """
89
  .container-wrap {
90
  display: flex !important;
91
  gap: 5px !important;
92
+ justify-content: center !important;
93
+ margin: 0 auto !important;
94
+ max-width: 1400px !important; /* Increased max-width */
95
  }
96
 
97
  .vert-group {
98
+ min-width: 100px !important; /* Increased from 80px */
99
+ width: 120px !important; /* Increased from 90px */
100
  flex: 0 0 auto !important;
101
  }
102
 
 
104
  white-space: nowrap !important;
105
  overflow: visible !important;
106
  width: auto !important;
107
+ font-size: 0.85em !important; /* Slightly increased font size */
108
  transform-origin: left center !important;
109
  transform: rotate(0deg) translateX(-50%) !important;
110
  position: relative !important;
 
112
  display: inline-block !important;
113
  text-align: center !important;
114
  margin-bottom: 5px !important;
115
+ padding: 0 5px !important; /* Added padding */
116
  }
117
 
118
  .vert-group .wrap label {
 
124
  .slider_input_container {
125
  height: 200px !important;
126
  position: relative !important;
127
+ width: 50px !important; /* Increased from 40px */
128
  margin: 0 auto !important;
129
  overflow: hidden !important;
130
  }
131
 
 
 
 
 
 
 
 
 
 
132
  .slider_input_container input[type="range"] {
133
  position: absolute !important;
134
  width: 200px !important;
135
+ left: -75px !important; /* Adjusted from -80px */
136
  top: 100px !important;
137
  transform: rotate(90deg) !important;
138
  }
 
158
  border: none !important;
159
  min-width: unset !important;
160
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
 
162
+ .heading {
163
+ text-align: center !important;
164
+ margin-bottom: 1rem !important;
165
+ }
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
+ .description {
168
+ text-align: center !important;
169
+ margin-bottom: 2rem !important;
170
+ color: rgba(255, 255, 255, 0.7) !important;
171
+ }
172
+ """
173
 
174
  with gr.Blocks(css=custom_css, theme="ocean") as demo:
175
+ gr.Markdown(
176
+ """
177
+ # 🎙️ Voice Mixer - Kokoro TTS
178
+ ### Mix and match different voices to create your perfect text-to-speech voice
179
+
180
+ This app lets you combine multiple voices with different weights to create custom voice combinations.
181
+ Select voices using checkboxes and adjust their weights using the sliders below.
182
+ """
183
+ )
184
+
185
  with gr.Row(variant="default", equal_height=True, elem_classes="container-wrap"):
186
  checkboxes = []
187
  sliders = []
188
+
189
+ # Define slider configurations with emojis
190
  slider_configs = [
191
+ ("af", "Default 👩‍🦰"),
192
+ ("af_bella", "Bella 👩‍🦰 🇺🇸"),
193
+ ("af_sarah", "Sarah 👩‍🦰 🇺🇸"),
194
+ ("af_nicole", "Nicole 👩‍🦰 🇺🇸"),
195
+ ("af_sky", "Sky 👩‍🦰 🇺🇸"),
196
+ ("am_adam", "Adam 👨 🇺🇸"),
197
+ ("am_michael", "Michael 👨 🇺🇸"),
198
+ ("bf_emma", "Emma 👩‍🦰 🇬🇧"),
199
+ ("bf_isabella", "Isabella 👩‍🦰 🇬🇧"),
200
+ ("bm_george", "George 👨 🇬🇧"),
201
+ ("bm_lewis", "Lewis 👨 🇬🇧")
202
  ]
203
 
204
  # Create columns for each slider
205
+ for value, label in slider_configs:
206
  with gr.Column(min_width=70, scale=1, variant="default", elem_classes="vert-group"):
207
  checkbox = gr.Checkbox(label='')
208
+ slider = gr.Slider(label=label, minimum=0, maximum=1, interactive=False, value=0, step=0.01)
209
  checkboxes.append(checkbox)
210
  sliders.append(slider)
211
 
212
  # Add voice combination formula display
213
  with gr.Row(equal_height=True):
214
+ formula_display = gr.Textbox(
215
+ label="Voice Combination Formula",
216
+ value="",
217
+ lines=2,
218
+ scale=4,
219
+ interactive=False
220
+ )
221
+ input_text = gr.Textbox(
222
+ label="Input Text",
223
+ placeholder="Enter text to convert to speech",
224
+ lines=2,
225
+ scale=4
226
+ )
227
+ button_tts = gr.Button("🎙️ Generate Voice", scale=2, min_width=100)
228
 
229
  # Generate speech from the selected custom voice
230
  with gr.Row(equal_height=True):
 
240
  slider_values = list(values[n:])
241
 
242
  # Get active sliders and their names
243
+ active_pairs = [(slider_values[i], slider_configs[i][0]) # Use value instead of label
244
  for i in range(len(slider_configs))
245
  if checkbox_values[i] and slider_values[i] > 0]
246
 
 
253
  if total_sum == 0:
254
  return ""
255
 
256
+ # For single voice, always use weight 1.0
257
+ if len(active_pairs) == 1:
258
+ return f"1.000 * {active_pairs[0][1]}"
259
+
260
+ # Generate normalized formula for multiple voices
261
  terms = []
262
  for value, name in active_pairs:
263
  normalized_value = value / total_sum
 
268
  def check_box(checkbox):
269
  """Handle checkbox changes."""
270
  if checkbox:
271
+ return gr.Slider(interactive=True, value=1.0) # Changed default to 1.0
272
  else:
273
  return gr.Slider(interactive=False, value=0)
274
 
 
282
  inputs=[checkbox],
283
  outputs=[slider]
284
  )
 
285
  # Update formula on checkbox changes
286
  checkbox.change(
287
  fn=generate_voice_formula,
 
299
 
300
  button_tts.click(
301
  fn=text_to_speech,
302
+ inputs=[input_text, formula_display],
303
  outputs=[kokoro_tts]
304
  )
305
 
306
+
307
  if __name__ == "__main__":
308
  demo.launch()