Spaces:

ysharma
/

Make_Custom_Voices_With_KokoroTTS

Running on T4

App Files Files Community

ysharma HF staff committed on 4 days ago

Commit

c845ca2

verified ·

1 Parent(s): b2fcaf7

Update app.py

Browse files

Files changed (1) hide show

app.py +128 -92

app.py CHANGED Viewed

@@ -24,15 +24,79 @@ voices = {
 }
 custom_css = """
 .container-wrap {
     display: flex !important;
     gap: 5px !important;
 }
 .vert-group {
-    min-width: 80px !important;
-    width: 90px !important;
     flex: 0 0 auto !important;
 }
@@ -40,7 +104,7 @@ custom_css = """
     white-space: nowrap !important;
     overflow: visible !important;
     width: auto !important;
-    font-size: 0.8em !important;
     transform-origin: left center !important;
     transform: rotate(0deg) translateX(-50%) !important;
     position: relative !important;
@@ -48,6 +112,7 @@ custom_css = """
     display: inline-block !important;
     text-align: center !important;
     margin-bottom: 5px !important;
 }
 .vert-group .wrap label {
@@ -59,24 +124,15 @@ custom_css = """
 .slider_input_container {
     height: 200px !important;
     position: relative !important;
-    width: 40px !important;
     margin: 0 auto !important;
     overflow: hidden !important;
 }
-::-webkit-scrollbar {
-    display: none !important;
-}
-* {
-    -ms-overflow-style: none !important;
-    scrollbar-width: none !important;
-}
 .slider_input_container input[type="range"] {
     position: absolute !important;
     width: 200px !important;
-    left: -80px !important;
     top: 100px !important;
     transform: rotate(90deg) !important;
 }
@@ -102,97 +158,73 @@ custom_css = """
     border: none !important;
     min-width: unset !important;
 }
-"""
-def parse_voice_formula(formula):
-    """Parse the voice formula string and return the combined voice tensor."""
-    if not formula.strip():
-        raise ValueError("Empty voice formula")
-    # Initialize the weighted sum
-    weighted_sum = None
-    # Split the formula into terms
-    terms = formula.split('+')
-    for term in terms:
-        # Parse each term (format: "0.333 * voice_name")
-        weight, voice_name = term.strip().split('*')
-        weight = float(weight.strip())
-        voice_name = voice_name.strip()
-        # Get the voice tensor
-        if voice_name not in voices:
-            raise ValueError(f"Unknown voice: {voice_name}")
-        voice_tensor = voices[voice_name]
-        # Add to weighted sum
-        if weighted_sum is None:
-            weighted_sum = weight * voice_tensor
-        else:
-            weighted_sum += weight * voice_tensor
-    return weighted_sum
-def get_new_voice(formula):
-    try:
-        # Parse the formula and get the combined voice tensor
-        weighted_voices = parse_voice_formula(formula)
-        # Save and load the combined voice
-        torch.save(weighted_voices, "weighted_normalised_voices.pt")
-        VOICEPACK = torch.load("weighted_normalised_voices.pt", weights_only=False).to(device)
-        return VOICEPACK
-    except Exception as e:
-        raise gr.Error(f"Failed to create voice: {str(e)}")
-def text_to_speech(text, formula):
-    try:
-        if not text.strip():
-            raise gr.Error("Please enter some text")
-        if not formula.strip():
-            raise gr.Error("Please select at least one voice")
-        # Get the combined voice
-        VOICEPACK = get_new_voice(formula)
-        # Generate audio
-        audio, phonemes = generate(MODEL, text, VOICEPACK, lang='a')
-        return (24000, audio)
-    except Exception as e:
-        raise gr.Error(f"Failed to generate speech: {str(e)}")
 with gr.Blocks(css=custom_css, theme="ocean") as demo:
     with gr.Row(variant="default", equal_height=True, elem_classes="container-wrap"):
         checkboxes = []
         sliders = []
-        # Define slider configurations
         slider_configs = [
-            ("af", "af"), ("af_bella", "af_bella"), ("af_sarah", "af_sarah"),
-            ("af_nicole", "af_nicole"), ("af_sky", "af_sky"), ("am_adam", "am_adam"),
-            ("am_michael", "am_michael"), ("bf_emma", "bf_emma"),
-            ("bf_isabella", "bf_isabella"), ("bm_george", "bm_george"),
-            ("bm_lewis", "bm_lewis")
         ]
         # Create columns for each slider
-        for label, name in slider_configs:
             with gr.Column(min_width=70, scale=1, variant="default", elem_classes="vert-group"):
                 checkbox = gr.Checkbox(label='')
-                slider = gr.Slider(label=name, minimum=0, maximum=1, interactive=False, value=0, step=0.01)
                 checkboxes.append(checkbox)
                 sliders.append(slider)
     # Add voice combination formula display
     with gr.Row(equal_height=True):
-        formula_display = gr.Textbox(label="Voice Combination Formula", value="", lines=2, scale=4)
-        input_text = gr.Textbox(label="Input Text", placeholder="Enter text to convert to speech", lines=2, scale=4)
-        button_tts = gr.Button("Generate Voice", scale=2, min_width=100)
     # Generate speech from the selected custom voice
     with gr.Row(equal_height=True):
@@ -208,7 +240,7 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
         slider_values = list(values[n:])
         # Get active sliders and their names
-        active_pairs = [(slider_values[i], slider_configs[i][1])
                        for i in range(len(slider_configs))
                        if checkbox_values[i] and slider_values[i] > 0]
@@ -221,7 +253,11 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
         if total_sum == 0:
             return ""
-        # Generate normalized formula
         terms = []
         for value, name in active_pairs:
             normalized_value = value / total_sum
@@ -232,7 +268,7 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
     def check_box(checkbox):
         """Handle checkbox changes."""
         if checkbox:
-            return gr.Slider(interactive=True, value=0.5)
         else:
             return gr.Slider(interactive=False, value=0)
@@ -246,7 +282,6 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
             inputs=[checkbox],
             outputs=[slider]
         )
         # Update formula on checkbox changes
         checkbox.change(
             fn=generate_voice_formula,
@@ -264,9 +299,10 @@ with gr.Blocks(css=custom_css, theme="ocean") as demo:
     button_tts.click(
         fn=text_to_speech,
-        inputs=[input_text, formula_display,],
         outputs=[kokoro_tts]
     )
 if __name__ == "__main__":
     demo.launch()

 }
+def parse_voice_formula(formula):
+    """Parse the voice formula string and return the combined voice tensor."""
+    if not formula.strip():
+        raise ValueError("Empty voice formula")
+    # Initialize the weighted sum
+    weighted_sum = None
+    # Split the formula into terms
+    terms = formula.split('+')
+    for term in terms:
+        # Parse each term (format: "0.333 * voice_name")
+        weight, voice_name = term.strip().split('*')
+        weight = float(weight.strip())
+        voice_name = voice_name.strip()
+        # Get the voice tensor
+        if voice_name not in voices:
+            raise ValueError(f"Unknown voice: {voice_name}")
+        voice_tensor = voices[voice_name]
+        # Add to weighted sum
+        if weighted_sum is None:
+            weighted_sum = weight * voice_tensor
+        else:
+            weighted_sum += weight * voice_tensor
+    return weighted_sum
+def get_new_voice(formula):
+    try:
+        # Parse the formula and get the combined voice tensor
+        weighted_voices = parse_voice_formula(formula)
+        # Save and load the combined voice
+        torch.save(weighted_voices, "weighted_normalised_voices.pt")
+        VOICEPACK = torch.load("weighted_normalised_voices.pt", weights_only=False).to(device)
+        return VOICEPACK
+    except Exception as e:
+        raise gr.Error(f"Failed to create voice: {str(e)}")
+def text_to_speech(text, formula):
+    try:
+        if not text.strip():
+            raise gr.Error("Please enter some text")
+        if not formula.strip():
+            raise gr.Error("Please select at least one voice")
+        # Get the combined voice
+        VOICEPACK = get_new_voice(formula)
+        # Generate audio
+        audio, phonemes = generate(MODEL, text, VOICEPACK, lang='a')
+        return (24000, audio)
+    except Exception as e:
+        raise gr.Error(f"Failed to generate speech: {str(e)}")
 custom_css = """
 .container-wrap {
     display: flex !important;
     gap: 5px !important;
+    justify-content: center !important;
+    margin: 0 auto !important;
+    max-width: 1400px !important;  /* Increased max-width */
 }
 .vert-group {
+    min-width: 100px !important;   /* Increased from 80px */
+    width: 120px !important;       /* Increased from 90px */
     flex: 0 0 auto !important;
 }
     white-space: nowrap !important;
     overflow: visible !important;
     width: auto !important;
+    font-size: 0.85em !important;  /* Slightly increased font size */
     transform-origin: left center !important;
     transform: rotate(0deg) translateX(-50%) !important;
     position: relative !important;
     display: inline-block !important;
     text-align: center !important;
     margin-bottom: 5px !important;
+    padding: 0 5px !important;     /* Added padding */
 }
 .vert-group .wrap label {
 .slider_input_container {
     height: 200px !important;
     position: relative !important;
+    width: 50px !important;        /* Increased from 40px */
     margin: 0 auto !important;
     overflow: hidden !important;
 }
 .slider_input_container input[type="range"] {
     position: absolute !important;
     width: 200px !important;
+    left: -75px !important;        /* Adjusted from -80px */
     top: 100px !important;
     transform: rotate(90deg) !important;
 }
     border: none !important;
     min-width: unset !important;
 }
+.heading {
+    text-align: center !important;
+    margin-bottom: 1rem !important;
+}
+.description {
+    text-align: center !important;
+    margin-bottom: 2rem !important;
+    color: rgba(255, 255, 255, 0.7) !important;
+}
+"""
 with gr.Blocks(css=custom_css, theme="ocean") as demo:
+    gr.Markdown(
+        """
+        # 🎙️ Voice Mixer - Kokoro TTS
+        ### Mix and match different voices to create your perfect text-to-speech voice
+        This app lets you combine multiple voices with different weights to create custom voice combinations.
+        Select voices using checkboxes and adjust their weights using the sliders below.
+        """
+    )
     with gr.Row(variant="default", equal_height=True, elem_classes="container-wrap"):
         checkboxes = []
         sliders = []
+        # Define slider configurations with emojis
         slider_configs = [
+            ("af", "Default 👩‍🦰"),
+            ("af_bella", "Bella 👩‍🦰 🇺🇸"),
+            ("af_sarah", "Sarah 👩‍🦰 🇺🇸"),
+            ("af_nicole", "Nicole 👩‍🦰 🇺🇸"),
+            ("af_sky", "Sky 👩‍🦰 🇺🇸"),
+            ("am_adam", "Adam 👨 🇺🇸"),
+            ("am_michael", "Michael 👨 🇺🇸"),
+            ("bf_emma", "Emma 👩‍🦰 🇬🇧"),
+            ("bf_isabella", "Isabella 👩‍🦰 🇬🇧"),
+            ("bm_george", "George 👨 🇬🇧"),
+            ("bm_lewis", "Lewis 👨 🇬🇧")
         ]
         # Create columns for each slider
+        for value, label in slider_configs:
             with gr.Column(min_width=70, scale=1, variant="default", elem_classes="vert-group"):
                 checkbox = gr.Checkbox(label='')
+                slider = gr.Slider(label=label, minimum=0, maximum=1, interactive=False, value=0, step=0.01)
                 checkboxes.append(checkbox)
                 sliders.append(slider)
     # Add voice combination formula display
     with gr.Row(equal_height=True):
+        formula_display = gr.Textbox(
+            label="Voice Combination Formula",
+            value="",
+            lines=2,
+            scale=4,
+            interactive=False
+        )
+        input_text = gr.Textbox(
+            label="Input Text",
+            placeholder="Enter text to convert to speech",
+            lines=2,
+            scale=4
+        )
+        button_tts = gr.Button("🎙️ Generate Voice", scale=2, min_width=100)
     # Generate speech from the selected custom voice
     with gr.Row(equal_height=True):
         slider_values = list(values[n:])
         # Get active sliders and their names
+        active_pairs = [(slider_values[i], slider_configs[i][0])  # Use value instead of label
                        for i in range(len(slider_configs))
                        if checkbox_values[i] and slider_values[i] > 0]
         if total_sum == 0:
             return ""
+        # For single voice, always use weight 1.0
+        if len(active_pairs) == 1:
+            return f"1.000 * {active_pairs[0][1]}"
+        # Generate normalized formula for multiple voices
         terms = []
         for value, name in active_pairs:
             normalized_value = value / total_sum
     def check_box(checkbox):
         """Handle checkbox changes."""
         if checkbox:
+            return gr.Slider(interactive=True, value=1.0)  # Changed default to 1.0
         else:
             return gr.Slider(interactive=False, value=0)
             inputs=[checkbox],
             outputs=[slider]
         )
         # Update formula on checkbox changes
         checkbox.change(
             fn=generate_voice_formula,
     button_tts.click(
         fn=text_to_speech,
+        inputs=[input_text, formula_display],
         outputs=[kokoro_tts]
     )
 if __name__ == "__main__":
     demo.launch()