Spaces:

lambdaofgod
/

page2speech

Running on Zero

File size: 4,878 Bytes

import gradio as gr
import requests
from bs4 import BeautifulSoup
from kokoro_tts import generate_audio
import logging


# Configure the root logger once at import time so INFO-level (and higher)
# records are emitted.
logging.basicConfig(level=logging.INFO)


class Voices:
    """Static lookup tables describing the languages and voices on offer.

    All three tables share the same single-letter language codes as keys.
    """

    # Flag emoji per language code.
    flags = {
        "a": "🇺🇸", "b": "🇬🇧", "e": "🇪🇸",
        "f": "🇫🇷", "h": "🇮🇳", "i": "🇮🇹",
        "j": "🇯🇵", "p": "🇧🇷", "z": "🇨🇳",
    }

    # Lower-case language name per language code (title-cased for display
    # by get_language_choices).
    flags_win = {
        "a": "american", "b": "british", "e": "spanish",
        "f": "french", "h": "hindi", "i": "italian",
        "j": "japanese", "p": "portuguese", "z": "chinese",
    }

    # Voice ids per language code. Every id begins with its language code;
    # the second letter (f/m) presumably marks female/male voices -- confirm
    # against the Kokoro voice list.
    voices = {
        "a": [
            "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica",
            "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah",
            "af_sky",
            "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
            "am_michael", "am_onyx", "am_puck", "am_santa",
        ],
        "b": [
            "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
            "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
        ],
        "e": ["ef_dora", "em_alex", "em_santa"],
        "f": ["ff_siwis"],
        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
        "i": ["if_sara", "im_nicola"],
        "j": [
            "jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro",
            "jm_kumo",
        ],
        "p": ["pf_dora", "pm_alex", "pm_santa"],
        "z": [
            "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
            "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
        ],
    }


def extract_text_from_url(url, timeout=10):
    """Download a web page and return its visible text.

    Args:
        url: Address of the page to fetch. Surrounding whitespace is ignored.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled host.

    Returns:
        The page text with ``<script>`` and ``<style>`` content removed,
        every line stripped, and blank lines dropped. This function never
        raises: on failure it returns a string starting with ``"Error: "``
        so the message can be shown in the output textbox.
    """
    # Reject missing/blank input up front with a readable message instead of
    # surfacing a low-level URL-parsing error from requests.
    url = url.strip() if isinstance(url, str) else ""
    if not url:
        return "Error: No URL provided"

    try:
        # NOTE(review): the URL is user-supplied and fetched server-side;
        # consider restricting schemes/hosts if this runs on a network with
        # internal services.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Treat 4xx/5xx responses as failures

        soup = BeautifulSoup(response.text, "html.parser")

        # Script and style bodies are not human-readable page content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)

        # Text nodes may still contain internal newlines and padding, so
        # normalise line by line and drop the empty ones.
        stripped_lines = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in stripped_lines if line)
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and the error text is
        # returned for display. Log it so the traceback is not lost.
        logging.exception("Failed to extract text from %s", url)
        return f"Error: {str(e)}"


def get_language_choices():
    """Build the ``(label, value)`` pairs for the language dropdown.

    Each label is the language's flag emoji followed by its title-cased
    name; each value is the single-letter language code. Entries follow the
    key order of ``Voices.voices``.
    """
    choices = []
    for code in Voices.voices:
        flag = Voices.flags[code]
        language_name = Voices.flags_win[code].title()
        choices.append((f"{flag} {language_name}", code))
    return choices


def get_voice_choices(lang_code):
    """Return the voice ids listed for ``lang_code``.

    An unknown language code yields an empty list.
    """
    return Voices.voices.get(lang_code, [])


def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
    """Synthesise speech for ``text`` and return it in Gradio's audio format.

    Args:
        text: Text to read aloud.
        lang_code: Single-letter language code forwarded to ``generate_audio``.
        voice: Voice id forwarded to ``generate_audio``.
        progress: Gradio progress tracker forwarded to ``generate_audio``.

    Returns:
        A ``(sample_rate, audio_data)`` tuple, or ``None`` when there is no
        text to speak or synthesis fails (the failure is logged).
    """
    # Nothing to synthesise: skip the model call entirely.
    if not text or not text.strip():
        logging.warning("No text supplied; skipping audio generation")
        return None

    try:
        audio_data = generate_audio(
            text, lang_code=lang_code, voice=voice, progress=progress
        )
    except Exception:
        # Best-effort at the UI boundary: keep the app responsive, but record
        # the full traceback through the logging setup instead of print().
        logging.exception("Error generating audio")
        return None

    # The sample rate is hard-coded; assumes generate_audio yields 24 kHz
    # audio -- TODO confirm against kokoro_tts.
    return (24000, audio_data)


# Build the Gradio interface: URL -> extracted text -> synthesised audio.
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
    gr.Markdown("# Web Page Text Extractor & Audio Generator")
    gr.Markdown(
        "Scrape a website and generate audio using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
    )
    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL", value="https://paulgraham.com/words.html"
        )
        extract_btn = gr.Button("Extract Text")

    # Editable so the user can trim or correct the text before synthesis.
    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)

    with gr.Row():
        lang_dropdown = gr.Dropdown(
            choices=get_language_choices(),
            label="Language",
            value="a",  # Default to American English
        )
        voice_dropdown = gr.Dropdown(
            choices=Voices.voices["a"],  # Voices matching the default language
            label="Voice",
            value="am_onyx",  # Default voice
        )

    generate_btn = gr.Button("Generate Audio")
    audio_output = gr.Audio(label="Generated Audio")

    def update_voices(lang_code):
        """Refresh the voice dropdown for the newly selected language.

        The selection is reset to the first voice of that language; otherwise
        the previously chosen voice (which belongs to another language) would
        remain selected even though it is no longer among the choices.
        """
        choices = get_voice_choices(lang_code)
        return gr.Dropdown(choices=choices, value=choices[0] if choices else None)

    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
    generate_btn.click(
        fn=text_to_audio,
        inputs=[text_output, lang_dropdown, voice_dropdown],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()