# Page banner captured with the source: "Spaces: Running on Zero".
import logging

import gradio as gr
import requests
from bs4 import BeautifulSoup

from kokoro_tts import generate_audio

logging.basicConfig(level=logging.INFO)
class Voices:
    """Static catalogue of the languages and voices offered in the UI.

    All three tables share the same one-letter language codes as keys. Each
    code is also the first character of every voice identifier listed for
    that language in ``voices``.
    """

    # Flag emoji per language code. Each flag is a pair of regional-indicator
    # code points; they are written as escapes so the literals survive any
    # re-encoding of this file (the previous raw emoji had been corrupted).
    flags = {
        "a": "\U0001F1FA\U0001F1F8",  # US
        "b": "\U0001F1EC\U0001F1E7",  # GB
        "e": "\U0001F1EA\U0001F1F8",  # ES
        "f": "\U0001F1EB\U0001F1F7",  # FR
        "h": "\U0001F1EE\U0001F1F3",  # IN
        "i": "\U0001F1EE\U0001F1F9",  # IT
        "j": "\U0001F1EF\U0001F1F5",  # JP
        "p": "\U0001F1E7\U0001F1F7",  # BR
        "z": "\U0001F1E8\U0001F1F3",  # CN
    }

    # Plain-text language name per language code.
    # NOTE(review): the name suggests a fallback for platforms that cannot
    # render flag emoji -- confirm.
    flags_win = {
        "a": "american",
        "b": "british",
        "e": "spanish",
        "f": "french",
        "h": "hindi",
        "i": "italian",
        "j": "japanese",
        "p": "portuguese",
        "z": "chinese",
    }

    # Voice identifiers available for each language code.
    voices = {
        "a": [
            "af_alloy",
            "af_aoede",
            "af_bella",
            "af_heart",
            "af_jessica",
            "af_kore",
            "af_nicole",
            "af_nova",
            "af_river",
            "af_sarah",
            "af_sky",
            "am_adam",
            "am_echo",
            "am_eric",
            "am_fenrir",
            "am_liam",
            "am_michael",
            "am_onyx",
            "am_puck",
            "am_santa",
        ],
        "b": [
            "bf_alice",
            "bf_emma",
            "bf_isabella",
            "bf_lily",
            "bm_daniel",
            "bm_fable",
            "bm_george",
            "bm_lewis",
        ],
        "e": ["ef_dora", "em_alex", "em_santa"],
        "f": ["ff_siwis"],
        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
        "i": ["if_sara", "im_nicola"],
        "j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
        "p": ["pf_dora", "pm_alex", "pm_santa"],
        "z": [
            "zf_xiaobei",
            "zf_xiaoni",
            "zf_xiaoxiao",
            "zf_xiaoyi",
            "zm_yunjian",
            "zm_yunxi",
            "zm_yunxia",
            "zm_yunyang",
        ],
    }
def extract_text_from_url(url, timeout=10.0):
    """Download a web page and return its visible text, one fragment per line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` can block indefinitely on a host that
            never answers.

    Returns:
        The page text with ``<script>`` and ``<style>`` content removed and
        blank lines dropped. This function does not raise: on any failure
        it returns a string of the form ``"Error: <message>"`` instead.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # turn HTTP 4xx/5xx into an exception

        # Parse the raw bytes instead of ``response.text``: when the server
        # sends no charset, requests assumes ISO-8859-1 for text responses,
        # which garbles UTF-8 pages. A charset the server did declare is
        # passed on as a hint; otherwise BeautifulSoup detects the encoding.
        content_type = response.headers.get("Content-Type", "")
        declared = response.encoding if "charset" in content_type.lower() else None
        soup = BeautifulSoup(
            response.content, "html.parser", from_encoding=declared
        )

        # Script and style bodies are not readable content.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text(separator="\n", strip=True)
        # Drop lines that are empty after trimming.
        lines = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in lines if line)
    except Exception as e:
        # Broad catch is deliberate: the result feeds a UI textbox, so the
        # failure is reported as text. Log it so the traceback is not lost.
        logging.getLogger(__name__).exception("Failed to extract text from %s", url)
        return f"Error: {str(e)}"
def get_language_choices():
    """Build the ``(label, value)`` pairs for the language dropdown.

    Returns:
        A list with one ``(label, code)`` tuple per language code in
        ``Voices.voices``. The label is the language's flag followed by its
        title-cased name; the value is the language code itself.
    """
    return [
        (f"{Voices.flags[code]} {Voices.flags_win[code].title()}", code)
        for code in Voices.voices
    ]
def get_voice_choices(lang_code):
    """Return the voice identifiers available for a language code.

    Args:
        lang_code: A key of ``Voices.voices``.

    Returns:
        A new list of voice identifiers, or an empty list when the code is
        unknown. A copy is returned so callers cannot mutate the shared
        class-level table.
    """
    return list(Voices.voices.get(lang_code, ()))
# Sample rate (Hz) returned alongside the generated waveform.
AUDIO_SAMPLE_RATE = 24000


def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
    """Synthesize speech for ``text`` and return it as a sample-rate/data pair.

    Args:
        text: Text to speak.
        lang_code: Language code forwarded to ``generate_audio``.
        voice: Voice identifier forwarded to ``generate_audio``.
        progress: Gradio progress tracker forwarded to ``generate_audio``.
            The ``gr.Progress()`` default is intentional: Gradio looks for
            such a default to enable progress reporting for the handler.

    Returns:
        A ``(sample_rate, audio_data)`` tuple, or ``None`` when there is no
        text to speak or when generation fails (the failure is logged).
    """
    log = logging.getLogger(__name__)

    # Nothing to synthesize; avoid calling the generator with blank input.
    if not text or not str(text).strip():
        log.warning("No text provided; skipping audio generation")
        return None

    try:
        audio_data = generate_audio(
            text, lang_code=lang_code, voice=voice, progress=progress
        )
    except Exception:
        # Keep the handler from raising; record the full traceback.
        log.exception("Error generating audio")
        return None

    return (AUDIO_SAMPLE_RATE, audio_data)
# Build the Gradio interface: fetch a page's text, then speak it.
# NOTE(review): the indentation of this section had been lost, so which
# widgets sit inside each gr.Row() is reconstructed -- confirm the layout.
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
    gr.Markdown("# Web Page Text Extractor & Audio Generator")
    gr.Markdown(
        "Scrape a website and generate audio using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
    )

    # Step 1: URL in, extracted text out.
    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL", value="https://paulgraham.com/words.html"
        )
        extract_btn = gr.Button("Extract Text")

    # Editable so the text can be trimmed or corrected before synthesis.
    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)

    # Step 2: choose language and voice, then generate audio.
    with gr.Row():
        lang_dropdown = gr.Dropdown(
            choices=get_language_choices(),
            label="Language",
            value="a",  # Default to American English
        )
        voice_dropdown = gr.Dropdown(
            choices=Voices.voices["a"],  # Voices for the default language
            label="Voice",
            value="am_onyx",  # Default voice
        )
        generate_btn = gr.Button("Generate Audio")

    audio_output = gr.Audio(label="Generated Audio")

    def update_voices(lang_code):
        """Repopulate the voice dropdown for the newly selected language."""
        voice_ids = get_voice_choices(lang_code)
        # Reset the selection as well: the previously selected voice belongs
        # to the old language and is not among the new choices.
        return gr.Dropdown(
            choices=voice_ids,
            value=voice_ids[0] if voice_ids else None,
        )

    # Event wiring (must be declared inside the Blocks context).
    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
    generate_btn.click(
        fn=text_to_audio,
        inputs=[text_output, lang_dropdown, voice_dropdown],
        outputs=audio_output,
    )
# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()