# page2speech — app.py
import gradio as gr
import requests
from bs4 import BeautifulSoup
from kokoro_tts import generate_audio
import logging
logging.basicConfig(level=logging.INFO)
class Voices:
flags = {
"a": "๐Ÿ‡บ๐Ÿ‡ธ",
"b": "๐Ÿ‡ฌ๐Ÿ‡ง",
"e": "๐Ÿ‡ช๐Ÿ‡ธ",
"f": "๐Ÿ‡ซ๐Ÿ‡ท",
"h": "๐Ÿ‡ฎ๐Ÿ‡ณ",
"i": "๐Ÿ‡ฎ๐Ÿ‡น",
"j": "๐Ÿ‡ฏ๐Ÿ‡ต",
"p": "๐Ÿ‡ง๐Ÿ‡ท",
"z": "๐Ÿ‡จ๐Ÿ‡ณ",
}
flags_win = {
"a": "american",
"b": "british",
"e": "spanish",
"f": "french",
"h": "hindi",
"i": "italian",
"j": "japanese",
"p": "portuguese",
"z": "chinese",
}
voices = {
"a": [
"af_alloy",
"af_aoede",
"af_bella",
"af_heart",
"af_jessica",
"af_kore",
"af_nicole",
"af_nova",
"af_river",
"af_sarah",
"af_sky",
"am_adam",
"am_echo",
"am_eric",
"am_fenrir",
"am_liam",
"am_michael",
"am_onyx",
"am_puck",
"am_santa",
],
"b": [
"bf_alice",
"bf_emma",
"bf_isabella",
"bf_lily",
"bm_daniel",
"bm_fable",
"bm_george",
"bm_lewis",
],
"e": ["ef_dora", "em_alex", "em_santa"],
"f": ["ff_siwis"],
"h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
"i": ["if_sara", "im_nicola"],
"j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
"p": ["pf_dora", "pm_alex", "pm_santa"],
"z": [
"zf_xiaobei",
"zf_xiaoni",
"zf_xiaoxiao",
"zf_xiaoyi",
"zm_yunjian",
"zm_yunxi",
"zm_yunxia",
"zm_yunyang",
],
}
def extract_text_from_url(url):
try:
# Download the webpage content
response = requests.get(url)
response.raise_for_status() # Raise an exception for bad status codes
# Parse the HTML content
soup = BeautifulSoup(response.text, "html.parser")
# Remove script and style elements
for script in soup(["script", "style"]):
script.decompose()
# Get text and clean it up
text = soup.get_text(separator="\n", strip=True)
# Remove excessive newlines and whitespace
lines = (line.strip() for line in text.splitlines())
text = "\n".join(line for line in lines if line)
return text
except Exception as e:
return f"Error: {str(e)}"
def get_language_choices():
return [
(f"{Voices.flags[code]} {Voices.flags_win[code].title()}", code)
for code in Voices.voices.keys()
]
def get_voice_choices(lang_code):
if lang_code in Voices.voices:
return Voices.voices[lang_code]
return []
def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
try:
audio_data = generate_audio(
text, lang_code=lang_code, voice=voice, progress=progress
)
return (24000, audio_data) # Return tuple of (sample_rate, audio_data)
except Exception as e:
print(f"Error generating audio: {e}")
return None
# Create Gradio interface
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
gr.Markdown("# Web Page Text Extractor & Audio Generator")
gr.Markdown(
"Scrape a website and generate text using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
)
with gr.Row():
url_input = gr.Textbox(
label="Enter URL", value="https://paulgraham.com/words.html"
)
extract_btn = gr.Button("Extract Text")
text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)
with gr.Row():
lang_dropdown = gr.Dropdown(
choices=get_language_choices(),
label="Language",
value="a", # Default to English
)
voice_dropdown = gr.Dropdown(
choices=Voices.voices["a"], # Default to English voices
label="Voice",
value="am_onyx", # Default voice
)
generate_btn = gr.Button("Generate Audio")
audio_output = gr.Audio(label="Generated Audio")
def update_voices(lang_code):
return gr.Dropdown(choices=get_voice_choices(lang_code))
extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
generate_btn.click(
fn=text_to_audio,
inputs=[text_output, lang_dropdown, voice_dropdown],
outputs=audio_output,
)
if __name__ == "__main__":
demo.launch()