lambdaofgod committed on
Commit
8035662
·
1 Parent(s): af74b58
Files changed (4) hide show
  1. app.py +175 -0
  2. kokoro_tts.py +55 -0
  3. pyproject.toml +13 -0
  4. requirements.txt +119 -0
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ import soundfile as sf
5
+ from kokoro_tts import generate_audio
6
+
7
+
8
class Voices:
    """Static lookup tables describing the languages and voices offered in the UI.

    Every table is keyed by the same one-letter language code; voice ids in
    ``voices`` start with that letter.
    """

    # Language code -> flag emoji shown next to the language name.
    flags = {
        "a": "🇺🇸", "b": "🇬🇧", "e": "🇪🇸",
        "f": "🇫🇷", "h": "🇮🇳", "i": "🇮🇹",
        "j": "🇯🇵", "p": "🇧🇷", "z": "🇨🇳",
    }

    # Language code -> plain-text language name.
    flags_win = {
        "a": "american", "b": "british", "e": "spanish",
        "f": "french", "h": "hindi", "i": "italian",
        "j": "japanese", "p": "portuguese", "z": "chinese",
    }

    # Language code -> voice ids available for that language.
    voices = {
        "a": [
            "af_alloy", "af_aoede", "af_bella", "af_heart", "af_jessica",
            "af_kore", "af_nicole", "af_nova", "af_river", "af_sarah",
            "af_sky",
            "am_adam", "am_echo", "am_eric", "am_fenrir", "am_liam",
            "am_michael", "am_onyx", "am_puck", "am_santa",
        ],
        "b": [
            "bf_alice", "bf_emma", "bf_isabella", "bf_lily",
            "bm_daniel", "bm_fable", "bm_george", "bm_lewis",
        ],
        "e": ["ef_dora", "em_alex", "em_santa"],
        "f": ["ff_siwis"],
        "h": ["hf_alpha", "hf_beta", "hm_omega", "hm_psi"],
        "i": ["if_sara", "im_nicola"],
        "j": ["jf_alpha", "jf_gongitsune", "jf_nezumi", "jf_tebukuro", "jm_kumo"],
        "p": ["pf_dora", "pm_alex", "pm_santa"],
        "z": [
            "zf_xiaobei", "zf_xiaoni", "zf_xiaoxiao", "zf_xiaoyi",
            "zm_yunjian", "zm_yunxi", "zm_yunxia", "zm_yunyang",
        ],
    }
83
+
84
+
85
def extract_text_from_url(url, timeout=30):
    """Download a web page and return its visible text.

    Args:
        url (str): Address of the page to fetch.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        str: The page text with ``<script>``/``<style>`` content removed and
        blank lines dropped, or a string of the form ``"Error: <message>"``
        if the download or parsing fails.
    """
    try:
        # Without a timeout a stalled server would block this handler forever.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes

        # Parse the HTML content
        soup = BeautifulSoup(response.text, "html.parser")

        # Remove script and style elements so their source is not read aloud
        for element in soup(["script", "style"]):
            element.decompose()

        # Get text and clean it up
        text = soup.get_text(separator="\n", strip=True)

        # Remove excessive newlines and whitespace
        lines = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in lines if line)
    except Exception as e:
        # The result is displayed in a text box, so failures are reported as
        # text rather than raised.
        return f"Error: {str(e)}"
108
+
109
+
110
def get_language_choices():
    """Build the ``(label, code)`` pairs offered by the language dropdown.

    Each label is the flag emoji followed by the title-cased language name;
    the value is the one-letter language code.
    """
    choices = []
    for lang in Voices.voices.keys():
        label = f"{Voices.flags[lang]} {Voices.flags_win[lang].title()}"
        choices.append((label, lang))
    return choices
115
+
116
+
117
def get_voice_choices(lang_code):
    """Return the voice ids registered for ``lang_code``, or ``[]`` if unknown."""
    return Voices.voices.get(lang_code, [])
121
+
122
+
123
def text_to_audio(text, lang_code, voice, progress=gr.Progress()):
    """Synthesize ``text`` and return it in the form the audio output expects.

    Returns:
        tuple | None: ``(sample_rate, audio_data)`` on success; ``None`` when
        synthesis raises, in which case the error is printed.
    """
    sample_rate = 24000
    try:
        waveform = generate_audio(
            text, lang_code=lang_code, voice=voice, progress=progress
        )
    except Exception as e:
        print(f"Error generating audio: {e}")
        return None
    return (sample_rate, waveform)
132
+
133
+
134
# Create Gradio interface
with gr.Blocks(title="Web Page Text Extractor & Audio Generator") as demo:
    gr.Markdown("# Web Page Text Extractor & Audio Generator")
    gr.Markdown(
        "Scrape a website and generate audio using [hexgrad/Kokoro-82M](https://huggingface.co/hexgrad/Kokoro-82M)"
    )
    with gr.Row():
        url_input = gr.Textbox(
            label="Enter URL", value="https://paulgraham.com/words.html"
        )
        extract_btn = gr.Button("Extract Text")

    # Editable so the user can trim the scraped text before synthesis.
    text_output = gr.Textbox(label="Extracted Text", lines=10, interactive=True)

    with gr.Row():
        lang_dropdown = gr.Dropdown(
            choices=get_language_choices(),
            label="Language",
            value="a",  # Default to English
        )
        voice_dropdown = gr.Dropdown(
            choices=Voices.voices["a"],  # Default to English voices
            label="Voice",
            value="am_onyx",  # Default voice
        )

    generate_btn = gr.Button("Generate Audio")
    audio_output = gr.Audio(label="Generated Audio")

    def update_voices(lang_code):
        """Swap the voice list to match the newly selected language."""
        choices = get_voice_choices(lang_code)
        # Also reset the selection: otherwise the previously chosen voice,
        # which belongs to another language, would stay selected.
        return gr.Dropdown(choices=choices, value=choices[0] if choices else None)

    extract_btn.click(fn=extract_text_from_url, inputs=url_input, outputs=text_output)
    lang_dropdown.change(fn=update_voices, inputs=lang_dropdown, outputs=voice_dropdown)
    generate_btn.click(
        fn=text_to_audio,
        inputs=[text_output, lang_dropdown, voice_dropdown],
        outputs=audio_output,
    )

if __name__ == "__main__":
    demo.launch()
kokoro_tts.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from kokoro import KPipeline
2
+ import soundfile as sf
3
+ import numpy as np
4
+ import logging
5
+
6
+
7
def generate_audio(
    text,
    lang_code="a",
    voice="af_heart",
    speed=1,
    save_segments=False,
    progress=None,
):
    """
    Generate audio from text using Kokoro TTS pipeline

    Args:
        text (str): Text to convert to speech
        lang_code (str): Language code for the TTS model
        voice (str): Voice ID to use
        speed (float): Speech speed multiplier
        save_segments (bool): Whether to also write each segment to
            ``segment_<index>.wav`` in the current directory
        progress: Optional progress tracker exposing
            ``tqdm(iterable, desc=...)``. When ``None`` the segments are
            processed without progress reporting.

    Returns:
        numpy.ndarray: Combined audio data at 24kHz sample rate

    Raises:
        ValueError: If the pipeline produces no audio segments.
    """
    pipeline = KPipeline(lang_code=lang_code)

    generator = pipeline(text, voice=voice, speed=speed, split_pattern=r"\.")

    # Materialize the results so the progress tracker knows the total count.
    segments = list(generator)
    if not segments:
        # np.concatenate would fail with an opaque message on an empty list.
        raise ValueError(
            "No audio segments were generated; the input text may be empty"
        )

    # `progress` is optional: fall back to plain iteration when it is absent.
    if progress is None:
        iterable = segments
    else:
        iterable = progress.tqdm(segments, desc="Generating audio")

    all_audio = []
    for i, (gs, ps, audio) in enumerate(iterable):
        logging.info("Processing segment")
        logging.info("Graphemes: %s", gs)
        logging.info("Phonemes: %s", ps)
        all_audio.append(audio)

        if save_segments:
            sf.write(f"segment_{i}.wav", audio, 24000)

    # Concatenate all audio segments
    return np.concatenate(all_audio)
49
+
50
+
51
if __name__ == "__main__":
    # Example usage: synthesize a short phrase and write it to out.wav.
    waveform = generate_audio("Hello world")
    sf.write("out.wav", waveform, 24000)
pyproject.toml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "page2speech"
version = "0.1.0"
description = "Scrape a web page and turn its text into speech with Kokoro TTS"
readme = "README.md"
requires-python = ">=3.10"
dependencies = [
    "beautifulsoup4>=4.13.3",
    "gradio>=5.16.0",
    "kokoro>=0.3.4",
    # numpy and requests are imported directly by the application code, so
    # they are declared here rather than relied on as transitive dependencies.
    "numpy>=1.26.4",
    "pip>=25.0.1",
    "requests>=2.32.3",
    "soundfile>=0.13.1",
]
requirements.txt ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ annotated-types==0.7.0
3
+ anyio==4.8.0
4
+ attrs==25.1.0
5
+ babel==2.17.0
6
+ beautifulsoup4==4.13.3
7
+ blis==1.2.0
8
+ catalogue==2.0.10
9
+ certifi==2025.1.31
10
+ cffi==1.17.1
11
+ charset-normalizer==3.4.1
12
+ click==8.1.8
13
+ clldutils==3.21.0
14
+ cloudpathlib==0.20.0
15
+ colorama==0.4.6
16
+ colorlog==6.9.0
17
+ confection==0.1.5
18
+ csvw==3.5.1
19
+ curated-tokenizers==0.0.9
20
+ curated-transformers==0.1.1
21
+ cymem==2.0.11
22
+ dlinfo==2.0.0
23
+ docopt==0.6.2
24
+ en_core_web_sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl#sha256=1932429db727d4bff3deed6b34cfc05df17794f4a52eeb26cf8928f7c1a0fb85
25
+ espeakng-loader==0.2.4
26
+ exceptiongroup==1.2.2
27
+ fastapi==0.115.8
28
+ ffmpy==0.5.0
29
+ filelock==3.17.0
30
+ fsspec==2025.2.0
31
+ gradio==5.16.0
32
+ gradio_client==1.7.0
33
+ h11==0.14.0
34
+ httpcore==1.0.7
35
+ httpx==0.28.1
36
+ huggingface-hub==0.28.1
37
+ idna==3.10
38
+ isodate==0.7.2
39
+ Jinja2==3.1.5
40
+ joblib==1.4.2
41
+ jsonschema==4.23.0
42
+ jsonschema-specifications==2024.10.1
43
+ kokoro==0.7.15
44
+ langcodes==3.5.0
45
+ language-tags==1.2.0
46
+ language_data==1.3.0
47
+ loguru==0.7.3
48
+ lxml==5.3.1
49
+ marisa-trie==1.2.1
50
+ Markdown==3.7
51
+ markdown-it-py==3.0.0
52
+ MarkupSafe==2.1.5
53
+ mdurl==0.1.2
54
+ misaki==0.7.15
55
+ mpmath==1.3.0
56
+ murmurhash==1.0.12
57
+ networkx==3.4.2
58
+ num2words==0.5.14
59
+ numpy==1.26.4
60
+ orjson==3.10.15
61
+ packaging==24.2
62
+ pandas==2.2.3
63
+ phonemizer-fork==3.3.2
64
+ pillow==11.1.0
65
+ preshed==3.0.9
66
+ pycparser==2.22
67
+ pydantic==2.10.6
68
+ pydantic_core==2.27.2
69
+ pydub==0.25.1
70
+ Pygments==2.19.1
71
+ pylatexenc==2.10
72
+ pyparsing==3.2.1
73
+ python-dateutil==2.9.0.post0
74
+ python-multipart==0.0.20
75
+ pytz==2025.1
76
+ PyYAML==6.0.2
77
+ rdflib==7.1.3
78
+ referencing==0.36.2
79
+ regex==2024.11.6
80
+ requests==2.32.3
81
+ rfc3986==1.5.0
82
+ rich==13.9.4
83
+ rpds-py==0.22.3
84
+ ruff==0.9.6
85
+ safehttpx==0.1.6
86
+ safetensors==0.5.2
87
+ scipy==1.15.1
88
+ segments==2.2.1
89
+ semantic-version==2.10.0
90
+ shellingham==1.5.4
91
+ six==1.17.0
92
+ smart-open==7.1.0
93
+ sniffio==1.3.1
94
+ soundfile==0.13.1
95
+ soupsieve==2.6
96
+ spacy==3.8.4
97
+ spacy-curated-transformers==0.3.0
98
+ spacy-legacy==3.0.12
99
+ spacy-loggers==1.0.5
100
+ srsly==2.5.1
101
+ starlette==0.45.3
102
+ sympy==1.13.1
103
+ tabulate==0.9.0
104
+ thinc==8.3.4
105
+ tokenizers==0.21.0
106
+ tomlkit==0.13.2
107
+ torch==2.6.0
108
+ tqdm==4.67.1
109
+ transformers==4.48.3
110
+ typer==0.15.1
111
+ typing_extensions==4.12.2
112
+ tzdata==2025.1
113
+ uritemplate==4.1.1
114
+ urllib3==2.3.0
115
+ uvicorn==0.34.0
116
+ wasabi==1.1.3
117
+ weasel==0.4.1
118
+ websockets==14.2
119
+ wrapt==1.17.2