Spaces:
Runtime error
Runtime error
File size: 4,845 Bytes
53f8a32 5b5d4c0 53f8a32 f4fe081 53f8a32 9846d74 53f8a32 f4fe081 53f8a32 9ef73e2 b79c929 9846d74 f4fe081 9846d74 53f8a32 80ee0e5 53f8a32 0f11bd1 53f8a32 fbe7d93 53f8a32 fbe7d93 babf22d 37e87fa 53f8a32 fbe7d93 53f8a32 9ef73e2 53f8a32 6473463 9ef73e2 ce55168 f4fe081 9ef73e2 2d5fa2d 53f8a32 2d5fa2d 53f8a32 fbe7d93 53f8a32 fbe7d93 53f8a32 9ef73e2 53f8a32 9ef73e2 53f8a32 9ef73e2 f4fe081 9ef73e2 53f8a32 9ef73e2 53f8a32 4263bcd 53f8a32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
"""
Copyright 2022 Balacoon
TTS interactive demo
"""
import os
import glob
import logging
from typing import cast
from threading import Lock
import gradio as gr
from balacoon_tts import TTS
from huggingface_hub import hf_hub_download, list_repo_files
# mutex that keeps more than one thread from touching the tts object at a time
locker = Lock()

# the shared tts engine; stays None until a model gets selected
tts = None

# location of the addon that the shared tts engine was built from
cur_model_path = None

# speakers cache: model name -> list of speakers of that model
model_to_speakers = {}

# local directory where files of the model repo are stored
model_repo_dir = "data"
# mirror the model repo into the local directory,
# fetching only those files that are not there yet
for name in list_repo_files(repo_id="balacoon/tts"):
    if os.path.isfile(os.path.join(model_repo_dir, name)):
        # this file is already in place, nothing to download
        continue
    hf_hub_download(repo_id="balacoon/tts", filename=name, local_dir=model_repo_dir)
# longest text (in characters) that is passed to the synthesizer, the rest is cut off
_MAX_TEXT_LENGTH = 1024

# introduction shown at the top of the demo page
_INTRO_MARKDOWN = """
<h1 align="center">Balacoon🦝 Text-to-Speech</h1>
1. Write an utterance to generate,
2. Select the model to synthesize with
3. Select speaker
4. Hit "Generate" and listen to the result!
You can learn more about models available
[here](https://huggingface.co/balacoon/tts).
Visit [Balacoon website](https://balacoon.com/) for more info.
"""


def _load_tts(model_path: str) -> None:
    """
    makes the global `tts` object serve the addon stored at `model_path`.
    does nothing if that addon is the one currently loaded.
    the caller is expected to hold `locker` while calling this.
    if `TTS(model_path)` raises, the exception propagates and both
    `tts` and `cur_model_path` are left as None.
    """
    global tts, cur_model_path
    if tts is not None and cur_model_path == model_path:
        return
    # Drop the reference to the previous engine before constructing the new one.
    # The names are rebound to None rather than removed with `del`: deleting
    # the global would leave it undefined if the constructor below raises,
    # and every later `tts is not None` check would fail with NameError.
    # Resetting the path as well guarantees that a failed load is retried.
    tts = None
    cur_model_path = None
    tts = TTS(model_path)
    cur_model_path = model_path


def main():
    """
    builds the demo page (text box, model and speaker dropdowns,
    "Generate" button, audio output), wires the callbacks and launches it.
    """
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        gr.Markdown(_INTRO_MARKDOWN)
        with gr.Row(variant="panel"):
            text = gr.Textbox(label="Text", placeholder="Type something here...")

        with gr.Row():
            with gr.Column(variant="panel"):
                # os.listdir gives no ordering guarantee, sort for a stable list
                model_files = sorted(
                    x for x in os.listdir(model_repo_dir) if x.endswith("_cpu.addon")
                )
                model_name = gr.Dropdown(
                    label="Model",
                    choices=model_files,
                )
            with gr.Column(variant="panel"):
                speaker = gr.Dropdown(label="Speaker", choices=[])

        def set_model(model_name_str: str):
            """
            gets value from `model_name`. either
            uses cached list of speakers for the given model name
            or loads the addon and checks what are the speakers.
            returns an update for the `speaker` dropdown: the speakers
            become its choices and the last one is preselected.
            """
            if not model_name_str:
                # no model is selected, so there are no speakers to offer
                return gr.Dropdown.update(choices=[], value=None, visible=True)

            speakers = model_to_speakers.get(model_name_str)
            if speakers is None:
                with locker:
                    # need to load this model to learn the list of speakers
                    _load_tts(os.path.join(model_repo_dir, model_name_str))
                    speakers = tts.get_speakers()
                    model_to_speakers[model_name_str] = speakers

            # guard against a model that reports no speakers at all
            value = speakers[-1] if speakers else None
            return gr.Dropdown.update(choices=speakers, value=value, visible=True)

        model_name.change(set_model, inputs=model_name, outputs=speaker)

        with gr.Row(variant="panel"):
            generate = gr.Button("Generate")
        with gr.Row(variant="panel"):
            audio = gr.Audio()

        def synthesize_audio(text_str: str, model_name_str: str, speaker_str: str):
            """
            gets utterance to synthesize from `text` Textbox,
            model name from `model_name` and speaker name from `speaker`
            dropdown lists. if any of the three is empty, logs a message
            and returns None without synthesizing.
            otherwise synthesizes the waveform and updates `audio` with it.
            text longer than `_MAX_TEXT_LENGTH` characters is truncated.
            """
            if not text_str or not model_name_str or not speaker_str:
                logging.info("text, model name or speaker are not provided")
                return None

            expected_model_path = os.path.join(model_repo_dir, model_name_str)
            # truncate the text
            text_str = text_str[:_MAX_TEXT_LENGTH]
            with locker:
                # reloads the model only if another one is currently loaded
                _load_tts(expected_model_path)
                samples = tts.synthesize(text_str, speaker_str)
                # read the rate while still holding the lock, so that it comes
                # from the same engine that produced the samples
                sampling_rate = tts.get_sampling_rate()
            return gr.Audio.update(value=(sampling_rate, samples))

        generate.click(synthesize_audio, inputs=[text, model_name, speaker], outputs=audio)

    demo.queue(concurrency_count=1).launch()
# launch the demo only when the file is executed as a script
if __name__ == "__main__":
    main()
|