tts / app.py
tobiccino's picture
update apppy
0eeec8f
raw
history blame
No virus
6.49 kB
import gradio as gr
import argparse
import yaml
from vietTTS.hifigan.mel2wave import mel2wave
from vietTTS.nat.text2mel import text2mel
from vietTTS.synthesizer import nat_normalize_text
import numpy as np
import gradio as gr
import re
from vietnam_number import n2w
from vietnam_number import n2w_single
from synthesize import synthesizer
import noisereduce as nr
import os
import scipy.io.wavfile as wavf
from scipy.io import wavfile
# Heading and description text handed to gr.Interface (title= / description=)
# when GradioApplication builds the demo page.
TITLE = "Saltlux Text to Speech"
DESCRIPTION = "SLT Vietnamese Text to speech demo."
class GradioApplication:
    """Gradio front end that routes a synthesis request to the selected model."""

    def __init__(self):
        """Create the Gradio interface and bind ``infer`` as its callback."""
        inputs = prepare_input()
        outputs = prepare_output()
        self.iface = gr.Interface(
            fn=self.infer,
            title=TITLE,
            description=DESCRIPTION,
            inputs=inputs,
            outputs=outputs,
            allow_flagging='never',
        )

    def infer(self, text, lang, duration_rate):
        """Synthesize *text* with the model named by *lang*.

        Args:
            text: Sentence to synthesize.
            lang: Model selector; ``"Tacotron2"`` picks the Tacotron path,
                any other value (including ``None``) picks VietTTS.
            duration_rate: Forwarded to ``using_viettts``; not used by the
                Tacotron path.

        Returns:
            The pair ``(raw_wav_path, denoised_wav_path)`` produced by the
            selected helper.
        """
        if lang == "Tacotron2":
            return using_tacotron(text)
        # Every other selection falls back to VietTTS.
        return using_viettts(text, duration_rate)

    def run(self):
        """Launch the interface; on Ctrl-C close every running Gradio app."""
        try:
            self.iface.launch(debug=False)
        except KeyboardInterrupt:
            gr.close_all()
def prepare_input():
    """Build the three input widgets of the demo.

    Returns:
        ``[text box, model radio, duration slider]`` -- the same order as the
        ``(text, lang, duration_rate)`` parameters of
        ``GradioApplication.infer``.
    """
    return [
        # Free text to synthesize, pre-filled with a sample sentence.
        gr.Textbox(
            lines=2,
            placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
            value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
            label="Text",
        ),
        # Model choice; nothing is selected initially.
        gr.Radio(
            ['VietTTS', 'Tacotron2'],
            type='value',
            value=None,
            label="Model select",
        ),
        # Duration control, forwarded only to the VietTTS path.
        gr.Slider(
            minimum=0.2,
            maximum=1,
            step=0.1,
            value=1.0,
            label="Duration (The bigger the value, the slower the speech) - only for vietTTS",
        ),
    ]
def prepare_output():
    """Return the two audio players: raw synthesis first, denoised second."""
    raw_player = gr.Audio(label="Output before denoise")
    denoised_player = gr.Audio(label="Output after denoise")
    return [raw_player, denoised_player]
def text_to_speech(text, stop_duration):
    """Synthesize *text* with the VietTTS pipeline and return int16 samples.

    The text is truncated to 500 characters, passed through ``clean_text``
    and ``nat_normalize_text``, converted to a mel spectrogram by
    ``text2mel`` and finally to a waveform by ``mel2wave``.

    Args:
        text: Sentence to synthesize.
        stop_duration: Forwarded unchanged as the third argument of
            ``text2mel``.

    Returns:
        ``numpy.ndarray`` of dtype ``int16`` holding the waveform scaled by
        ``2**15``.
    """
    print("starting")
    # prevent too long text
    text = text[:500]
    text = clean_text(text)
    text = nat_normalize_text(text)
    mel = text2mel(
        text,
        "lexicon.txt",
        stop_duration,
        "acoustic_latest_ckpt.pickle",
        "duration_latest_ckpt.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
    # Scaling by 2**15 maps a sample of +1.0 to 32768, one past the int16
    # maximum, so clip to the representable range before casting; otherwise
    # the out-of-range value does not survive the integer conversion.
    # NOTE(review): presumes mel2wave yields floats nominally in [-1, 1].
    int16_range = np.iinfo(np.int16)
    scaled = np.clip(wave * (2**15), int16_range.min, int16_range.max)
    return scaled.astype(np.int16)
def text_to_speech_tacotron(text):
    """Synthesize *text* with the Tacotron synthesizer and save it to disk.

    Only the first 500 characters of *text* are used.

    Returns:
        Path of the WAV file that was written (``'./out.wav'``).
    """
    print("starting")
    # Slicing leaves shorter inputs untouched, so no length check is needed.
    clipped_text = text[:500]
    wav_path = './out.wav'
    synthesizer.save_wav(synthesizer.tts(clipped_text), wav_path)
    return wav_path
def using_viettts(text, stop_duration):
    """Run VietTTS on *text* and write raw and noise-reduced WAV files.

    Args:
        text: Sentence to synthesize.
        stop_duration: Forwarded to ``text_to_speech``.

    Returns:
        Tuple ``(raw_path, denoised_path)`` of the two files written.
    """
    raw_path = './out.wav'
    denoised_path = './output_denoise.wav'
    sample_rate = 16000

    samples = text_to_speech(text, stop_duration)
    wavf.write(raw_path, sample_rate, samples)

    # The denoiser is fed what was actually stored on disk, so read it back.
    rate, data = wavfile.read(raw_path)
    denoised = nr.reduce_noise(y=data, sr=rate)
    wavfile.write(denoised_path, rate, denoised)
    return (raw_path, denoised_path)
def using_tacotron(text):
    """Run Tacotron on *text* and write a noise-reduced copy of its output.

    Returns:
        Tuple ``(raw_path, denoised_path)``; the raw path is whatever
        ``text_to_speech_tacotron`` returned.
    """
    denoised_path = "./output_denoise.wav"
    raw_path = text_to_speech_tacotron(text)

    # Load the synthesized file, denoise it, and store the result alongside.
    rate, data = wavfile.read(raw_path)
    wavfile.write(denoised_path, rate, nr.reduce_noise(y=data, sr=rate))
    return (raw_path, denoised_path)
# Token patterns, tried in this order so that a full date is not mistaken for
# a day/month pair.  Each is applied with ``match`` and is therefore anchored
# at the start of a token.
_FULL_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}(?P=sep)\d{4}")
_SHORT_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}")
_CLOCK_TIME_RE = re.compile(r"\d{1,2}(?P<sep>[h:])\d{1,2}")


def _drop_leading_zeros(digits):
    """Return *digits* without leading zeros, keeping "0" for an all-zero value."""
    return digits.lstrip('0') or '0'


def _spell_token(token):
    """Return the spoken form of one whitespace-delimited token.

    Text that follows a recognized date or time inside the same token (for
    example a trailing comma) is kept after the spelled-out part.
    """
    found = _FULL_DATE_RE.match(token)
    if found:
        day, month, year = re.split(r"[-/]", found.group(0))
        spoken = ('Ngày ' + n2w(day)
                  + ' tháng ' + n2w(_drop_leading_zeros(month))
                  + ' năm ' + n2w(year))
        return spoken + token[found.end():]

    found = _SHORT_DATE_RE.match(token)
    if found:
        day, month = re.split(r"[-/]", found.group(0))
        spoken = 'Ngày ' + n2w(day) + ' tháng ' + n2w(_drop_leading_zeros(month))
        return spoken + token[found.end():]

    found = _CLOCK_TIME_RE.match(token)
    if found:
        hour, minute = re.split(r"[h:]", found.group(0))
        spoken = n2w(hour) + ' giờ ' + n2w(_drop_leading_zeros(minute)) + ' phút '
        return spoken + token[found.end():]

    if token.isdigit():
        # Bare numbers are read out digit by digit.
        return n2w_single(token)
    return token


def clean_text(test_string):
    """Spell out dates, clock times and bare numbers in Vietnamese.

    Recognized token shapes (checked in this order):

    * ``dd/mm/yyyy`` or ``dd-mm-yyyy`` -> ``Ngày <dd> tháng <mm> năm <yyyy>``
    * ``dd/mm`` or ``dd-mm``           -> ``Ngày <dd> tháng <mm>``
    * ``hh:mm`` or ``hhhmm`` (``h`` separator) -> ``<hh> giờ <mm> phút ``
    * a token made only of digits      -> ``n2w_single(token)``

    Every token is rewritten exactly once and in place, so a number that
    merely occurs inside another token (the ``5`` in ``15h``) is never
    touched, and the whitespace between tokens is preserved as written.

    Args:
        test_string: Raw input sentence.

    Returns:
        The sentence with the recognized tokens replaced by words.
    """
    return re.sub(r"\S+", lambda tok: _spell_token(tok.group(0)), test_string)
if __name__ == '__main__':
    # Script entry point: build the demo UI and serve it until interrupted.
    GradioApplication().run()