|
import gradio as gr |
|
import argparse |
|
import yaml |
|
from vietTTS.hifigan.mel2wave import mel2wave |
|
from vietTTS.nat.text2mel import text2mel |
|
from vietTTS.synthesizer import nat_normalize_text |
|
import numpy as np |
|
import gradio as gr |
|
import re |
|
from vietnam_number import n2w |
|
from vietnam_number import n2w_single |
|
from synthesize import synthesizer |
|
import noisereduce as nr |
|
import os |
|
import scipy.io.wavfile as wavf |
|
from scipy.io import wavfile |
|
|
|
|
|
# Heading shown at the top of the Gradio page.
TITLE = "Saltlux Text to Speech"

# Short blurb rendered under the heading.
DESCRIPTION = "SLT Vietnamese Text to speech demo."
|
|
|
|
|
class GradioApplication:
    """Gradio front end that routes a synthesis request to one of two TTS back ends.

    The interface is built once in ``__init__`` from ``prepare_input()`` and
    ``prepare_output()`` and is served by ``run()``.
    """

    def __init__(self):
        """Build the ``gr.Interface`` and keep it on ``self.iface``."""
        inputs = prepare_input()
        outputs = prepare_output()

        self.iface = gr.Interface(
            fn=self.infer,
            title=TITLE,
            description=DESCRIPTION,
            inputs=inputs,
            outputs=outputs,
            allow_flagging='never',
        )

    def infer(self, text, lang, duration_rate):
        """Synthesize ``text`` with the back end selected by ``lang``.

        Args:
            text: Text to synthesize.
            lang: Model selector. Exactly ``"VietTTS"`` picks the VietTTS
                pipeline; every other value -- including ``None``, which is
                the radio button's initial value -- falls through to Tacotron.
            duration_rate: Slider value; only forwarded to the VietTTS path.

        Returns:
            Whatever ``using_viettts`` / ``using_tacotron`` return (a pair of
            wav file paths: before and after denoising).
        """
        if lang == "VietTTS":
            return using_viettts(text, duration_rate)
        return using_tacotron(text)

    def run(self):
        """Launch the interface; on Ctrl-C close all running Gradio apps."""
        try:
            self.iface.launch(debug=False)
        except KeyboardInterrupt:
            gr.close_all()
|
|
|
|
|
def prepare_input():
    """Create the three input widgets of the demo.

    Returns:
        A list ``[textbox, model_radio, duration_slider]`` in the positional
        order expected by ``GradioApplication.infer``.
    """
    return [
        # Free-text field, pre-filled with a sample Vietnamese sentence.
        gr.Textbox(
            label="Text",
            lines=2,
            placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
            value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
        ),
        # Model selector; nothing is pre-selected (value=None).
        gr.Radio(
            ['VietTTS', 'Tacotron2'],
            label="Model select",
            type='value',
            value=None,
        ),
        # Duration control, 0.2 .. 1 in steps of 0.1, starting at 1.0.
        gr.Slider(
            label="Duration (The bigger the value, the slower the speech) - only for vietTTS",
            minimum=0.2,
            maximum=1,
            step=0.1,
            value=1.0,
        ),
    ]
|
|
|
|
|
def prepare_output():
    """Create the two audio players: raw output first, denoised output second."""
    captions = ("Output before denoise", "Output after denoise")
    return [gr.Audio(label=caption) for caption in captions]
|
def text_to_speech(text, stop_duration):
    """Synthesize ``text`` with the VietTTS pipeline and return 16-bit PCM samples.

    Steps: truncate to 500 characters, expand dates/times/numbers with
    ``clean_text``, normalize with ``nat_normalize_text``, run ``text2mel`` and
    then ``mel2wave``.

    Args:
        text: Input text. Only the first 500 characters are used; truncation
            happens before ``clean_text`` expands anything.
        stop_duration: Forwarded unchanged as the third positional argument of
            ``text2mel``.

    Returns:
        The waveform scaled by ``2**15`` and cast to ``np.int16``.
    """
    print("starting")

    # Slicing is already a no-op for short strings, so no length check needed.
    text = text[:500]

    text = clean_text(text)
    text = nat_normalize_text(text)
    mel = text2mel(
        text,
        "lexicon.txt",
        stop_duration,
        "acoustic_latest_ckpt.pickle",
        "duration_latest_ckpt.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")

    # NOTE(review): assumes mel2wave yields float samples nominally in [-1, 1].
    # A sample of exactly +1.0 (or any overshoot) scales to >= 32768, which
    # does not fit in int16 and would wrap to a large negative value, so
    # saturate to the int16 range before casting.
    scaled = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
    return scaled.astype(np.int16)
|
|
|
def text_to_speech_tacotron(text):
    """Synthesize ``text`` with the Tacotron synthesizer and save it to disk.

    Input longer than 500 characters is cut to its first 500 characters.

    Returns:
        The path of the written wav file (``'./out.wav'``).
    """
    print("starting")

    clipped = text if len(text) <= 500 else text[:500]

    wav_path = './out.wav'
    audio = synthesizer.tts(clipped)
    synthesizer.save_wav(audio, wav_path)
    return wav_path
|
|
|
|
|
|
|
def using_viettts(text, stop_duration):
    """Run VietTTS and write both the raw and the noise-reduced wav files.

    Args:
        text: Text to synthesize.
        stop_duration: Passed through to ``text_to_speech``.

    Returns:
        ``(raw_path, denoised_path)`` -- ``'./out.wav'`` and
        ``'./output_denoise.wav'``.
    """
    raw_path = './out.wav'
    denoised_path = './output_denoise.wav'
    sample_rate = 16000

    samples = text_to_speech(text, stop_duration)

    # Persist the raw audio, then read it back as the denoiser input.
    wavf.write(raw_path, sample_rate, samples)
    rate, data = wavfile.read(raw_path)

    cleaned = nr.reduce_noise(y=data, sr=rate)
    wavfile.write(denoised_path, rate, cleaned)
    return (raw_path, denoised_path)
|
|
|
def using_tacotron(text):
    """Run Tacotron and additionally write a noise-reduced copy of its output.

    Returns:
        ``(raw_path, denoised_path)`` where ``raw_path`` is the file produced
        by ``text_to_speech_tacotron`` and ``denoised_path`` is
        ``"./output_denoise.wav"``.
    """
    denoised_path = "./output_denoise.wav"
    raw_path = text_to_speech_tacotron(text)

    # Load the synthesized file and run it through the noise reducer.
    rate, data = wavfile.read(raw_path)
    cleaned = nr.reduce_noise(y=data, sr=rate)

    wavfile.write(denoised_path, rate, cleaned)
    return (raw_path, denoised_path)
|
|
|
|
|
# All three patterns are applied with ``match``, i.e. anchored at the start of
# a token only: a token merely has to *begin* with a date or a time.
_FULL_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}(?P=sep)\d{4}")
_SHORT_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}")
_CLOCK_TIME_RE = re.compile(r"\d{1,2}(?P<sep>[h:])\d{1,2}")


def _strip_leading_zeros(digits):
    """Return ``digits`` without leading zeros; an all-zero field becomes ``'0'``."""
    return digits.lstrip('0') or '0'


def _verbalize_token(token):
    """Return the spelled-out form of one whitespace-delimited token.

    Tokens that are not a date, a time or a plain digit string are returned
    unchanged. For dates and times the whole token is replaced, so any text
    following the matched prefix is dropped.
    """
    found = _FULL_DATE_RE.match(token)
    if found:
        day, month, year = re.split(r"[-/]", found.group(0))
        return ('Ngày ' + n2w(day)
                + ' tháng ' + n2w(_strip_leading_zeros(month))
                + ' năm ' + n2w(year))

    found = _SHORT_DATE_RE.match(token)
    if found:
        day, month = re.split(r"[-/]", found.group(0))
        return 'Ngày ' + n2w(day) + ' tháng ' + n2w(_strip_leading_zeros(month))

    found = _CLOCK_TIME_RE.match(token)
    if found:
        hours, minutes = re.split(r"[h:]", found.group(0))
        # The trailing space after 'phút' is part of the established output.
        return n2w(hours) + ' giờ ' + n2w(_strip_leading_zeros(minutes)) + ' phút '

    if token.isdigit():
        return n2w_single(token)

    return token


def clean_text(test_string):
    """Spell out dates, clock times and bare numbers in ``test_string``.

    Each whitespace-delimited token is examined independently, in this order:

    1. ``dd/mm/yyyy`` or ``dd-mm-yyyy`` -> ``'Ngày <d> tháng <m> năm <y>'``
    2. ``dd/mm`` or ``dd-mm``           -> ``'Ngày <d> tháng <m>'``
    3. ``H:MM`` or ``HhMM``             -> ``'<h> giờ <m> phút '``
    4. all-digit token                  -> ``n2w_single(token)``

    Date and time fields are converted with ``n2w``. Leading zeros are removed
    from the month / minute field; a field made only of zeros is kept as
    ``'0'`` rather than collapsing to an empty string.

    Conversion is done per token, so a converted token never rewrites text
    inside a different token. Whitespace between tokens is preserved as is.

    NOTE(review): the day and hour fields are passed to ``n2w`` with any
    leading zero intact, as before -- confirm that ``n2w`` reads e.g. ``'05'``
    the way you want.

    Args:
        test_string: Raw input text.

    Returns:
        The text with every recognised token replaced by its spoken form.
    """
    return re.sub(r"\S+", lambda hit: _verbalize_token(hit.group(0)), test_string)
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the demo UI and serve it.
    GradioApplication().run()