File size: 6,493 Bytes
8c70653 a7960cf 5386471 a7960cf 12da6cc 5c1aedb 8c70653 5c1aedb a7960cf 8c70653 0eeec8f 8c70653 0eeec8f 8c70653 bba0167 8c70653 320b21c aa14d93 a7960cf 8c70653 3d29f27 320b21c a7960cf 320b21c a7960cf 6194d18 8c70653 a7960cf 8c70653 320b21c 8c70653 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import gradio as gr
import argparse
import yaml
from vietTTS.hifigan.mel2wave import mel2wave
from vietTTS.nat.text2mel import text2mel
from vietTTS.synthesizer import nat_normalize_text
import numpy as np
import gradio as gr
import re
from vietnam_number import n2w
from vietnam_number import n2w_single
from synthesize import synthesizer
import noisereduce as nr
import os
import scipy.io.wavfile as wavf
from scipy.io import wavfile
# Heading handed to ``gr.Interface`` as its ``title``.
TITLE = "Saltlux Text to Speech"
# Blurb handed to ``gr.Interface`` as its ``description``.
DESCRIPTION = "SLT Vietnamese Text to speech demo."
class GradioApplication:
    """Gradio front end for the Vietnamese text-to-speech demo.

    Wraps a ``gr.Interface`` whose callback routes the request to either the
    Tacotron2 or the VietTTS synthesis path.
    """

    def __init__(self):
        """Build the interface from the module-level widget factories."""
        inputs = prepare_input()
        outputs = prepare_output()
        self.iface = gr.Interface(
            fn=self.infer,
            title=TITLE,
            description=DESCRIPTION,
            inputs=inputs,
            outputs=outputs,
            allow_flagging='never',
        )

    def infer(self, text, lang, duration_rate):
        """Synthesize ``text`` with the model selected by ``lang``.

        Args:
            text: Sentence to synthesize.
            lang: Model selector; ``"Tacotron2"`` picks the Tacotron path,
                any other value (including ``None``) picks VietTTS.
            duration_rate: Slider value forwarded to the VietTTS path only.

        Returns:
            Whatever the selected ``using_*`` helper returns (a pair of
            audio file paths: before and after denoising).
        """
        if lang == "Tacotron2":
            return using_tacotron(text)
        return using_viettts(text, duration_rate)

    def run(self):
        """Launch the interface; on Ctrl-C close every Gradio server."""
        try:
            self.iface.launch(debug=False)
        except KeyboardInterrupt:
            gr.close_all()
def prepare_input():
    """Create the input widgets: text box, model selector, duration slider.

    Returns:
        A list ``[textbox, radio, slider]`` in the order expected by the
        interface callback ``(text, lang, duration_rate)``.
    """
    return [
        # Free-text sentence to synthesize, pre-filled with a sample.
        gr.Textbox(
            lines=2,
            placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
            value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
            label="Text",
        ),
        # Model choice; no option is pre-selected.
        gr.Radio(
            ['VietTTS', 'Tacotron2'],
            type='value',
            value=None,
            label="Model select",
        ),
        # Duration control, only consumed by the VietTTS path.
        gr.Slider(
            minimum=0.2,
            maximum=1,
            step=0.1,
            value=1.0,
            label="Duration (The bigger the value, the slower the speech) - only for vietTTS",
        ),
    ]
def prepare_output():
    """Create the two audio output widgets (raw and denoised).

    Returns:
        A list of two ``gr.Audio`` components, raw output first.
    """
    captions = ("Output before denoise", "Output after denoise")
    return [gr.Audio(label=caption) for caption in captions]
def text_to_speech(text, stop_duration):
    """Synthesize ``text`` with the VietTTS pipeline as 16-bit PCM samples.

    The text is truncated to 500 characters, passed through ``clean_text``
    and ``nat_normalize_text``, converted to a mel spectrogram by
    ``text2mel`` and finally to a waveform by ``mel2wave``.

    Args:
        text: Sentence to synthesize.
        stop_duration: Forwarded unchanged as the third positional argument
            of ``text2mel``.

    Returns:
        The waveform scaled by 2**15 and cast to ``np.int16``.
    """
    print("starting")
    # Prevent overly long inputs; slicing is a no-op for shorter strings.
    text = text[:500]
    text = clean_text(text)
    text = nat_normalize_text(text)
    mel = text2mel(
        text,
        "lexicon.txt",
        stop_duration,
        "acoustic_latest_ckpt.pickle",
        "duration_latest_ckpt.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
    # A sample of exactly +1.0 scales to 32768, which does not fit in int16;
    # clip to the representable range before casting so peaks saturate
    # instead of overflowing.
    scaled = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
    return scaled.astype(np.int16)
def text_to_speech_tacotron(text):
    """Synthesize ``text`` with the Tacotron synthesizer into ``./out.wav``.

    Args:
        text: Sentence to synthesize; only the first 500 characters are used.

    Returns:
        The path of the written WAV file (``'./out.wav'``).
    """
    print("starting")
    # Prevent overly long inputs.
    clipped = text if len(text) <= 500 else text[:500]
    wav_path = './out.wav'
    synthesizer.save_wav(synthesizer.tts(clipped), wav_path)
    return wav_path
def using_viettts(text, stop_duration):
    """Run VietTTS synthesis and write raw and denoised WAV files.

    Args:
        text: Sentence to synthesize.
        stop_duration: Forwarded to ``text_to_speech``.

    Returns:
        Tuple ``(raw_path, denoised_path)`` of the two WAV files written.
    """
    raw_path, denoised_path = './out.wav', './output_denoise.wav'
    samples = text_to_speech(text, stop_duration)
    # The raw audio is written at a fixed 16 kHz sample rate.
    wavf.write(raw_path, 16000, samples)
    # Read the file back and run noise reduction on what was stored.
    sample_rate, audio = wavfile.read(raw_path)
    denoised = nr.reduce_noise(y=audio, sr=sample_rate)
    wavfile.write(denoised_path, sample_rate, denoised)
    return (raw_path, denoised_path)
def using_tacotron(text):
    """Run Tacotron synthesis and write a denoised copy of its output.

    Args:
        text: Sentence to synthesize.

    Returns:
        Tuple ``(raw_path, denoised_path)``; the raw path is the one returned
        by ``text_to_speech_tacotron``.
    """
    raw_path = text_to_speech_tacotron(text)
    denoised_path = "./output_denoise.wav"
    sample_rate, audio = wavfile.read(raw_path)
    # Noise-reduce the synthesized audio and store it next to the raw file.
    wavfile.write(
        denoised_path,
        sample_rate,
        nr.reduce_noise(y=audio, sr=sample_rate),
    )
    return (raw_path, denoised_path)
# dd/mm/yyyy or dd-mm-yyyy; the backreference forces both separators to agree.
_FULL_DATE_RE = re.compile(r"(\d{2})(?P<sep>[-/])(\d{1,2})(?P=sep)(\d{4})")
# dd/mm or dd-mm.
_DAY_MONTH_RE = re.compile(r"(\d{2})[-/](\d{1,2})")
# 10h30 or 10:30.
_TIME_RE = re.compile(r"(\d{1,2})[h:](\d{1,2})")


def _strip_leading_zeros(digits):
    """Drop leading zeros from a digit string, keeping a lone '0' for zero."""
    return digits.lstrip('0') or '0'


def _spell_token(token):
    """Return the spoken form of one whitespace-free token, or it unchanged."""
    # Patterns are anchored at the start of the token only (``match``), and a
    # hit replaces the WHOLE token, so trailing characters such as
    # punctuation are dropped along with the digits.
    found = _FULL_DATE_RE.match(token)
    if found:
        day, month, year = found.group(1, 3, 4)
        return ('Ngày ' + n2w(day)
                + ' tháng ' + n2w(_strip_leading_zeros(month))
                + ' năm ' + n2w(year))

    found = _DAY_MONTH_RE.match(token)
    if found:
        day, month = found.groups()
        return 'Ngày ' + n2w(day) + ' tháng ' + n2w(_strip_leading_zeros(month))

    found = _TIME_RE.match(token)
    if found:
        hour, minute = found.groups()
        return n2w(hour) + ' giờ ' + n2w(_strip_leading_zeros(minute)) + ' phút '

    if token.isdigit():
        # Plain digit runs are read out digit by digit.
        return n2w_single(token)
    return token


def clean_text(test_string):
    """Spell out dates, times and bare numbers in Vietnamese words.

    The text is processed one whitespace-delimited token at a time, in this
    priority order:

    1. ``dd/mm/yyyy`` or ``dd-mm-yyyy`` -> ``Ngày <d> tháng <m> năm <y>``
    2. ``dd/mm`` or ``dd-mm``           -> ``Ngày <d> tháng <m>``
    3. ``HHhMM`` or ``HH:MM``           -> ``<h> giờ <m> phút ``
    4. all-digit tokens                 -> ``n2w_single(token)``

    Month and minute fields have their leading zeros removed before being
    passed to ``n2w``; an all-zero field is passed as ``'0'`` rather than as
    an empty string. Tokens are converted independently, so a number can
    never be rewritten inside an unrelated token that merely contains it.

    Args:
        test_string: Raw input sentence.

    Returns:
        The sentence with matching tokens replaced; all original whitespace
        between tokens is preserved.
    """
    # A capturing group makes re.split keep the whitespace runs, so the
    # original spacing can be reassembled verbatim.
    pieces = re.split(r"(\s+)", test_string)
    return "".join(
        _spell_token(piece) if piece.strip() else piece
        for piece in pieces
    )
if __name__ == '__main__':
    # Build the demo application and launch its Gradio interface.
    gradio_application = GradioApplication()
    gradio_application.run()