Spaces:

tobiccino
/

tts

Sleeping

App Files Files Community

tts / app.py

tobiccino

update apppy

bba0167 over 1 year ago

raw

history blame

6.49 kB

	import gradio as gr
	import argparse
	import yaml
	from vietTTS.hifigan.mel2wave import mel2wave
	from vietTTS.nat.text2mel import text2mel
	from vietTTS.synthesizer import nat_normalize_text
	import numpy as np
	import gradio as gr
	import re
	from vietnam_number import n2w
	from vietnam_number import n2w_single
	from synthesize import synthesizer
	import noisereduce as nr
	import os
	import scipy.io.wavfile as wavf
	from scipy.io import wavfile


	# Passed as the `title` argument of the Gradio interface.
	TITLE = "Saltlux Text to Speech"
	# Passed as the `description` argument of the Gradio interface.
	DESCRIPTION = "SLT Vietnamese Text to speech demo."


	class GradioApplication:
	    """Gradio front end that routes a text request to one of two TTS back ends.

	    The interface is assembled once in ``__init__`` and served by ``run``.
	    """

	    def __init__(self):
	        """Build the Gradio interface from the module-level widget factories."""
	        self.iface = gr.Interface(
	            fn=self.infer,
	            title=TITLE,
	            description=DESCRIPTION,
	            inputs=prepare_input(),
	            outputs=prepare_output(),
	            allow_flagging='never',
	        )

	    def infer(self, text, lang, duration_rate):
	        """Synthesize ``text`` with the selected model.

	        Args:
	            text: Text to synthesize.
	            lang: Model selector; the exact string ``"VietTTS"`` selects the
	                VietTTS path, any other value selects the Tacotron path.
	            duration_rate: Forwarded to the VietTTS path only; the Tacotron
	                path does not receive it.

	        Returns:
	            Whatever the selected back end returns (in this file, a pair of
	            paths: the raw wav file and the denoised wav file).
	        """
	        if lang == "VietTTS":
	            return using_viettts(text, duration_rate)
	        # Every other selector value falls through to Tacotron.
	        return using_tacotron(text)

	    def run(self):
	        """Launch the interface; on Ctrl-C, close all Gradio interfaces."""
	        try:
	            self.iface.launch(debug=False)
	        except KeyboardInterrupt:
	            gr.close_all()


	def prepare_input():
	    """Create the three input widgets: text box, model selector, duration slider.

	    Returns:
	        A list ``[textbox, radio, slider]`` in that order.
	    """
	    return [
	        gr.Textbox(
	            lines=2,
	            placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
	            value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
	            label="Text",
	        ),
	        # No model is pre-selected (value=None).
	        gr.Radio(
	            ['VietTTS', 'Tacotron2'],
	            type='value',
	            value=None,
	            label="Model select",
	        ),
	        gr.Slider(
	            minimum=0.2,
	            maximum=1,
	            step=0.1,
	            value=1.0,
	            label="Duration (The bigger the value, the slower the speech) - only for vietTTS",
	        ),
	    ]


	def prepare_output():
	    """Create the two audio output widgets (before and after denoising)."""
	    captions = ("Output before denoise", "Output after denoise")
	    return [gr.Audio(label=caption) for caption in captions]
	def text_to_speech(text, stop_duration):
	    """Synthesize ``text`` through the VietTTS pipeline and return int16 samples.

	    The text is truncated to 500 characters, passed through ``clean_text`` and
	    ``nat_normalize_text``, converted to a mel spectrogram by ``text2mel`` and
	    then to a waveform by ``mel2wave``.

	    Args:
	        text: Text to synthesize.
	        stop_duration: Forwarded unchanged as the third argument of
	            ``text2mel``.

	    Returns:
	        The waveform scaled by 2**15 and converted to ``np.int16``.
	    """
	    print("starting")
	    # prevent too long text (slicing is a no-op for shorter input)
	    text = text[:500]
	    text = clean_text(text)
	    text = nat_normalize_text(text)
	    mel = text2mel(
	        text,
	        "lexicon.txt",
	        stop_duration,
	        "acoustic_latest_ckpt.pickle",
	        "duration_latest_ckpt.pickle",
	    )
	    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
	    # Presumably `wave` is float audio in [-1, 1] (TODO confirm against
	    # mel2wave). A sample of exactly +1.0 scales to 32768, which does not fit
	    # in int16, so clamp to the representable range before casting instead of
	    # letting the cast wrap around.
	    scaled = np.clip(wave * (2**15), -(2**15), 2**15 - 1)
	    return scaled.astype(np.int16)

	def text_to_speech_tacotron(text):
	    """Synthesize ``text`` with ``synthesizer`` and save it to ``./out.wav``.

	    Input longer than 500 characters is truncated first.

	    Returns:
	        The path of the written wav file.
	    """
	    print("starting")
	    output = './out.wav'
	    # prevent too long text
	    limited = text if len(text) <= 500 else text[:500]
	    synthesizer.save_wav(synthesizer.tts(limited), output)
	    return output



	def using_viettts(text,stop_duration):
	    """Run the VietTTS path and write both a raw and a denoised wav file.

	    Returns:
	        ``(raw_path, denoised_path)``.
	    """
	    raw_path, denoised_path = './out.wav', './output_denoise.wav'
	    sample_rate = 16000

	    # Write the synthesized samples, then read the file back so the denoiser
	    # works on exactly what was stored.
	    wavf.write(raw_path, sample_rate, text_to_speech(text, stop_duration))
	    rate, samples = wavfile.read(raw_path)

	    # perform noise reduction
	    cleaned = nr.reduce_noise(y=samples, sr=rate)
	    wavfile.write(denoised_path, rate, cleaned)
	    return (raw_path, denoised_path)

	def using_tacotron(text):
	    """Run the Tacotron path and write a denoised copy of its output.

	    Returns:
	        ``(raw_path, denoised_path)`` where ``raw_path`` is whatever
	        ``text_to_speech_tacotron`` returned.
	    """
	    raw_path = text_to_speech_tacotron(text)
	    denoised_path = "./output_denoise.wav"

	    rate, samples = wavfile.read(raw_path)
	    # perform noise reduction
	    wavfile.write(denoised_path, rate, nr.reduce_noise(y=samples, sr=rate))
	    return (raw_path, denoised_path)


	def _strip_leading_zeros(digits):
	    """Drop leading zeros but keep a single "0" when nothing else is left."""
	    return digits.lstrip('0') or '0'


	def _replace_token(text, token, spoken, count=0):
	    """Replace whitespace-delimited occurrences of ``token`` with ``spoken``.

	    ``count=0`` replaces every occurrence. The replacement is supplied through
	    a function so that backslashes in ``spoken`` are never treated as escapes.
	    """
	    standalone = r"(?<!\S)" + re.escape(token) + r"(?!\S)"
	    return re.sub(standalone, lambda _match: spoken, text, count=count)


	def _say_full_date(parts):
	    """Render [day, month, year] digit strings as a spoken date."""
	    return ('Ngày ' + n2w(parts[0])
	            + ' tháng ' + n2w(_strip_leading_zeros(parts[1]))
	            + ' năm ' + n2w(parts[2]))


	def _say_short_date(parts):
	    """Render [day, month] digit strings as a spoken date without a year."""
	    return 'Ngày ' + n2w(parts[0]) + ' tháng ' + n2w(_strip_leading_zeros(parts[1]))


	def _say_time(parts):
	    """Render [hour, minute] digit strings as a spoken time."""
	    return n2w(parts[0]) + ' giờ ' + n2w(_strip_leading_zeros(parts[1])) + ' phút '


	# (pattern matched at the start of a word, separator pattern, renderer),
	# applied in this order: full dates, short dates, then times.
	_SPOKEN_RULES = (
	    (re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}(?P=sep)\d{4}"), r"[-/]", _say_full_date),
	    (re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}"), r"[-/]", _say_short_date),
	    (re.compile(r"\d{1,2}(?P<sep>[h:])\d{1,2}"), r"[h:]", _say_time),
	)


	def clean_text(test_string):
	    """Spell out dates, times and bare numbers in ``test_string``.

	    The input is split on whitespace and each word is tested, in order,
	    against three patterns anchored at the start of the word:

	    * ``dd/mm/yyyy`` or ``dd-mm-yyyy``
	    * ``dd/mm`` or ``dd-mm``
	    * ``hh:mm`` or ``hhHmm`` (with a literal ``h``)

	    A matching word is replaced as a whole, so characters that follow the
	    matched prefix inside the same word (trailing punctuation, for example)
	    are dropped together with it. Finally, every word made only of digits is
	    replaced by its digit-by-digit reading from ``n2w_single``.

	    Args:
	        test_string: Text to normalize.

	    Returns:
	        The text with the recognized tokens replaced by words.
	    """
	    words = test_string.split()

	    for pattern, separators, render in _SPOKEN_RULES:
	        for word in words:
	            found = pattern.match(word)
	            if found is None:
	                continue
	            parts = re.split(separators, found.group(0))
	            # A word consumed by an earlier rule is no longer present in the
	            # text, so this substitution is then simply a no-op.
	            test_string = _replace_token(test_string, word, render(parts))

	    print(test_string)

	    for word in words:
	        if word.isdigit():
	            # Replace one standalone occurrence per word so that digits
	            # embedded in another word (e.g. "a12") are left alone.
	            test_string = _replace_token(
	                test_string, word, n2w_single(word), count=1)

	    return test_string



	if __name__ == '__main__':
	    # Script entry point: build the Gradio application and launch it.
	    GradioApplication().run()