# Spaces: tobiccino/tts (Sleeping)
# File size: 6,493 Bytes

import gradio as gr
import argparse
import yaml
from vietTTS.hifigan.mel2wave import mel2wave
from vietTTS.nat.text2mel import text2mel
from vietTTS.synthesizer import nat_normalize_text
import numpy as np
import gradio as gr
import re
from vietnam_number import n2w
from vietnam_number import n2w_single
from synthesize import synthesizer
import noisereduce as nr
import os
import scipy.io.wavfile as wavf
from scipy.io import wavfile


# Heading and sub-heading text handed to gr.Interface as its title/description.
TITLE = "Saltlux Text to Speech"
DESCRIPTION = "SLT Vietnamese Text to speech demo."
    
    
class GradioApplication:
    """Gradio front end that routes a text prompt to one of the two TTS back ends.

    The interface is assembled once in ``__init__`` from ``prepare_input()`` and
    ``prepare_output()``; call ``run()`` to serve it.
    """

    def __init__(self):
        """Build the ``gr.Interface`` and keep it on ``self.iface``."""
        inputs = prepare_input()
        outputs = prepare_output()

        self.iface = gr.Interface(fn=self.infer,
                                  title=TITLE,
                                  description=DESCRIPTION,
                                  inputs=inputs,
                                  outputs=outputs,
                                  allow_flagging='never')

    def infer(self, text, lang, duration_rate):
        """Synthesize ``text`` with the selected model.

        Args:
            text: Text taken from the textbox input.
            lang: Model name from the radio input. ``"Tacotron2"`` selects the
                Tacotron back end; any other value (including ``None``, which is
                what the radio holds when nothing is selected) selects VietTTS.
            duration_rate: Slider value; only forwarded to the VietTTS back end.

        Returns:
            Whatever the chosen back end returns: a pair of WAV file paths
            (raw output, denoised output).
        """
        if lang == "Tacotron2":
            return using_tacotron(text)
        return using_viettts(text, duration_rate)

    def run(self):
        """Launch the interface; on Ctrl-C close every running Gradio app."""
        try:
            self.iface.launch(debug=False)
        except KeyboardInterrupt:
            gr.close_all()


def prepare_input():
    """Create the three input widgets of the demo.

    Returns:
        A list ``[textbox, model radio, duration slider]`` in the order in
        which ``GradioApplication.infer`` receives the values.
    """
    # Free-text prompt, pre-filled with a sample Vietnamese sentence.
    prompt_box = gr.Textbox(
        label="Text",
        lines=2,
        placeholder="Lựa chọn model test - VietTTS và Tacotron 2 + Univnet",
        value="Thành phố muốn thí điểm thu thuế bất động sản thứ 2, tự quyết nhiều quyết định đầu tư để thu hút nguồn vốn tư nhân",
    )

    # Model selector; starts with nothing selected (value=None).
    model_radio = gr.Radio(
        ['VietTTS', 'Tacotron2'],
        label="Model select",
        type='value',
        value=None,
    )

    # Duration factor in [0.2, 1] with 0.1 steps, defaulting to 1.0.
    duration_slider = gr.Slider(
        label="Duration (The bigger the value, the slower the speech) - only for vietTTS",
        minimum=0.2,
        maximum=1,
        step=0.1,
        value=1.0,
    )

    return [prompt_box, model_radio, duration_slider]


def prepare_output():
    """Create the two audio players: raw synthesis first, denoised second."""
    labels = ("Output before denoise", "Output after denoise")
    return [gr.Audio(label=caption) for caption in labels]
def text_to_speech(text,stop_duration):
    """Synthesize ``text`` with the VietTTS pipeline and return 16-bit PCM samples.

    Steps: truncate to 500 characters, spell out dates/times/numbers with
    ``clean_text``, normalize with ``nat_normalize_text``, produce a mel
    spectrogram with ``text2mel`` and vocode it with ``mel2wave``.

    Args:
        text: Input text.
        stop_duration: Forwarded unchanged as the third positional argument of
            ``text2mel``.

    Returns:
        A NumPy ``int16`` array holding the waveform scaled by 2**15.
    """
    print("starting")
    # prevent too long text
    if len(text) > 500:
        text = text[:500]
    text = clean_text(text)
    text = nat_normalize_text(text)
    mel = text2mel(
        text,
        "lexicon.txt",
        stop_duration,
        "acoustic_latest_ckpt.pickle",
        "duration_latest_ckpt.pickle",
    )
    wave = mel2wave(mel, "config.json", "hk_hifi.pickle")
    # NOTE(review): assumes mel2wave yields float samples nominally in [-1, 1].
    # A sample at +1.0 scales to 32768, which does not fit in int16, so clamp
    # to the representable range before casting instead of letting it overflow.
    pcm_range = np.iinfo(np.int16)
    scaled = np.asarray(wave) * (2**15)
    return np.clip(scaled, pcm_range.min, pcm_range.max).astype(np.int16)

def text_to_speech_tacotron(text):
    """Synthesize ``text`` with the Tacotron synthesizer and save it to disk.

    Only the first 500 characters of ``text`` are used.

    Returns:
        The path of the WAV file written by ``synthesizer.save_wav``
        (``'./out.wav'``).
    """
    wav_path = './out.wav'
    print("starting")

    # Slicing is a no-op for inputs that are already short enough.
    clipped_text = text[:500]

    synthesizer.save_wav(synthesizer.tts(clipped_text), wav_path)
    return wav_path



def using_viettts(text,stop_duration):
    """Run the VietTTS back end and produce raw and denoised WAV files.

    The samples from ``text_to_speech`` are written to ``./out.wav`` at
    16 kHz, read back, passed through ``noisereduce`` and written to
    ``./output_denoise.wav``.

    Returns:
        Tuple ``(raw_path, denoised_path)``.
    """
    raw_path = './out.wav'
    denoised_path = './output_denoise.wav'
    sample_rate_hz = 16000

    samples = text_to_speech(text, stop_duration)
    wavf.write(raw_path, sample_rate_hz, samples)

    # Denoise what was actually stored on disk.
    stored_rate, stored_audio = wavfile.read(raw_path)
    cleaned = nr.reduce_noise(y=stored_audio, sr=stored_rate)
    wavfile.write(denoised_path, stored_rate, cleaned)

    return raw_path, denoised_path

def using_tacotron(text):
    """Run the Tacotron back end and produce raw and denoised WAV files.

    The file written by ``text_to_speech_tacotron`` is read back, passed
    through ``noisereduce`` and saved as ``./output_denoise.wav``.

    Returns:
        Tuple ``(raw_path, denoised_path)``.
    """
    denoised_path = "./output_denoise.wav"
    raw_path = text_to_speech_tacotron(text)

    # Denoise at the sample rate stored in the generated file.
    sample_rate, audio = wavfile.read(raw_path)
    cleaned = nr.reduce_noise(y=audio, sr=sample_rate)
    wavfile.write(denoised_path, sample_rate, cleaned)

    return raw_path, denoised_path


# Token patterns, tried in this order. Each is applied with ``match`` and is
# therefore anchored at the start of a whitespace-delimited token only, so a
# token may carry trailing characters after the matched part.
_FULL_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}(?P=sep)\d{4}")
_SHORT_DATE_RE = re.compile(r"\d{2}(?P<sep>[-/])\d{1,2}")
_TIME_RE = re.compile(r"\d{1,2}(?P<sep>[h:])\d{1,2}")


def _strip_leading_zeros(digits):
    """Drop leading zeros from a digit string; an all-zero string becomes "0"."""
    return digits.lstrip('0') or '0'


def _spell_token(word):
    """Return the spelled-out form of one token, or the token itself if it is not numeric."""
    found = _FULL_DATE_RE.match(word)
    if found:
        day, month, year = found.group(0).split(found.group('sep'))
        return ('Ngày ' + n2w(day)
                + ' tháng ' + n2w(_strip_leading_zeros(month))
                + ' năm ' + n2w(year))

    found = _SHORT_DATE_RE.match(word)
    if found:
        day, month = found.group(0).split(found.group('sep'))
        return 'Ngày ' + n2w(day) + ' tháng ' + n2w(_strip_leading_zeros(month))

    found = _TIME_RE.match(word)
    if found:
        hours, minutes = found.group(0).split(found.group('sep'))
        return n2w(hours) + ' giờ ' + n2w(_strip_leading_zeros(minutes)) + ' phút '

    if word.isdigit():
        return n2w_single(word)

    return word


def clean_text(test_string):
    """Spell out dates, times and bare numbers found in ``test_string``.

    The text is processed one whitespace-delimited token at a time; all
    whitespace between tokens is kept as is. A token is rewritten when it

    * starts with ``dd/mm/yyyy`` or ``dd-mm-yyyy`` ->
      ``'Ngày <dd> tháng <mm> năm <yyyy>'``,
    * starts with ``dd/mm`` or ``dd-mm`` -> ``'Ngày <dd> tháng <mm>'``,
    * starts with ``h:mm`` or ``hhmm``-style ``10h30`` ->
      ``'<h> giờ <mm> phút '`` (note the trailing space),
    * consists only of digits -> ``n2w_single(token)``.

    Numbers inside the date/time forms are rendered with ``n2w``; the month
    and minute fields have their leading zeros removed first. When one of the
    date/time patterns matches, the whole token is replaced, including any
    characters after the matched prefix. Tokens that do not start with a
    digit pattern (for example ``"10%"`` or ``"(2022)"``) are left untouched.

    Converting per token, rather than with ``str.replace`` on the whole
    text, guarantees that a standalone number never rewrites digits that are
    part of a different token.

    Args:
        test_string: Text to normalize.

    Returns:
        The text with the numeric tokens replaced. The result is also
        printed to stdout as a debugging aid.
    """
    test_string = re.sub(r"\S+", lambda token: _spell_token(token.group(0)), test_string)
    print(test_string)
    return test_string



if __name__ == '__main__':
    # Build the demo UI and serve it until interrupted.
    GradioApplication().run()