# tts/vietTTS/synthesizer.py
# tobiccino's picture
# update
# a7960cf
# raw
# history blame
# 1.41 kB
import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path
import soundfile as sf
from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel
def _build_parser():
    """Create the command-line parser for the synthesizer script."""
    cli = ArgumentParser()
    # (flag, keyword options) pairs, registered in declaration order.
    option_specs = (
        ("--text", {"type": str}),
        ("--output", {"default": "clip.wav", "type": Path}),
        ("--sample-rate", {"default": 16000, "type": int}),
        ("--silence-duration", {"default": -1, "type": float}),
        ("--lexicon-file", {"default": None}),
    )
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli


parser = _build_parser()
# NOTE: arguments are parsed from the process argv when this module is loaded.
args = parser.parse_args()
def nat_normalize_text(text):
    """Normalize raw text into a lowercase, single-space-separated string.

    The text is NFKC-normalized, lower-cased and stripped. Runs of newlines
    and punctuation (``. , : ; ? !``) are replaced by the special phoneme
    ``FLAGS.special_phonemes[FLAGS.sil_index]``, double quotes are turned
    into spaces, whitespace is collapsed to single spaces, and consecutive
    occurrences of that phoneme are merged into one.

    Args:
        text: The input string to normalize.

    Returns:
        The normalized string, without leading or trailing whitespace.
    """
    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()
    sil = FLAGS.special_phonemes[FLAGS.sil_index]
    pause = f" {sil} "

    def insert_pause(_match):
        # A function replacement inserts the token literally, so a backslash
        # in it is never interpreted as a replacement-template escape.
        return pause

    text = re.sub(r"[\n.,:]+", insert_pause, text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", insert_pause, text)
    text = re.sub("[ ]+", " ", text)
    # Merge repeated tokens into one. The token is escaped and repeated as a
    # whole unit; applying "+" to the raw interpolated token would repeat
    # only its last character and rewrite ordinary words (e.g. "sill" would
    # become "sil" for a token "sil").
    text = re.sub(f"(?: {re.escape(sil)})+ ", insert_pause, text)
    return text.strip()
def run():
    """Normalize a fixed sample sentence and print the result.

    NOTE: the synthesis pipeline is currently disabled. The steps that were
    commented out here are, in order:
        text = nat_normalize_text(args.text)
        mel = text2mel(text, args.lexicon_file, args.silence_duration)
        wave = mel2wave(mel)
        sf.write("output.wav", wave, samplerate=16000)
    """
    sample_sentence = "ahihi do ngoc"
    normalized = nat_normalize_text(sample_sentence)
    print("Normalized text input:", normalized)