# File: tts/vietTTS/synthesizer.py
# Revision: 12da6cc ("upload")
import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path
import soundfile as sf
from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel
# Command-line interface. Arguments are parsed at module load time, so the
# rest of the file can read them from ``args``.
parser = ArgumentParser()
# --text is mandatory: its value is handed directly to nat_normalize_text(),
# which cannot process the None argparse would supply if it were omitted.
parser.add_argument(
    "--text",
    type=str,
    required=True,
    help="text to synthesize",
)
parser.add_argument(
    "--output",
    default="clip.wav",
    type=Path,
    help="path of the audio file to write",
)
parser.add_argument(
    "--sample-rate",
    default=16000,
    type=int,
    help="sample rate used when writing the output file",
)
parser.add_argument(
    "--silence-duration",
    default=-1,
    type=float,
    help="silence duration forwarded to text2mel",
)
parser.add_argument(
    "--lexicon-file",
    default=None,
    help="lexicon file forwarded to text2mel",
)
args = parser.parse_args()
def nat_normalize_text(text, *, sil=None):
    """Normalize raw input text and mark pauses with the silence token.

    The text is NFKC-normalized, lower-cased and stripped; double quotes are
    replaced by spaces; every run of newlines or ``. , : ; ? !`` becomes one
    silence token surrounded by spaces; whitespace is collapsed; and
    back-to-back silence tokens are merged into a single one.

    Args:
        text: The raw string to normalize.
        sil: Silence token to insert. When ``None`` (the default) it is read
            from ``FLAGS.special_phonemes[FLAGS.sil_index]``.

    Returns:
        The normalized string with no leading or trailing whitespace.

    Raises:
        TypeError: If ``text`` is not a ``str``.
    """
    if sil is None:
        sil = FLAGS.special_phonemes[FLAGS.sil_index]
    sil_token = f" {sil} "

    def insert_sil(_match):
        # A callable replacement keeps the token literal: a plain string
        # would be parsed as a regex replacement template (backslashes,
        # group references).
        return sil_token

    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()
    text = re.sub(r"[\n.,:]+", insert_sil, text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", insert_sil, text)
    text = re.sub(r" +", " ", text)
    # Merge consecutive silence tokens. The token is escaped and the
    # quantifier applies to the whole " <sil>" group, so ordinary words that
    # merely start with the token (e.g. "sill" for "sil") are left intact.
    text = re.sub(rf"(?: {re.escape(sil)})+ ", insert_sil, text)
    return text.strip()
# Synthesis pipeline, executed at module load:
# normalized text -> text2mel -> mel2wave -> audio file on disk.
text = nat_normalize_text(args.text)
print(f"Normalized text input: {text}")

# text2mel also receives the lexicon file and silence duration from the CLI.
mel = text2mel(text, args.lexicon_file, args.silence_duration)
wave = mel2wave(mel)

# soundfile is given the path as a plain string rather than a Path object.
output_path = str(args.output)
print(f"writing output to file {output_path}")
sf.write(output_path, wave, samplerate=args.sample_rate)