"""Command-line script: normalize input text, synthesize it, and write the audio to a file."""

import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path

import soundfile as sf

from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel

parser = ArgumentParser()
# --text is mandatory: without it there is nothing to normalize or synthesize,
# and passing None on to unicodedata.normalize would fail with a TypeError.
parser.add_argument("--text", type=str, required=True)
parser.add_argument("--output", default="clip.wav", type=Path)
parser.add_argument("--sample-rate", default=16000, type=int)
parser.add_argument("--silence-duration", default=-1, type=float)
parser.add_argument("--lexicon-file", default=None)


def nat_normalize_text(text: str) -> str:
    """Normalize raw text into the form passed to ``text2mel``.

    The text is NFKC-normalized, lower-cased and stripped. Runs of newlines
    and of the punctuation characters ``. , : ; ? !`` are replaced by the
    silence phoneme ``FLAGS.special_phonemes[FLAGS.sil_index]``, double
    quotes are replaced by spaces, whitespace is collapsed to single spaces,
    and consecutive silence tokens are merged into one.

    Args:
        text: The raw input text.

    Returns:
        The normalized text with no leading or trailing whitespace.
    """
    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()

    sil = FLAGS.special_phonemes[FLAGS.sil_index]
    sil_token = f" {sil} "

    # Newlines must be turned into silence before the generic whitespace
    # collapse below would reduce them to plain spaces.
    text = re.sub(r"[\n.,:]+", sil_token, text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", sil_token, text)
    text = re.sub(r"[ ]+", " ", text)

    # Merge runs of whole silence tokens into one. The token is escaped and
    # the group repeats the full token, so ordinary words that merely start
    # with the token's characters are left untouched.
    text = re.sub(f"(?: {re.escape(sil)})+ ", sil_token, text)
    return text.strip()


def main(argv=None) -> None:
    """Parse command-line arguments, synthesize the text and write the audio.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = parser.parse_args(argv)

    text = nat_normalize_text(args.text)
    print("Normalized text input:", text)

    mel = text2mel(text, args.lexicon_file, args.silence_duration)
    wave = mel2wave(mel)

    print("writing output to file", args.output)
    sf.write(str(args.output), wave, samplerate=args.sample_rate)


if __name__ == "__main__":
    main()