File size: 1,268 Bytes
12da6cc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path
import soundfile as sf
from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel
# Command-line interface of the synthesizer script.
# NOTE: arguments are parsed at module level, so importing this module
# consumes sys.argv exactly as running it does.
parser = ArgumentParser()
# --text is mandatory: without it args.text would be None and the text
# normalization step would fail with an opaque TypeError.
parser.add_argument(
    "--text", type=str, required=True, help="text to synthesize"
)
parser.add_argument(
    "--output", default="clip.wav", type=Path, help="path of the audio file to write"
)
parser.add_argument(
    "--sample-rate",
    default=16000,
    type=int,
    help="sample rate used when writing the output file",
)
# The value is forwarded unchanged to text2mel; the meaning of the -1 default
# is defined there (presumably "use the model's own duration" - confirm).
parser.add_argument(
    "--silence-duration",
    default=-1,
    type=float,
    help="silence duration forwarded to text2mel",
)
parser.add_argument(
    "--lexicon-file", default=None, help="lexicon file forwarded to text2mel"
)
args = parser.parse_args()
def nat_normalize_text(text, *, sil=None):
    """Normalize raw text and mark pauses with the silence token.

    Steps, in order: NFKC-normalize, lower-case and strip the text; replace
    runs of newlines and ``. , :`` with the silence token; turn double quotes
    into spaces; collapse whitespace; replace remaining runs of
    ``. , : ; ? !`` with the silence token; collapse spaces; merge
    consecutive silence tokens into a single one.

    Args:
        text: The input string.
        sil: Silence token to insert. When ``None`` (the default) it is
            read from ``FLAGS.special_phonemes[FLAGS.sil_index]``.

    Returns:
        The normalized string with leading/trailing whitespace removed.
    """
    if sil is None:
        sil = FLAGS.special_phonemes[FLAGS.sil_index]
    marker = f" {sil} "

    def put_marker(_match):
        # A callable replacement inserts the token literally, so backslashes
        # in the token are never interpreted as regex replacement escapes.
        return marker

    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()
    text = re.sub(r"[\n.,:]+", put_marker, text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", put_marker, text)
    text = re.sub("[ ]+", " ", text)
    # Merge runs like " sil sil " into " sil ". The token is escaped and
    # grouped as a whole: previously the quantifier applied only to its last
    # character, so a plain word such as "sill" was rewritten to the token.
    text = re.sub(f"(?: {re.escape(sil)})+ ", put_marker, text)
    return text.strip()
# Stage 1: clean up the --text argument and echo the result so the user can
# see exactly what is fed to the model.
text = nat_normalize_text(args.text)
print("Normalized text input:", text)

# Stage 2: text -> intermediate features -> waveform. Going by the function
# names these are a mel-spectrogram and audio samples (defined in the
# imported nat / hifigan modules - confirm there).
mel = text2mel(text, args.lexicon_file, args.silence_duration)
wave = mel2wave(mel)

# Stage 3: write the samples to disk at the requested sample rate.
print("writing output to file", args.output)
sf.write(
    file=str(args.output),
    data=wave,
    samplerate=args.sample_rate,
)
|