File size: 1,414 Bytes
12da6cc a9a877b 6194d18 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path
import soundfile as sf
from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel
# Command-line interface for this module.
# NOTE: the arguments are parsed at module level, so merely importing this
# module reads (and validates) the current process's command line.
parser = ArgumentParser()
# Input text; no default is given, so args.text is None when the flag is omitted.
parser.add_argument("--text", type=str)
# Presumably the path of the audio file to write (parsed into a pathlib.Path).
parser.add_argument("--output", default="clip.wav", type=Path)
# Presumably the sample rate, in Hz, of the written audio -- TODO confirm.
parser.add_argument("--sample-rate", default=16000, type=int)
# Silence duration as a float; the meaning of the -1 default is decided by
# whatever consumes it and is not visible here -- TODO confirm.
parser.add_argument("--silence-duration", default=-1, type=float)
# Optional lexicon file; left as a plain string (or None when omitted).
parser.add_argument("--lexicon-file", default=None)
args = parser.parse_args()
def nat_normalize_text(text, *, sil=None):
    """Normalize raw input text into a lowercase, silence-annotated string.

    The text is NFKC-normalized, lowercased and stripped. Every run of
    newlines or of the punctuation characters ``. , : ; ? !`` is replaced
    by the silence token, double quotes are dropped, whitespace is
    collapsed to single spaces, and consecutive silence tokens are merged
    into one.

    Args:
        text: The raw text to normalize.
        sil: Silence token to insert. When ``None`` (the default) the token
            is read from ``FLAGS.special_phonemes[FLAGS.sil_index]``.

    Returns:
        The normalized text with no leading or trailing whitespace.
    """
    if sil is None:
        sil = FLAGS.special_phonemes[FLAGS.sil_index]
    pause = f" {sil} "

    # A callable replacement inserts the token verbatim; a plain replacement
    # string would have any backslashes in it interpreted by ``re.sub``.
    def insert_pause(_match):
        return pause

    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()
    text = re.sub(r"[\n.,:]+", insert_pause, text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", insert_pause, text)
    text = re.sub(r"[ ]+", " ", text)
    # Merge runs of the silence token. The token is escaped and repeated as a
    # whole group: an unescaped "{sil}+" would apply "+" only to the token's
    # last character, so a word such as "sill" would be mistaken for a
    # silence token when the token is "sil".
    text = re.sub(f"(?: {re.escape(sil)})+ ", insert_pause, text)
    return text.strip()
def run():
    """Normalize a fixed sample sentence and print the result.

    The command-line text is currently not consulted and no audio is
    produced: the synthesis steps below are disabled and kept only for
    reference.
    """
    sample_text = "ahihi do ngoc"
    normalized = nat_normalize_text(sample_text)
    print("Normalized text input:", normalized)

    # Disabled pipeline, kept for reference:
    #   text = nat_normalize_text(args.text)
    #   mel = text2mel(text, args.lexicon_file, args.silence_duration)
    #   mel = text2mel(text, "./assets/infore/lexicon.txt", 0.15)
    #   wave = mel2wave(mel)
    #   print("writing output to file", "output.wav")
    #   sf.write(str("output.wav"), wave, samplerate=16000)
|