File size: 1,414 Bytes
12da6cc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9a877b
 
 
 
 
 
 
 
 
6194d18
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import re
import unicodedata
from argparse import ArgumentParser
from pathlib import Path

import soundfile as sf

from .hifigan.mel2wave import mel2wave
from .nat.config import FLAGS
from .nat.text2mel import text2mel

parser = ArgumentParser()
parser.add_argument("--text", type=str)
parser.add_argument("--output", default="clip.wav", type=Path)
parser.add_argument("--sample-rate", default=16000, type=int)
parser.add_argument("--silence-duration", default=-1, type=float)
parser.add_argument("--lexicon-file", default=None)
args = parser.parse_args()


def nat_normalize_text(text):
    text = unicodedata.normalize("NFKC", text)
    text = text.lower().strip()
    sil = FLAGS.special_phonemes[FLAGS.sil_index]
    text = re.sub(r"[\n.,:]+", f" {sil} ", text)
    text = text.replace('"', " ")
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[.,:;?!]+", f" {sil} ", text)
    text = re.sub("[ ]+", " ", text)
    text = re.sub(f"( {sil}+)+ ", f" {sil} ", text)
    return text.strip()
def run():
    # text = nat_normalize_text(args.text)
    text = nat_normalize_text("ahihi do ngoc")
    print("Normalized text input:", text)
    # mel = text2mel(text, args.lexicon_file, args.silence_duration)
    # mel = text2mel(text, "./assets/infore/lexicon.txt", 0.15)
    # wave = mel2wave(mel)
    # print("writing output to file", "output.wav")
    # sf.write(str("output.wav"), wave, samplerate=16000)