Spaces:

tobiccino
/

tts

Sleeping

App Files Files Community

tts / vietTTS /nat /config.py

tobiccino

upload

12da6cc over 1 year ago

raw

history blame

2.11 kB

	from argparse import Namespace
	from pathlib import Path
	from typing import NamedTuple

	from jax.numpy import ndarray


	class FLAGS(Namespace):
	    """Configurations.

	    Every setting is a plain class attribute, so it can be read straight off
	    the class (e.g. ``FLAGS.mel_dim``) without creating an instance.  The
	    groups below cover layer sizes, dataset limits, the phoneme inventory,
	    DSP parameters, training hyper-parameters and checkpoint/data paths.
	    """

	    # layer sizes and schedule length
	    duration_lstm_dim = 256
	    vocab_size = 256
	    duration_embed_dropout_rate = 0.5
	    num_training_steps = 200_000
	    postnet_dim = 512
	    acoustic_decoder_dim = 512
	    acoustic_encoder_dim = 256

	    # dataset
	    max_phoneme_seq_len = 256 * 1
	    # Checked once, when the class body runs: the length has to stay a
	    # multiple of 256 to prevent a compilation error on Colab T4 GPU.
	    assert max_phoneme_seq_len % 256 == 0
	    max_wave_len = 1024 * 64 * 3  # = 196_608 (presumably samples -- confirm)

	    # Montreal Forced Aligner
	    # Order matters: the indices below are positions in this list.
	    special_phonemes = ["sil", "sp", "spn", " "]  # [sil], [sp], [spn], [word end]
	    sil_index = special_phonemes.index("sil")
	    sp_index = sil_index  # "sp" is not used; it shares the index of "sil"
	    word_end_index = special_phonemes.index(" ")
	    # Regular (non-special) symbols: lowercase letters, including accented
	    # ones.  Kept as one list per row of ten for easy visual comparison.
	    _normal_phonemes = (
	        ["a", "b", "c", "d", "e", "g", "h", "i", "k", "l"]
	        + ["m", "n", "o", "p", "q", "r", "s", "t", "u", "v"]
	        + ["x", "y", "à", "á", "â", "ã", "è", "é", "ê", "ì"]
	        + ["í", "ò", "ó", "ô", "õ", "ù", "ú", "ý", "ă", "đ"]
	        + ["ĩ", "ũ", "ơ", "ư", "ạ", "ả", "ấ", "ầ", "ẩ", "ẫ"]
	        + ["ậ", "ắ", "ằ", "ẳ", "ẵ", "ặ", "ẹ", "ẻ", "ẽ", "ế"]
	        + ["ề", "ể", "ễ", "ệ", "ỉ", "ị", "ọ", "ỏ", "ố", "ồ"]
	        + ["ổ", "ỗ", "ộ", "ớ", "ờ", "ở", "ỡ", "ợ", "ụ", "ủ"]
	        + ["ứ", "ừ", "ử", "ữ", "ự", "ỳ", "ỵ", "ỷ", "ỹ"]
	    )

	    # dsp
	    mel_dim = 80
	    n_fft = 1024
	    sample_rate = 16000
	    fmin = 0.0
	    fmax = 8000

	    # training
	    batch_size = 64
	    learning_rate = 1e-4
	    duration_learning_rate = 1e-4
	    max_grad_norm = 1.0
	    weight_decay = 1e-4
	    token_mask_prob = 0.1

	    # ckpt
	    ckpt_dir = Path("assets/infore/nat")
	    data_dir = Path("train_data")


	class DurationInput(NamedTuple):
	    """Named bundle of three arrays, in the order (phonemes, lengths, durations).

	    NOTE(review): presumably one batch fed to the duration model; shapes and
	    dtypes are not visible here -- confirm against the data loader.
	    """

	    phonemes: ndarray  # presumably phoneme token ids -- TODO confirm
	    lengths: ndarray  # presumably valid length of each sequence -- TODO confirm
	    durations: ndarray  # presumably per-phoneme durations -- TODO confirm


	class AcousticInput(NamedTuple):
	    """Named bundle of six arrays.

	    Field order is (phonemes, lengths, durations, wavs, wav_lengths, mels).

	    NOTE(review): presumably one batch fed to the acoustic model; shapes and
	    dtypes are not visible here -- confirm against the data loader.
	    """

	    phonemes: ndarray  # presumably phoneme token ids -- TODO confirm
	    lengths: ndarray  # presumably valid length of each phoneme sequence -- TODO confirm
	    durations: ndarray  # presumably per-phoneme durations -- TODO confirm
	    wavs: ndarray  # presumably raw waveforms -- TODO confirm
	    wav_lengths: ndarray  # presumably valid length of each waveform -- TODO confirm
	    mels: ndarray  # presumably mel-spectrogram targets -- TODO confirm