# Page-capture residue, not part of the configuration:
# Spaces status "Runtime error"; file size: 1,357 Bytes; 64e7f2f
# Parent configuration files, kept in their original order.
# NOTE(review): how the loader merges these is defined outside this file — confirm.
base_config:
  - egs/egs_bases/tts/vocoder/pwg.yaml
  - egs/egs_bases/tts/base_mel2wav.yaml
  - egs/datasets/audio/lj/pwg.yaml

# Dataset locations.
raw_data_dir: 'data/raw/LJSpeech-1.1'
processed_data_dir: 'data/processed/LJSpeech_FastDiff'
# Repo-relative alternative, currently disabled:
# binary_data_dir: 'data/binary/LJSpeech_Taco'
# NOTE(review): absolute, machine-specific path — unlikely to exist on other
# hosts; confirm before reusing this config elsewhere.
binary_data_dir: '/apdcephfs/private_nlphuang/preprocess/AdaGrad/data/binary/LJSpeech_Taco'

# Dotted paths naming the binarizer, pre-alignment and task classes
# (presumably resolved by the consuming framework — verify).
binarizer_cls: data_gen.tts.vocoder_binarizer.VocoderBinarizer
pre_align_cls: egs.datasets.audio.lj.pre_align.LJPreAlign
task_cls: modules.FastDiff.task.FastDiff.FastDiffTask
# Flags for the binarization step. The entries must be indented under
# `binarization_args`: at column 0 the key itself parses as null and each
# `with_*` entry becomes an unrelated top-level key.
binarization_args:
  with_wav: true
  with_spk_embed: false
  with_align: false
  with_word: false
  with_txt: false
  with_f0: false
# data
num_spk: 400
max_samples: 25600
aux_context_window: 0
max_sentences: 20
test_input_dir: ''  # 'wavs' # wav->wav inference
test_mel_dir: ''  # 'mels' # mel->wav inference
# Lowercase boolean, matching the `true`/`false` spelling used by the
# binarization flags in this file.
use_wav: true  # mel->wav inference
# training
num_sanity_val_steps: -1
max_updates: 1000000
# Written with a decimal point on purpose: YAML 1.1 loaders (e.g. PyYAML)
# resolve a bare `2e-4` as the string "2e-4" rather than a float.
lr: 2.0e-4
weight_decay: 0
# FastDiff
# NOTE(review): the meaning of each key is defined by the model code, which is
# not visible from this file.
audio_channels: 1
inner_channels: 32
cond_channels: 80
upsample_ratios: [8, 8, 4]
lvc_layers_each_block: 4
lvc_kernel_size: 3
kpnet_hidden_channels: 64
kpnet_conv_size: 3
dropout: 0.0
diffusion_step_embed_dim_in: 128
diffusion_step_embed_dim_mid: 512
diffusion_step_embed_dim_out: 512
# Lowercase boolean, matching the `true`/`false` spelling used by the
# binarization flags in this file.
use_weight_norm: true
# Diffusion
# NOTE(review): T / beta_0 / beta_T look like the step count and the two
# endpoints of the beta schedule — confirm against the task code.
T: 1000
beta_0: 1.0e-6
beta_T: 0.01
# Both entries below are set to empty strings in this config.
noise_schedule: ''
N: ''
# (stray "|" from the page capture removed; end of captured content)