---
train:
  seed: 1234
  epochs: 100
  batch_size: 6
  gradient_accumulation: 4
  save_every_n_epoch: 1
  precision: 32
  gradient_clip: 1.0
  optimizer:
    lr: 0.01
    lr_init: 0.00001
    lr_end: 0.0001
    warmup_steps: 2000
    decay_steps: 40000
data:
  max_eval_sample: 8
  max_sec: 40
  num_workers: 1
  pad_val: 1024
model:
  saving_path: "ckpt/"
  resume_checkpoint: null
  vocoder_config_path: "quantizer/new_ckpt/config.json"
  vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
  datadir: "/home/liweiche/GigaSpeech/wavs"
  metapath: "/home/liweiche/GigaSpeech/train2.json"
  val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
  sampledir: "logs/"
  pretrained_path: null
  lr: 0.0001
  batch_size: 200.0
  train_bucket_size: 8192
  training_step: 800000
  optim_flat_percent: 0.0
  warmup_step: 50
  adam_beta1: 0.9
  adam_beta2: 0.98
  ffd_size: 3072
  hidden_size: 768
  enc_nlayers: 6
  dec_nlayers: 6
  nheads: 12
  ar_layer: 4
  ar_ffd_size: 1024
  ar_hidden_size: 256
  ar_nheads: 4
  aligner_softmax_temp: 1.0
  layer_norm_eps: 0.00001
  speaker_embed_dropout: 0.05
  label_smoothing: 0.0
  val_check_interval: 5000
  check_val_every_n_epoch: 1
  precision: "fp16"
  nworkers: 16
  distributed: true
  accelerator: "ddp"
  version: null
  accumulate_grad_batches: 1
  use_repetition_token: true
  use_repetition_gating: false
  repetition_penalty: 1.0
  sampling_temperature: 1.0
  top_k: -1
  min_top_k: 3
  top_p: 0.8
  sample_num: 4
  length_penalty_max_length: 15000
  length_penalty_max_prob: 0.95
  max_input_length: 2048
  max_output_length: 2000
  sample_rate: 16000
  n_codes: 1024
  n_cluster_groups: 1
  phone_context_window: 4
  phoneset_size: 1000
inference:
  top_k: 5