{
  "architectures": [
    "FastSpeech2ConformerModel"
  ],
  "decoder_config": {
    "attention_dropout_rate": 0.2,
    "concat_after": false,
    "dropout_rate": 0.2,
    "kernel_size": 31,
    "layers": 4,
    "linear_units": 1536,
    "normalize_before": false,
    "num_attention_heads": 2,
    "positional_dropout_rate": 0.2
  },
  "duration_predictor_channels": 256,
  "duration_predictor_dropout_rate": 0.2,
  "duration_predictor_kernel_size": 3,
  "duration_predictor_layers": 2,
  "encoder_config": {
    "attention_dropout_rate": 0.2,
    "concat_after": false,
    "dropout_rate": 0.2,
    "kernel_size": 7,
    "layers": 4,
    "linear_units": 1536,
    "normalize_before": false,
    "num_attention_heads": 2,
    "positional_dropout_rate": 0.2
  },
  "encoder_layers": 4,
  "encoder_num_attention_heads": 2,
  "energy_embed_dropout": 0.0,
  "energy_embed_kernel_size": 1,
  "energy_predictor_channels": 256,
  "energy_predictor_dropout": 0.5,
  "energy_predictor_kernel_size": 3,
  "energy_predictor_layers": 2,
  "hidden_size": 384,
  "input_dim": 78,
  "is_encoder_decoder": true,
  "max_source_positions": 5000,
  "model_type": "fastspeech2_conformer",
  "num_languages": null,
  "num_mel_bins": 80,
  "num_speakers": null,
  "pitch_embed_dropout": 0.0,
  "pitch_embed_kernel_size": 1,
  "pitch_predictor_channels": 256,
  "pitch_predictor_dropout": 0.5,
  "pitch_predictor_kernel_size": 5,
  "pitch_predictor_layers": 5,
  "positionwise_conv_kernel_size": 3,
  "reduction_factor": 1,
  "speaker_embed_dim": null,
  "speaking_speed": 1.0,
  "speech_decoder_postnet_dropout": 0.5,
  "speech_decoder_postnet_kernel": 5,
  "speech_decoder_postnet_layers": 5,
  "speech_decoder_postnet_units": 256,
  "stop_gradient_from_energy_predictor": false,
  "stop_gradient_from_pitch_predictor": true,
  "torch_dtype": "float32",
  "transformers_version": "4.33.0.dev0",
  "use_cnn_in_conformer": true,
  "use_macaron_style_in_conformer": true,
  "use_masking": true,
  "use_weighted_masking": false,
  "vocab_size": 78
}