```yaml
tokenizer:
  type: text_tokenizer.DictTokenizer
  args:
    max_length: 20
target_sr: 16000
model:
  type: models.kd_wrapper.ContraEncoderKdWrapper
  args:
    shared_dim: 1024
    tchr_dim: 768
    model:
      type: models.transformer_model.TransformerModel
      args: {}
      encoder:
        type: models.cnn_encoder.EfficientNetB2
        args:
          freeze: false
          pretrained: true
      decoder:
        type: models.transformer_decoder.TransformerDecoder
        args:
          attn_emb_dim: 1408
          fc_emb_dim: 1408
          emb_dim: 256
          nlayers: 2
          dropout: 0.2
          tie_weights: true
          vocab_size: 4981
```
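For readers wiring this config into code, the sketch below shows how `type`/`args` nodes like these are commonly resolved, assuming the file is loaded with PyYAML and every `type` string names an importable class. The `instantiate` helper is hypothetical, not part of the repository; note that sub-models appear both inside `args` (the wrapped `model` under the KD wrapper) and beside it (the `encoder`/`decoder` nodes under the transformer model), so both spots are handled recursively.

```python
import importlib

import yaml


def instantiate(spec):
    # Hypothetical helper: build an object from a {type, args, ...} node.
    module_path, _, class_name = spec["type"].rpartition(".")
    cls = getattr(importlib.import_module(module_path), class_name)
    # Sub-specs may sit inside "args" (e.g. model.args.model) or as
    # siblings of "args" (e.g. the encoder/decoder nodes); recurse on both.
    candidates = {**spec.get("args", {}),
                  **{k: v for k, v in spec.items() if k not in ("type", "args")}}
    kwargs = {}
    for key, value in candidates.items():
        if isinstance(value, dict) and "type" in value:
            kwargs[key] = instantiate(value)
        else:
            kwargs[key] = value
    return cls(**kwargs)


with open("config.yaml") as f:  # path is an assumption
    config = yaml.safe_load(f)

tokenizer = instantiate(config["tokenizer"])
model = instantiate(config["model"])
```

Applied to the config above, this would yield `ContraEncoderKdWrapper(shared_dim=1024, tchr_dim=768, model=TransformerModel(encoder=EfficientNetB2(pretrained=True, freeze=False), decoder=TransformerDecoder(...)))`.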
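The `shared_dim`/`tchr_dim` pair suggests the KD wrapper projects student and teacher embeddings into a common 1024-dim space and aligns them contrastively, the teacher producing 768-dim embeddings (`tchr_dim`). The actual `ContraEncoderKdWrapper` is not shown here; the following is a hypothetical sketch of that projection-plus-InfoNCE pattern, with the 1408-dim student input taken from `attn_emb_dim` (EfficientNet-B2's final feature width) as an assumption, as is the temperature value.

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class ContraProjection(nn.Module):
    # Hypothetical: project student (1408-d) and teacher (768-d)
    # embeddings into the 1024-d shared space from the config.
    def __init__(self, stdnt_dim=1408, tchr_dim=768, shared_dim=1024):
        super().__init__()
        self.stdnt_proj = nn.Linear(stdnt_dim, shared_dim)
        self.tchr_proj = nn.Linear(tchr_dim, shared_dim)

    def forward(self, stdnt_emb, tchr_emb, temperature=0.07):
        s = F.normalize(self.stdnt_proj(stdnt_emb), dim=-1)
        t = F.normalize(self.tchr_proj(tchr_emb), dim=-1)
        logits = s @ t.T / temperature  # (batch, batch) cosine similarities
        targets = torch.arange(s.size(0), device=s.device)
        # Symmetric InfoNCE: each student clip should match its own
        # teacher embedding and vice versa.
        return (F.cross_entropy(logits, targets)
                + F.cross_entropy(logits.T, targets)) / 2
```

Under this reading, the wrapper adds the contrastive loss on top of the usual captioning loss, distilling the teacher's audio representation into the EfficientNet-B2 student encoder.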