# onair-audio-caption-v1 / audiocaps_config.yaml
# Hugging Face file-page metadata (not part of the configuration):
#   uploaded by rippertnt, commit "Upload 2 files" (896d9d0, verified), 768 Bytes
---
# Audio-captioning configuration.
# Components are described by a `type` (dotted class path) plus an `args`
# mapping -- presumably the constructor keyword arguments; confirm against
# the code that loads this file.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; verify against the upstream file.

tokenizer:
  type: text_tokenizer.DictTokenizer
  args:
    max_length: 20

# NOTE(review): presumably the target audio sample rate in Hz -- confirm.
target_sr: 16000

# Outer wrapper; the model it wraps is configured under the nested `model`.
model:
  args:
    shared_dim: 1024
    tchr_dim: 768
  model:
    # Explicit empty mapping: no extra arguments for this level.
    args: {}
    decoder:
      args:
        # NOTE(review): attn_emb_dim and fc_emb_dim are both 1408, presumably
        # to match the encoder's output width -- confirm before changing.
        attn_emb_dim: 1408
        dropout: 0.2
        emb_dim: 256
        fc_emb_dim: 1408
        nlayers: 2
        tie_weights: true
        # NOTE(review): presumably must agree with the tokenizer vocabulary.
        vocab_size: 4981
      type: models.transformer_decoder.TransformerDecoder
    encoder:
      args:
        freeze: false
        pretrained: true
      type: models.cnn_encoder.EfficientNetB2
    type: models.transformer_model.TransformerModel
  type: models.kd_wrapper.ContraEncoderKdWrapper