add_qkv_bias: true
asr_adapter: llamamlp
attn_dropout: 0.0
bias: false
block_size: 2048
force_align: false
gelu_approximate: none
head_size: 64
hf_config:
  name: Qwen2-0.5B
  org: Qwen
intermediate_size: 4864
lm_head_bias: false
mlp_class_name: LLaMAMLP
n_embd: 896
n_expert: 0
n_expert_per_token: 0
n_head: 14
n_layer: 24
n_query_groups: 2
name: Qwen2-0.5B
norm_class_name: RMSNorm
norm_eps: 1.0e-06
padded_vocab_size: 181120
padding_multiple: 512
parallel_residual: false
pos_type: rope
post_adapter: false
post_adapter_layers: 6
prompt_vocab_size: null
rope_base: 1000000
rope_condense_ratio: 1
rotary_percentage: 1
scale_embeddings: false
shared_attention_norm: false
tie_word_embeddings: true
use_pretrain_phoneme_emb: false
vocab_size: 50254
text_vocab_size: 152000
cat_audio_vocab_size: 29120
audio_vocab_size: 4160
whisper_adapter_dim: 768
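Several of these fields are tied together by simple arithmetic: `head_size * n_head` gives `n_embd` (64 × 14 = 896), `n_query_groups: 2` splits the 14 query heads into 2 KV groups, and `padded_vocab_size` is the sum of `text_vocab_size` and `cat_audio_vocab_size` (152000 + 29120 = 181120), with `cat_audio_vocab_size` itself being a whole multiple of `audio_vocab_size` (29120 / 4160 = 7). Below is a minimal sketch, assuming the block above is saved as `config.yaml` and PyYAML is installed, that loads the config and verifies these relationships; the filename and checks are illustrative, not part of the original tooling.

```python
# Sanity-check sketch for the config above (assumed saved as config.yaml).
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

# Attention geometry: per-head width times head count equals model width.
assert cfg["head_size"] * cfg["n_head"] == cfg["n_embd"]  # 64 * 14 == 896

# Grouped-query attention: query heads divide evenly into KV groups.
assert cfg["n_head"] % cfg["n_query_groups"] == 0  # 14 / 2 -> 7 heads per group

# Padded vocabulary = text vocabulary + concatenated audio vocabulary.
assert cfg["padded_vocab_size"] == (
    cfg["text_vocab_size"] + cfg["cat_audio_vocab_size"]
)  # 152000 + 29120 == 181120

# Concatenated audio vocabulary is a whole number of per-codebook vocabularies.
assert cfg["cat_audio_vocab_size"] % cfg["audio_vocab_size"] == 0  # 29120 / 4160 == 7

print("config is internally consistent")
```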