MolmoE-1B-0924 / config.json
{
"auto_map": {
"AutoConfig": "config_molmoe.MolmoConfig",
"AutoModelForCausalLM": "modeling_molmoe.MolmoForCausalLM"
},
"activation_type": "swiglu",
"additional_vocab_size": 128,
"alibi": false,
"alibi_bias_max": 8.0,
"always_start_with_space": true,
"architectures": [
"OLMoForCausalLM"
],
"attention_dropout": 0.0,
"attention_layer_norm": true,
"attention_layer_norm_with_affine": true,
"attention_type": "sdpa",
"attn_logit_softcapping": null,
"bias_for_layer_norm": false,
"block_group_size": 1,
"block_type": "moe",
"clip_qkv": null,
"crop_mode": "overlap-and-resize-c2",
"d_model": 2048,
"default_inference_len": 65,
"do_random_scale": false,
"embedding_dropout": 0.0,
"embedding_size": 50304,
"final_logit_softcapping": null,
"fix_image_input_idx": 2,
"float32_attention": true,
"gin_bindings": null,
"head_dim": null,
"image_feature_dropout": 0.0,
"image_padding_embed": "pad_and_partial_pad",
"image_pooling_2d": "attention-meanq",
"image_pooling_h": 2,
"image_pooling_w": 2,
"image_projector": "mlp",
"include_bias": false,
"init_cutoff_factor": 3.0,
"init_device": "meta",
"init_fn": "normal",
"init_std": 0.02,
"initializer_range": 0.02,
"layer_norm_eps": 1e-05,
"layer_norm_type": "rms",
"layer_norm_with_affine": true,
"llm_load_path": null,
"loss_token_weighting": "root_subsegments",
"low_cpu_fsdp": true,
"max_crops": 12,
"max_position_embeddings": 32768,
"max_sequence_length": 4096,
"message_formatting": "role",
"mlp_hidden_size": null,
"mlp_ratio": 1,
"model_type": "molmo",
"moe_capacity_factor": 1.25,
"moe_dropless": true,
"moe_interleave": false,
"moe_lbl_in_fp32": false,
"moe_log_expert_assignment": false,
"moe_loss_weight": 0.0,
"moe_mlp_impl": "sparse",
"moe_num_experts": 64,
"moe_shared_expert": false,
"moe_top_k": 8,
"moe_zloss_weight": 0.0,
"multi_query_attention": null,
"n_heads": 16,
"n_kv_heads": null,
"n_layers": 16,
"new_embedding_init_range": 0.02,
"norm_after": false,
"normalize_input_embeds": false,
"overlap_margins": [
4,
4
],
"pad_to": null,
"pad_token_id": 1,
"pad_tokenizer": false,
"precision": "amp_bf16",
"prompt_override": null,
"prompt_type": "uber_model",
"qkv_bias": false,
"query_pre_attn_scalar": 224,
"residual_dropout": 0.1,
"response_attention_dropout": 0.0,
"response_residual_dropout": 0.0,
"rope": true,
"rope_full_precision": true,
"rope_impl": "llama",
"rope_theta": 10000.0,
"scale_logits": false,
"system_prompt_kind": "demo_or_style",
"tokenizer": {
"identifier": "allenai/gpt-neox-olmo-dolma-v1_5",
"olmo_bos_token_id": null,
"olmo_eos_token_id": null,
"tokenizer_adds_space": false,
"tokenizer_dir": null,
"truncate_direction": "right"
},
"transformers_version": "4.45.0.dev0",
"unconditioned": false,
"use_cache": true,
"use_cls_feature": false,
"use_col_tokens": true,
"use_position_ids": true,
"vision_backbone": {
"attention_dropout": 0.0,
"fsdp_wrap": false,
"image_default_input_size": [
336,
336
],
"image_dropout_rate": 0.0,
"image_emb_dim": 1024,
"image_head_dim": 64,
"image_mlp_activations": "quick_gelu",
"image_mlp_dim": 4096,
"image_model_type": "openai",
"image_norm_eps": 1e-05,
"image_num_heads": 16,
"image_num_key_value_heads": 16,
"image_num_layers": 23,
"image_num_pos": 577,
"image_patch_size": 14,
"image_pos_patch_size": 14,
"initializer_range": 0.02,
"residual_dropout": 0.0,
"resize_mode": "default"
},
"vit_layers": [
-2,
-9
],
"vit_load_path": null,
"vocab_size": 50280,
"weight_tying": false
}
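
Because auto_map points at custom classes shipped alongside this file (config_molmoe.MolmoConfig and modeling_molmoe.MolmoForCausalLM), loading through transformers requires trust_remote_code=True. A minimal loading sketch follows; the repo id "allenai/MolmoE-1B-0924" is an assumption here and should be replaced with wherever this config.json actually lives.

# Minimal loading sketch, assuming the checkpoint is hosted as
# "allenai/MolmoE-1B-0924" (assumed repo id, adjust as needed).
# trust_remote_code=True lets transformers resolve the auto_map entries
# above (config_molmoe.MolmoConfig / modeling_molmoe.MolmoForCausalLM).
import torch
from transformers import AutoConfig, AutoModelForCausalLM

repo_id = "allenai/MolmoE-1B-0924"  # assumed repo id

config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
# Keys from this config.json surface as attributes on the custom config:
# d_model=2048, n_layers=16, moe_num_experts=64, moe_top_k=8,
# i.e. a 16-layer MoE decoder with 64 experts and top-8 routing.
print(config.d_model, config.n_layers, config.moe_num_experts, config.moe_top_k)

model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,  # matches the "amp_bf16" precision setting
)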