VL3-SigLIP-NaViT / config.json
ClownRat's picture
Upload model
0e04069 verified
raw
history blame contribute delete
602 Bytes
{
"architectures": [
"Videollama3VisionEncoderModel"
],
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_videollama3_encoder.Videollama3VisionEncoderConfig",
"AutoModel": "modeling_videollama3_encoder.Videollama3VisionEncoderModel"
},
"hidden_act": "gelu_pytorch_tanh",
"hidden_size": 1152,
"intermediate_size": 4304,
"layer_norm_eps": 1e-06,
"model_type": "videollama3_vision_encoder",
"num_attention_heads": 16,
"num_channels": 3,
"num_hidden_layers": 27,
"patch_size": 14,
"torch_dtype": "bfloat16",
"transformers_version": "4.46.3"
}