{
  "_name_or_path": "siglip-model",
  "architectures": [
    "SiglipVisionModel"
  ],
  "image_grid_pinpoints": [
    [384,768],
    [384,1152],
    [384,1536],
    [384,1920],
    [384,2304],
    [384,2688],
    [384,3072],
    [384,3456],
    [384,3840],
    [768,384],
    [768,768],
    [768,1152],
    [768,1536],
    [768,1920],
    [1152,384],
    [1152,768],
    [1152,1152],
    [1536,384],
    [1536,768],
    [1920,384],
    [1920,768],
    [2304,384],
    [2688,384],
    [3072,384],
    [3456,384],
    [3840,384]
  ],
  "mm_patch_merge_type": "spatial_unpad",
  "hidden_size": 1152,
  "image_size": 384,
  "intermediate_size": 4304,
  "model_type": "siglip_vision_model",
  "num_attention_heads": 16,
  "num_hidden_layers": 27,
  "patch_size": 14,
  "transformers_version": "4.45.0.dev0",
  "layer_norm_eps": 1e-6,
  "hidden_act": "gelu_pytorch_tanh",
  "projection_dim": 0,
  "vision_feature_layer": [-24, -20, -12, -1]
}