{
  "_name_or_path": "siglip-model",
  "architectures": [
    "SiglipVisionModel"
  ],
  "image_grid_pinpoints": [
    [384, 768],
    [384, 1152],
    [384, 1536],
    [384, 1920],
    [384, 2304],
    [384, 2688],
    [384, 3072],
    [384, 3456],
    [384, 3840],
    [768, 384],
    [768, 768],
    [768, 1152],
    [768, 1536],
    [768, 1920],
    [1152, 384],
    [1152, 768],
    [1152, 1152],
    [1536, 384],
    [1536, 768],
    [1920, 384],
    [1920, 768],
    [2304, 384],
    [2688, 384],
    [3072, 384],
    [3456, 384],
    [3840, 384]
  ],
  "mm_patch_merge_type": "spatial_unpad",
  "hidden_size": 1152,
  "image_size": 384,
  "intermediate_size": 4304,
  "model_type": "siglip_vision_model",
  "num_attention_heads": 16,
  "num_hidden_layers": 27,
  "patch_size": 14,
  "transformers_version": "4.45.0.dev0",
  "layer_norm_eps": 1e-6,
  "hidden_act": "gelu_pytorch_tanh",
  "projection_dim": 0,
  "vision_feature_layer": [-24, -20, -12, -1]
}