internimage_g_22kto1k_512 / configuration_internimage.py

Upload models

bd51328 verified 7 days ago

5.39 kB

	# --------------------------------------------------------
	# InternImage
	# Copyright (c) 2025 OpenGVLab
	# Licensed under The MIT License [see LICENSE for details]
	# --------------------------------------------------------

	from transformers import PretrainedConfig


	class InternImageConfig(PretrainedConfig):
	r"""
	This is the configuration class to store the configuration of a [`~InternImageModel`].
	It is used to instantiate an internimage model according to the specified arguments, defining the model
	architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
	the internimage [OpenGVLab/internimage](https://huggingface.co/OpenGVLab/internimage) architecture.

	Configuration objects inherit from [`PretrainedConfig`] and can be used
	to control the model outputs. Read the documentation from [`PretrainedConfig`]
	for more information.

	Args:
	core_op (`str`, optional, defaults to `"DCNv3"`):
	Core operation used in the InternImageModel.
	depths (`tuple`, optional, defaults to `(4, 4, 18, 4)`):
	Tuple specifying the depth of layers in the InternImageModel.
	groups (`tuple`, optional, defaults to `(4, 8, 16, 32)`):
	Tuple specifying the group of layers in the InternImageModel.
	channels (`int`, optional, defaults to `64`):
	Number of channels in the InternImageModel.
	dw_kernel_size (`int`, optional, defaults to `None`):
	Kernel size for depthwise convolutions.
	layer_scale (`float`, optional, defaults to `None`):
	Scale of the layers in the model.
	offset_scale (`float`, optional, defaults to `1.0`):
	Offset scale in the model.
	mlp_ratio (`float`, optional, defaults to `4.0`):
	Ratio of mlp layers in the InternImageModel.
	post_norm (`bool`, optional, defaults to `False`):
	Whether to use post normalization in the model.
	level2_post_norm (`bool`, optional, defaults to `False`):
	Whether to use level 2 post normalization.
	level2_post_norm_block_ids (`list`, optional, defaults to `None`):
	Specific block IDs for level 2 post normalization.
	center_feature_scale (`bool`, optional, defaults to `False`):
	Whether to apply center feature scaling.
	use_clip_projector (`bool`, optional, defaults to `False`):
	Whether to use CLIP projector.
	remove_center (`bool`, optional, defaults to `False`):
	Whether to remove center pixels in some operations.
	num_classes (`int`, optional, defaults to `1000`):
	Number of classes for the model output.
	drop_rate (`float`, optional, defaults to `0.0`):
	Dropout rate in the model.
	drop_path_rate (`float`, optional, defaults to `0.0`):
	Dropout path rate in the model.
	drop_path_type (`str`, optional, defaults to `"linear"`):
	Type of dropout path used in the model.
	act_layer (`str`, optional, defaults to `"GELU"`):
	Activation function used in the model.
	norm_layer (`str`, optional, defaults to `"LN"`):
	Normalization layer used in the model.
	cls_scale (`float`, optional, defaults to `1.5`):
	Scale of the classification layer in the model.
	with_cp (`bool`, optional, defaults to `False`):
	Whether to use checkpointing in the model.
	"""
	model_type = 'internimage'

	def __init__(
	self,
	core_op='DCNv3',
	depths=(4, 4, 18, 4),
	groups=(4, 8, 16, 32),
	channels=64,
	dw_kernel_size=None,
	layer_scale=None,
	offset_scale=1.0,
	mlp_ratio=4.0,
	post_norm=False,
	res_post_norm=False,
	level2_post_norm=False,
	level2_post_norm_block_ids=None,
	center_feature_scale=False,
	use_clip_projector=False,
	remove_center=False,
	num_classes=1000,
	drop_rate=0.0,
	drop_path_rate=0.0,
	drop_path_type='linear',
	act_layer='GELU',
	norm_layer='LN',
	cls_scale=1.5,
	with_cp=False,
	**kwargs,
	):
	super().__init__(**kwargs)

	# Model configuration parameters
	self.core_op = core_op
	self.depths = depths
	self.groups = groups
	self.channels = channels
	self.dw_kernel_size = dw_kernel_size
	self.layer_scale = layer_scale
	self.offset_scale = offset_scale
	self.mlp_ratio = mlp_ratio
	self.post_norm = post_norm
	self.res_post_norm = res_post_norm
	self.level2_post_norm = level2_post_norm
	self.level2_post_norm_block_ids = level2_post_norm_block_ids
	self.center_feature_scale = center_feature_scale
	self.use_clip_projector = use_clip_projector
	self.remove_center = remove_center
	self.num_classes = num_classes
	self.drop_rate = drop_rate
	self.drop_path_rate = drop_path_rate
	self.drop_path_type = drop_path_type
	self.act_layer = act_layer
	self.norm_layer = norm_layer
	self.cls_scale = cls_scale
	self.with_cp = with_cp