|
|
|
|
|
|
|
|
|
|
|
|
|
from transformers import PretrainedConfig |
|
|
|
|
|
class InternImageConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`~InternImageModel`].
    It is used to instantiate an InternImage model according to the specified arguments, defining the model
    architecture. Instantiating a configuration with the defaults will yield a similar configuration to that of
    the InternImage [OpenGVLab/internimage](https://huggingface.co/OpenGVLab/internimage) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used
    to control the model outputs. Read the documentation from [`PretrainedConfig`]
    for more information.

    Every argument below is stored unchanged as an attribute of the same name. Any additional keyword
    arguments are forwarded to `PretrainedConfig.__init__`.

    Args:
        core_op (`str`, *optional*, defaults to `"DCNv3"`):
            Core operation used in the InternImageModel.
        depths (`tuple`, *optional*, defaults to `(4, 4, 18, 4)`):
            Tuple specifying the depth of layers in the InternImageModel.
        groups (`tuple`, *optional*, defaults to `(4, 8, 16, 32)`):
            Tuple specifying the group of layers in the InternImageModel.
        channels (`int`, *optional*, defaults to `64`):
            Number of channels in the InternImageModel.
        dw_kernel_size (`int`, *optional*, defaults to `None`):
            Kernel size for depthwise convolutions.
        layer_scale (`float`, *optional*, defaults to `None`):
            Scale of the layers in the model.
        offset_scale (`float`, *optional*, defaults to `1.0`):
            Offset scale in the model.
        mlp_ratio (`float`, *optional*, defaults to `4.0`):
            Ratio of mlp layers in the InternImageModel.
        post_norm (`bool`, *optional*, defaults to `False`):
            Whether to use post normalization in the model.
        res_post_norm (`bool`, *optional*, defaults to `False`):
            Presumably whether to use the residual post normalization variant in the model; the exact
            effect is defined by the model implementation (TODO: confirm against `InternImageModel`).
        level2_post_norm (`bool`, *optional*, defaults to `False`):
            Whether to use level 2 post normalization.
        level2_post_norm_block_ids (`list`, *optional*, defaults to `None`):
            Specific block IDs for level 2 post normalization.
        center_feature_scale (`bool`, *optional*, defaults to `False`):
            Whether to apply center feature scaling.
        use_clip_projector (`bool`, *optional*, defaults to `False`):
            Whether to use CLIP projector.
        remove_center (`bool`, *optional*, defaults to `False`):
            Whether to remove center pixels in some operations.
        num_classes (`int`, *optional*, defaults to `1000`):
            Number of classes for the model output.
        drop_rate (`float`, *optional*, defaults to `0.0`):
            Dropout rate in the model.
        drop_path_rate (`float`, *optional*, defaults to `0.0`):
            Dropout path rate in the model.
        drop_path_type (`str`, *optional*, defaults to `"linear"`):
            Type of dropout path used in the model.
        act_layer (`str`, *optional*, defaults to `"GELU"`):
            Activation function used in the model.
        norm_layer (`str`, *optional*, defaults to `"LN"`):
            Normalization layer used in the model.
        cls_scale (`float`, *optional*, defaults to `1.5`):
            Scale of the classification layer in the model.
        with_cp (`bool`, *optional*, defaults to `False`):
            Whether to use checkpointing in the model.
    """

    model_type = 'internimage'

    def __init__(
        self,
        core_op='DCNv3',
        depths=(4, 4, 18, 4),
        groups=(4, 8, 16, 32),
        channels=64,
        dw_kernel_size=None,
        layer_scale=None,
        offset_scale=1.0,
        mlp_ratio=4.0,
        post_norm=False,
        res_post_norm=False,
        level2_post_norm=False,
        level2_post_norm_block_ids=None,
        center_feature_scale=False,
        use_clip_projector=False,
        remove_center=False,
        num_classes=1000,
        drop_rate=0.0,
        drop_path_rate=0.0,
        drop_path_type='linear',
        act_layer='GELU',
        norm_layer='LN',
        cls_scale=1.5,
        with_cp=False,
        **kwargs,
    ):
        # The base class is initialised first with the leftover keyword
        # arguments; the model-specific attributes are assigned afterwards.
        super().__init__(**kwargs)

        # Core operator and stage layout.
        self.core_op = core_op
        self.depths = depths
        self.groups = groups
        self.channels = channels
        self.dw_kernel_size = dw_kernel_size

        # Scaling factors and MLP width.
        self.layer_scale = layer_scale
        self.offset_scale = offset_scale
        self.mlp_ratio = mlp_ratio

        # Normalization placement switches.
        self.post_norm = post_norm
        self.res_post_norm = res_post_norm
        self.level2_post_norm = level2_post_norm
        self.level2_post_norm_block_ids = level2_post_norm_block_ids

        # Optional feature switches.
        self.center_feature_scale = center_feature_scale
        self.use_clip_projector = use_clip_projector
        self.remove_center = remove_center

        # Output size and regularization.
        self.num_classes = num_classes
        self.drop_rate = drop_rate
        self.drop_path_rate = drop_path_rate
        self.drop_path_type = drop_path_type

        # Layer type selectors and remaining options.
        self.act_layer = act_layer
        self.norm_layer = norm_layer
        self.cls_scale = cls_scale
        self.with_cp = with_cp
|
|