# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.regularizer import L2Decay
from paddle.nn import AdaptiveAvgPool2D, BatchNorm, Conv2D

from paddleseg.cvlibs import manager
from paddleseg.utils import utils, logger

__all__ = [
    "MobileNetV3_small_x0_35", "MobileNetV3_small_x0_5",
    "MobileNetV3_small_x0_75", "MobileNetV3_small_x1_0",
    "MobileNetV3_small_x1_25", "MobileNetV3_large_x0_35",
    "MobileNetV3_large_x0_5", "MobileNetV3_large_x0_75",
    "MobileNetV3_large_x1_0", "MobileNetV3_large_x1_25",
    "MobileNetV3_large_x1_0_os8", "MobileNetV3_small_x1_0_os8"
]

MODEL_STAGES_PATTERN = {
    "MobileNetV3_small": ["blocks[0]", "blocks[2]", "blocks[7]", "blocks[10]"],
    "MobileNetV3_large":
    ["blocks[0]", "blocks[2]", "blocks[5]", "blocks[11]", "blocks[14]"]
}

# The "large" and "small" configs are for MobileNetV3_large and
# MobileNetV3_small respectively. Each config is a list whose elements (lists)
# each describe one depthwise block, composed of k, exp, c, se, act, s and an
# optional d:
# k: kernel size
# exp: middle (expanded) channel number in the depthwise block
# c: output channel number of the depthwise block
# se: whether to use an SE block
# act: which activation to use
# s: stride of the depthwise block
# d: dilation rate of the depthwise block (defaults to 1 when omitted)
NET_CONFIG = {
    "large": [
        # k, exp, c, se, act, s
        [3, 16, 16, False, "relu", 1],
        [3, 64, 24, False, "relu", 2],
        [3, 72, 24, False, "relu", 1],  # x4
        [5, 72, 40, True, "relu", 2],
        [5, 120, 40, True, "relu", 1],
        [5, 120, 40, True, "relu", 1],  # x8
        [3, 240, 80, False, "hardswish", 2],
        [3, 200, 80, False, "hardswish", 1],
        [3, 184, 80, False, "hardswish", 1],
        [3, 184, 80, False, "hardswish", 1],
        [3, 480, 112, True, "hardswish", 1],
        [3, 672, 112, True, "hardswish", 1],  # x16
        [5, 672, 160, True, "hardswish", 2],
        [5, 960, 160, True, "hardswish", 1],
        [5, 960, 160, True, "hardswish", 1],  # x32
    ],
    "small": [
        # k, exp, c, se, act, s
        [3, 16, 16, True, "relu", 2],
        [3, 72, 24, False, "relu", 2],
        [3, 88, 24, False, "relu", 1],
        [5, 96, 40, True, "hardswish", 2],
        [5, 240, 40, True, "hardswish", 1],
        [5, 240, 40, True, "hardswish", 1],
        [5, 120, 48, True, "hardswish", 1],
        [5, 144, 48, True, "hardswish", 1],
        [5, 288, 96, True, "hardswish", 2],
        [5, 576, 96, True, "hardswish", 1],
        [5, 576, 96, True, "hardswish", 1],
    ],
    "large_os8": [
        # k, exp, c, se, act, s, {d}
        [3, 16, 16, False, "relu", 1],
        [3, 64, 24, False, "relu", 2],
        [3, 72, 24, False, "relu", 1],  # x4
        [5, 72, 40, True, "relu", 2],
        [5, 120, 40, True, "relu", 1],
        [5, 120, 40, True, "relu", 1],  # x8
        [3, 240, 80, False, "hardswish", 1],
        [3, 200, 80, False, "hardswish", 1, 2],
        [3, 184, 80, False, "hardswish", 1, 2],
        [3, 184, 80, False, "hardswish", 1, 2],
        [3, 480, 112, True, "hardswish", 1, 2],
        [3, 672, 112, True, "hardswish", 1, 2],
        [5, 672, 160, True, "hardswish", 1, 2],
        [5, 960, 160, True, "hardswish", 1, 4],
        [5, 960, 160, True, "hardswish", 1, 4],
    ],
    "small_os8": [
        # k, exp, c, se, act, s, {d}
        [3, 16, 16, True, "relu", 2],
        [3, 72, 24, False, "relu", 2],
        [3, 88, 24, False, "relu", 1],
        [5, 96, 40, True, "hardswish", 1],
        [5, 240, 40, True, "hardswish", 1, 2],
        [5, 240, 40, True, "hardswish", 1, 2],
        [5, 120, 48, True, "hardswish", 1, 2],
        [5, 144, 48, True, "hardswish", 1, 2],
        [5, 288, 96, True, "hardswish", 1, 2],
        [5, 576, 96, True, "hardswish", 1, 4],
        [5, 576, 96, True, "hardswish", 1, 4],
    ]
}
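

# Illustrative helper (not used by the backbone): shows how a single
# NET_CONFIG row unpacks. The trailing dilation entry is optional and only
# present in the "*_os8" configs.
def _describe_block(cfg_row):
    k, exp, c, se, act, s, *d = cfg_row
    return ("kernel={}, expand={}, out={}, se={}, act={}, stride={}, "
            "dilation={}".format(k, exp, c, se, act, s, d[0] if d else 1))
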

OUT_INDEX = {"large": [2, 5, 11, 14], "small": [0, 2, 7, 10]}


def _make_divisible(v, divisor=8, min_value=None):
    """Round `v` to the nearest multiple of `divisor`, never going below
    `min_value` and never dropping more than 10% below the original value."""
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


def _create_act(act):
    if act == "hardswish":
        return nn.Hardswish()
    elif act == "relu":
        return nn.ReLU()
    elif act is None:
        return None
    else:
        raise RuntimeError(
            "The activation function is not supported: {}".format(act))
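

# Illustrative sanity check (not used by the backbone): _make_divisible keeps
# scaled channel counts at hardware-friendly multiples of 8. For example, the
# 16-channel stem at scale=0.35 becomes 8 channels, and 120 * 0.75 = 90 rounds
# down to 88 (still within 10% of 90).
def _demo_make_divisible():
    assert _make_divisible(16 * 0.35) == 8
    assert _make_divisible(120 * 0.75) == 88
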

class MobileNetV3(nn.Layer):
    """
    MobileNetV3

    Args:
        config (list): MobileNetV3 depthwise block configs, one entry per block.
        stages_pattern (list): Layer-name patterns marking the stage boundaries.
        out_index (list): Indices of the blocks whose outputs are returned as features.
        scale (float, optional): The coefficient that controls the size of the network parameters. Default: 1.0.
        pretrained (str, optional): The path or URL of pretrained model weights. Default: None.

    Returns:
        model: nn.Layer. A specific MobileNetV3 model determined by the args.
    """

    def __init__(self,
                 config,
                 stages_pattern,
                 out_index,
                 scale=1.0,
                 pretrained=None):
        super().__init__()

        self.cfg = config
        self.out_index = out_index
        self.scale = scale
        self.pretrained = pretrained
        inplanes = 16

        self.conv = ConvBNLayer(
            in_c=3,
            out_c=_make_divisible(inplanes * self.scale),
            filter_size=3,
            stride=2,
            padding=1,
            num_groups=1,
            if_act=True,
            act="hardswish")
        self.blocks = nn.Sequential(*[
            ResidualUnit(
                in_c=_make_divisible(inplanes * self.scale if i == 0 else
                                     self.cfg[i - 1][2] * self.scale),
                mid_c=_make_divisible(self.scale * exp),
                out_c=_make_divisible(self.scale * c),
                filter_size=k,
                stride=s,
                use_se=se,
                act=act,
                dilation=td[0] if td else 1)
            for i, (k, exp, c, se, act, s, *td) in enumerate(self.cfg)
        ])

        out_channels = [config[idx][2] for idx in self.out_index]
        self.feat_channels = [
            _make_divisible(self.scale * c) for c in out_channels
        ]

        self.init_res(stages_pattern)
        self.init_weight()

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def init_res(self, stages_pattern, return_patterns=None,
                 return_stages=None):
        if return_patterns and return_stages:
            msg = "The 'return_patterns' would be ignored when 'return_stages' is set."
            logger.warning(msg)
            return_stages = None

        if return_stages is True:
            return_patterns = stages_pattern

        # return_stages is an int or a list of ints
        if type(return_stages) is int:
            return_stages = [return_stages]
        if isinstance(return_stages, list):
            if max(return_stages) > len(stages_pattern) - 1 or min(
                    return_stages) < 0:
                msg = f"Invalid 'return_stages' value(s) have been ignored. The stages' pattern list is {stages_pattern}."
                logger.warning(msg)
                return_stages = [
                    val for val in return_stages
                    if val >= 0 and val < len(stages_pattern)
                ]
            return_patterns = [stages_pattern[i] for i in return_stages]

    def forward(self, x):
        x = self.conv(x)

        feat_list = []
        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.out_index:
                feat_list.append(x)

        return feat_list


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_c,
                 out_c,
                 filter_size,
                 stride,
                 padding,
                 num_groups=1,
                 if_act=True,
                 act=None,
                 dilation=1):
        super().__init__()

        self.conv = Conv2D(
            in_channels=in_c,
            out_channels=out_c,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            bias_attr=False,
            dilation=dilation)
        self.bn = BatchNorm(
            num_channels=out_c,
            act=None,
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))
        self.if_act = if_act
        self.act = _create_act(act)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            x = self.act(x)
        return x


class ResidualUnit(nn.Layer):
    def __init__(self,
                 in_c,
                 mid_c,
                 out_c,
                 filter_size,
                 stride,
                 use_se,
                 act=None,
                 dilation=1):
        super().__init__()
        self.if_shortcut = stride == 1 and in_c == out_c
        self.if_se = use_se

        self.expand_conv = ConvBNLayer(
            in_c=in_c,
            out_c=mid_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act)
        self.bottleneck_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=mid_c,
            filter_size=filter_size,
            stride=stride,
            padding=int((filter_size - 1) // 2) * dilation,
            num_groups=mid_c,
            if_act=True,
            act=act,
            dilation=dilation)
        if self.if_se:
            self.mid_se = SEModule(mid_c)
        self.linear_conv = ConvBNLayer(
            in_c=mid_c,
            out_c=out_c,
            filter_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None)

    def forward(self, x):
        identity = x
        x = self.expand_conv(x)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(identity, x)
        return x


# nn.Hardsigmoid does not accept the "slope" and "offset" arguments that
# nn.functional.hardsigmoid supports, so wrap the functional form instead.
class Hardsigmoid(nn.Layer):
    def __init__(self, slope=0.2, offset=0.5):
        super().__init__()
        self.slope = slope
        self.offset = offset

    def forward(self, x):
        return nn.functional.hardsigmoid(
            x, slope=self.slope, offset=self.offset)


class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4):
        super().__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0)
        self.relu = nn.ReLU()
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0)
        self.hardsigmoid = Hardsigmoid(slope=0.2, offset=0.5)

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.hardsigmoid(x)
        return paddle.multiply(x=identity, y=x)
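

# Illustrative check (not used by the backbone): SEModule preserves the input
# shape, since it only rescales channels by a [N, C, 1, 1] attention map
# produced from globally pooled features.
def _demo_se_module():
    se = SEModule(channel=16)
    x = paddle.rand([1, 16, 32, 32])
    assert se(x).shape == [1, 16, 32, 32]
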

@manager.BACKBONES.add_component
def MobileNetV3_small_x0_35(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_small_x0_5(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.5,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_small_x0_75(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=0.75,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_small_x1_25(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small"],
        scale=1.25,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x0_35(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.35,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x0_5(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.5,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x0_75(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=0.75,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x1_25(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large"],
        scale=1.25,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_large_x1_0_os8(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["large_os8"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_large"],
        out_index=OUT_INDEX["large"],
        **kwargs)
    return model


@manager.BACKBONES.add_component
def MobileNetV3_small_x1_0_os8(**kwargs):
    model = MobileNetV3(
        config=NET_CONFIG["small_os8"],
        scale=1.0,
        stages_pattern=MODEL_STAGES_PATTERN["MobileNetV3_small"],
        out_index=OUT_INDEX["small"],
        **kwargs)
    return model
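

# Minimal usage sketch (assumes a working PaddlePaddle install; the 224x224
# input size is arbitrary). The backbone returns the feature maps selected by
# OUT_INDEX, at strides 4/8/16/32 for the standard configs, e.g. for
# MobileNetV3_large_x1_0: [1, 24, 56, 56], [1, 40, 28, 28], [1, 112, 14, 14]
# and [1, 160, 7, 7].
def _demo_backbone():
    model = MobileNetV3_large_x1_0()
    x = paddle.rand([1, 3, 224, 224])
    for feat in model(x):
        print(feat.shape)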