Spaces:
Configuration error
Configuration error
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import paddle | |
import paddle.nn as nn | |
import paddle.nn.functional as F | |
from paddleseg.cvlibs import manager | |
from paddleseg.models import layers | |
from paddleseg.utils import utils | |
# Public backbone factories exported by this module. Every factory defined
# below is listed here, including ResNet200_vd.
__all__ = [
    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd",
    "ResNet152_vd", "ResNet200_vd"
]
class ConvBNLayer(nn.Layer):
    """
    Conv2D followed by SyncBatchNorm and an activation, optionally preceded
    by a 2x2 average pooling.

    Args:
        in_channels (int): Number of input channels of the convolution.
        out_channels (int): Number of output channels of the convolution.
        kernel_size (int): Kernel size of the convolution. It must be 3 when
            `dilation` is not 1.
        stride (int, optional): Stride of the convolution. Default: 1.
        dilation (int, optional): Dilation of the convolution. Default: 1.
        groups (int, optional): Number of groups of the convolution. Default: 1.
        is_vd_mode (bool, optional): Whether to average-pool the input
            (kernel 2, stride 2) before the convolution. Default: False.
        act (str, optional): Activation name passed to `layers.Activation`.
            Default: None.
        data_format (str, optional): Data layout passed to the pooling,
            convolution and batch norm layers. Default: 'NCHW'.

    Raises:
        RuntimeError: If `dilation` is not 1 while `kernel_size` is not 3.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 data_format='NCHW'):
        super().__init__()
        if not (dilation == 1 or kernel_size == 3):
            raise RuntimeError(
                "When the dilation isn't 1,"
                "the kernel_size should be 3.")

        # Without dilation the padding is derived from the kernel size;
        # with dilation the padding equals the dilation rate.
        if dilation == 1:
            conv_padding = (kernel_size - 1) // 2
        else:
            conv_padding = dilation

        self.is_vd_mode = is_vd_mode
        # The pooling layer is always built; it is only applied in vd mode.
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2,
            stride=2,
            padding=0,
            ceil_mode=True,
            data_format=data_format)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=conv_padding,
            dilation=dilation,
            groups=groups,
            bias_attr=False,
            data_format=data_format)
        self._batch_norm = layers.SyncBatchNorm(
            out_channels, data_format=data_format)
        self._act_op = layers.Activation(act=act)

    def forward(self, inputs):
        """Apply (optional) pooling, convolution, batch norm and activation."""
        x = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
        return self._act_op(self._batch_norm(self._conv(x)))
class BottleneckBlock(nn.Layer):
    """
    Residual block made of a 1x1, a 3x3 and a 1x1 ConvBNLayer. The last
    convolution outputs `out_channels * 4` channels.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of channels of the first two convolutions.
        stride (int): Stride of the 3x3 convolution.
        shortcut (bool, optional): If True the input is added to the residual
            branch unchanged; if False it first goes through a 1x1
            ConvBNLayer projecting it to `out_channels * 4` channels.
            Default: True.
        if_first (bool, optional): When True the projection shortcut never
            average-pools its input. Default: False.
        dilation (int, optional): Dilation of the 3x3 convolution. Default: 1.
        data_format (str, optional): Data layout passed to every ConvBNLayer.
            Default: 'NCHW'.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 dilation=1,
                 data_format='NCHW'):
        super().__init__()
        expanded_channels = out_channels * 4

        self.data_format = data_format
        self.conv0 = ConvBNLayer(
            in_channels,
            out_channels,
            1,
            act='relu',
            data_format=data_format)
        self.dilation = dilation
        self.conv1 = ConvBNLayer(
            out_channels,
            out_channels,
            3,
            stride=stride,
            dilation=dilation,
            act='relu',
            data_format=data_format)
        self.conv2 = ConvBNLayer(
            out_channels,
            expanded_channels,
            1,
            act=None,
            data_format=data_format)

        if not shortcut:
            # The projection keeps stride 1; the average pooling of the vd
            # mode is enabled unless this is the first block or stride is 1.
            pool_before_projection = not (if_first or stride == 1)
            self.short = ConvBNLayer(
                in_channels,
                expanded_channels,
                1,
                stride=1,
                is_vd_mode=pool_before_projection,
                data_format=data_format)

        self.shortcut = shortcut
        # NOTE: Use the wrap layer for quantization training
        self.add = layers.Add()
        self.relu = layers.Activation(act="relu")

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual_branch(inputs))."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        return self.relu(self.add(identity, residual))
class BasicBlock(nn.Layer):
    """
    Residual block made of two 3x3 ConvBNLayers.

    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels of both convolutions.
        stride (int): Stride of the first 3x3 convolution.
        dilation (int, optional): Dilation of both convolutions. Default: 1.
        shortcut (bool, optional): If True the input is added to the residual
            branch unchanged; if False it first goes through a 1x1
            ConvBNLayer projecting it to `out_channels` channels.
            Default: True.
        if_first (bool, optional): When True the projection shortcut never
            average-pools its input. Default: False.
        data_format (str, optional): Data layout passed to every ConvBNLayer.
            Default: 'NCHW'.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 dilation=1,
                 shortcut=True,
                 if_first=False,
                 data_format='NCHW'):
        super().__init__()
        self.conv0 = ConvBNLayer(
            in_channels,
            out_channels,
            3,
            stride=stride,
            dilation=dilation,
            act='relu',
            data_format=data_format)
        self.conv1 = ConvBNLayer(
            out_channels,
            out_channels,
            3,
            dilation=dilation,
            act=None,
            data_format=data_format)

        if not shortcut:
            # The projection keeps stride 1; the average pooling of the vd
            # mode is enabled unless this is the first block or stride is 1.
            pool_before_projection = not (if_first or stride == 1)
            self.short = ConvBNLayer(
                in_channels,
                out_channels,
                1,
                stride=1,
                is_vd_mode=pool_before_projection,
                data_format=data_format)

        self.shortcut = shortcut
        self.dilation = dilation
        self.data_format = data_format
        self.add = layers.Add()
        self.relu = layers.Activation(act="relu")

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual_branch(inputs))."""
        residual = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return self.relu(self.add(identity, residual))
class ResNet_vd(nn.Layer):
    """
    The ResNet_vd implementation based on PaddlePaddle.

    The original article refers to
    Tong He, et al. "Bag of Tricks for Image Classification with Convolutional Neural Networks"
    (https://arxiv.org/pdf/1812.01187.pdf).

    Args:
        layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
        output_stride (int, optional): The stride of output features compared to input images. 8 and 16 enable dilation
            in the late stages; any other value leaves every stage undilated. Default: 8.
        multi_grid (tuple|list, optional): The grid of stage4; the dilation of its i-th block is multiplied by
            multi_grid[i]. Default: (1, 1, 1).
        pretrained (str, optional): The path of pretrained model.
        data_format (str, optional): The data layout passed to every convolution, pooling and batch norm layer.
            Default: 'NCHW'.
    """

    def __init__(self,
                 layers=50,
                 output_stride=8,
                 multi_grid=(1, 1, 1),
                 pretrained=None,
                 data_format='NCHW'):
        super(ResNet_vd, self).__init__()
        self.data_format = data_format
        self.conv1_logit = None  # for gscnn shape stream
        self.layers = layers

        # Number of residual blocks in each of the four stages.
        depth_by_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 12, 48, 3],
        }
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)
        depth = depth_by_layers[layers]

        # Deep variants use bottleneck blocks, whose output is 4x wider than
        # their `out_channels`; shallow variants use basic blocks.
        use_bottleneck = layers >= 50
        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
        expansion = 4 if use_bottleneck else 1

        num_filters = [64, 128, 256, 512]
        # Input channels of the first block of each stage.
        num_channels = [64, 256, 512, 1024] if use_bottleneck \
            else [64, 64, 128, 256]

        # for channels of four returned stages
        self.feat_channels = [c * expansion for c in num_filters]

        # Maps stage index -> dilation rate; stages not listed use 1.
        if output_stride == 8:
            dilation_dict = {2: 2, 3: 4}
        elif output_stride == 16:
            dilation_dict = {3: 2}
        else:
            dilation_dict = {}

        self.conv1_1 = ConvBNLayer(
            in_channels=3,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            data_format=data_format)
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            data_format=data_format)
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            data_format=data_format)
        self.pool2d_max = nn.MaxPool2D(
            kernel_size=3, stride=2, padding=1, data_format=data_format)

        # Plain Python list of stages; each block is registered on this layer
        # through add_sublayer below.
        self.stage_list = []
        for stage_idx, num_blocks in enumerate(depth):
            stage_dilation = dilation_dict.get(stage_idx, 1)
            stage_blocks = []
            for block_idx in range(num_blocks):
                dilation_rate = stage_dilation
                # At the stage 4, expand the dilation_rate with multi_grid.
                if stage_idx == 3:
                    dilation_rate = dilation_rate * multi_grid[block_idx]

                is_stage_head = block_idx == 0
                # Only the first block of a non-first, undilated stage
                # downsamples.
                downsample = (is_stage_head and stage_idx != 0 and
                              dilation_rate == 1)
                residual_block = self.add_sublayer(
                    'bb_%d_%d' % (stage_idx, block_idx),
                    block_cls(
                        in_channels=num_channels[stage_idx] if is_stage_head
                        else num_filters[stage_idx] * expansion,
                        out_channels=num_filters[stage_idx],
                        stride=2 if downsample else 1,
                        # The first block of each stage changes the channel
                        # count, so it needs a projection shortcut.
                        shortcut=not is_stage_head,
                        if_first=stage_idx == 0 and is_stage_head,
                        dilation=dilation_rate,
                        data_format=data_format))
                stage_blocks.append(residual_block)
            self.stage_list.append(stage_blocks)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, inputs):
        """
        Run the stem and the four stages.

        Returns:
            list: The output feature map of each stage, in stage order.
        """
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        # Keep a copy of the stem output (used by the gscnn shape stream).
        self.conv1_logit = y.clone()
        y = self.pool2d_max(y)

        # A feature list saves the output feature map of each stage.
        feat_list = []
        for stage in self.stage_list:
            for block in stage:
                y = block(y)
            feat_list.append(y)

        return feat_list

    def init_weight(self):
        """Hand this model and `self.pretrained` to utils.load_pretrained_model."""
        utils.load_pretrained_model(self, self.pretrained)
def ResNet18_vd(**args):
    """Build a ResNet_vd with layers=18; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=18, **args)
def ResNet34_vd(**args):
    """Build a ResNet_vd with layers=34; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=34, **args)
def ResNet50_vd(**args):
    """Build a ResNet_vd with layers=50; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=50, **args)
def ResNet101_vd(**args):
    """Build a ResNet_vd with layers=101; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=101, **args)
def ResNet152_vd(**args):
    """Build a ResNet_vd with layers=152; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=152, **args)
def ResNet200_vd(**args):
    """Build a ResNet_vd with layers=200; `args` are forwarded to ResNet_vd."""
    return ResNet_vd(layers=200, **args)