sidharthism's picture
Added model *.pdparams
1ab1a09
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils
@manager.MODELS.add_component
class ENCNet(nn.Layer):
    """
    The ENCNet implementation based on PaddlePaddle.

    The original article refers to
    Hang Zhang, Kristin Dana, et al. "Context Encoding for Semantic Segmentation".

    Args:
        num_classes (int): The unique number of target classes.
        backbone (paddle.nn.Layer): A backbone network. It must expose a
            ``feat_channels`` sequence and return a sequence of feature maps
            when called.
        backbone_indices (tuple|list, optional): The indices of the backbone
            outputs fed to the decoder. The last index feeds the bottleneck;
            the preceding ones feed the lateral convolutions when
            ``add_lateral`` is True. Default: (1, 2, 3).
        num_codes (int, optional): The number of encoded words. Default: 32.
        mid_channels (int, optional): The channels of middle layers. Default: 512.
        use_se_loss (bool, optional): Whether to use the semantic encoding
            loss branch. Default: True.
        add_lateral (bool, optional): Whether to use lateral convolution
            layers. Default: False.
        pretrained (str, optional): The path or url of pretrained model.
            Default: None.

    Note:
        The auxiliary FCN head always reads backbone output index 2
        (``feat_channels[2]`` / ``feats[2]``), independently of
        ``backbone_indices``.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=(1, 2, 3),
                 num_codes=32,
                 mid_channels=512,
                 use_se_loss=True,
                 add_lateral=False,
                 pretrained=None):
        super().__init__()
        self.add_lateral = add_lateral
        self.num_codes = num_codes
        self.backbone = backbone
        self.backbone_indices = backbone_indices

        in_channels = [
            self.backbone.feat_channels[index] for index in backbone_indices
        ]

        # The deepest selected feature map is projected to mid_channels.
        self.bottleneck = layers.ConvBNReLU(
            in_channels[-1], mid_channels, 3, padding=1)

        if self.add_lateral:
            # One 1x1 projection per shallower selected feature map.
            self.lateral_convs = nn.LayerList([
                layers.ConvBNReLU(in_ch, mid_channels, 1)
                for in_ch in in_channels[:-1]
            ])
            # Fuses the bottleneck output concatenated with all laterals.
            self.fusion = layers.ConvBNReLU(
                len(in_channels) * mid_channels,
                mid_channels,
                3,
                padding=1)

        self.enc_module = EncModule(mid_channels, num_codes)
        self.head = nn.Conv2D(mid_channels, num_classes, 1)

        # Auxiliary head, only evaluated in training mode (see forward).
        self.fcn_head = layers.AuxLayer(self.backbone.feat_channels[2],
                                        mid_channels, num_classes)

        self.use_se_loss = use_se_loss
        if use_se_loss:
            self.se_layer = nn.Linear(mid_channels, num_classes)

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        """Load the whole model from ``self.pretrained`` when it is set."""
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, inputs):
        """
        Run the network.

        Args:
            inputs (Tensor): A 4-D input tensor; its last two dimensions are
                used as the target size of the upsampled logits.

        Returns:
            list[Tensor]: ``[out]`` in evaluation mode. In training mode the
            auxiliary FCN logits are appended, followed by the output of the
            semantic-encoding linear layer when ``use_se_loss`` is True.
        """
        _, _, height, width = paddle.shape(inputs)
        feats = self.backbone(inputs)
        fcn_feat = feats[2]

        feats = [feats[i] for i in self.backbone_indices]
        feat = self.bottleneck(feats[-1])

        if self.add_lateral:
            # All laterals are resized to the bottleneck's spatial size.
            target_size = paddle.shape(feat)[2:]
            laterals = [
                F.interpolate(
                    lateral_conv(lateral_in),
                    size=target_size,
                    mode='bilinear',
                    align_corners=False)
                for lateral_conv, lateral_in in zip(self.lateral_convs, feats)
            ]
            feat = self.fusion(paddle.concat([feat, *laterals], 1))

        encode_feat, feat = self.enc_module(feat)
        out = self.head(feat)
        out = F.interpolate(
            out, size=[height, width], mode='bilinear', align_corners=False)
        output = [out]

        if self.training:
            fcn_out = self.fcn_head(fcn_feat)
            fcn_out = F.interpolate(
                fcn_out,
                size=[height, width],
                mode='bilinear',
                align_corners=False)
            output.append(fcn_out)
            if self.use_se_loss:
                output.append(self.se_layer(encode_feat))

        return output
class Encoding(nn.Layer):
    """
    Encoding layer with learnable codewords.

    Every spatial position of the input is softly assigned to ``num_codes``
    codewords (softmax over scaled squared distances), and the weighted
    residuals to each codeword are summed over all positions.

    Args:
        channels (int): The number of channels of the input feature map,
            which is also the dimension of each codeword.
        num_codes (int): The number of codewords.
    """

    def __init__(self, channels, num_codes):
        super().__init__()
        self.channels, self.num_codes = channels, num_codes

        std = 1 / ((channels * num_codes)**0.5)
        self.codewords = self.create_parameter(
            shape=(num_codes, channels),
            default_initializer=nn.initializer.Uniform(-std, std))
        self.scale = self.create_parameter(
            shape=(num_codes, ),
            default_initializer=nn.initializer.Uniform(-1, 0))

    def scaled_l2(self, x, codewords, scale):
        """
        Compute the per-codeword scaled squared L2 distance.

        Args:
            x (Tensor): Features of shape (batch, positions, channels).
            codewords (Tensor): Codewords of shape (num_codes, channels).
            scale (Tensor): Per-codeword factors of shape (num_codes, ).

        Returns:
            Tensor: Scaled distances of shape (batch, positions, num_codes).
        """
        num_codes, channels = paddle.shape(codewords)
        reshaped_scale = scale.reshape([1, 1, num_codes])
        # Repeat x along a new codeword axis: (batch, positions, codes, channels).
        expanded_x = paddle.tile(x.unsqueeze(2), [1, 1, num_codes, 1])
        reshaped_codewords = codewords.reshape([1, 1, num_codes, channels])

        squared_dist = (expanded_x - reshaped_codewords).pow(2).sum(axis=3)
        return paddle.multiply(reshaped_scale, squared_dist)

    def aggregate(self, assignment_weights, x, codewords):
        """
        Sum the assignment-weighted residuals over all positions.

        Args:
            assignment_weights (Tensor): Weights of shape
                (batch, positions, num_codes).
            x (Tensor): Features of shape (batch, positions, channels).
            codewords (Tensor): Codewords of shape (num_codes, channels).

        Returns:
            Tensor: Encoded features of shape (batch, num_codes, channels).
        """
        num_codes, channels = paddle.shape(codewords)
        reshaped_codewords = codewords.reshape([1, 1, num_codes, channels])
        expanded_x = paddle.tile(x.unsqueeze(2), [1, 1, num_codes, 1])

        residuals = expanded_x - reshaped_codewords
        encoded_feat = paddle.multiply(
            assignment_weights.unsqueeze(3), residuals).sum(axis=1)
        # Reshape with the Python-int attributes rather than the runtime
        # shape values read from `codewords` above.
        encoded_feat = paddle.reshape(encoded_feat,
                                      [-1, self.num_codes, self.channels])
        return encoded_feat

    def forward(self, x):
        """
        Encode a 4-D feature map.

        Args:
            x (Tensor): A 4-D tensor whose second dimension equals
                ``self.channels``.

        Returns:
            Tensor: Encoded features of shape (batch, num_codes, channels).

        Raises:
            AssertionError: If ``x`` is not 4-D or its channel dimension does
                not match ``self.channels``.
        """
        x_dims = x.ndim
        assert x_dims == 4, "The dimension of input tensor must equal 4, but got {}.".format(
            x_dims)
        assert paddle.shape(
            x
        )[1] == self.channels, "Encoding channels error, expected {} but got {}.".format(
            self.channels, paddle.shape(x)[1])

        batch_size = paddle.shape(x)[0]
        # Flatten the spatial dims and move channels last:
        # (batch, channels, ...) -> (batch, positions, channels).
        x = x.reshape([batch_size, self.channels, -1]).transpose([0, 2, 1])
        assignment_weights = F.softmax(
            self.scaled_l2(x, self.codewords, self.scale), axis=2)
        encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
        return encoded_feat
class EncModule(nn.Layer):
    """
    Context encoding module.

    The input is projected with a 1x1 ConvBNReLU, passed through an
    ``Encoding`` layer followed by BatchNorm1D and ReLU, and averaged over
    the codeword axis. A Linear + Sigmoid layer turns that average into
    per-channel factors used to re-weight the input.

    Args:
        in_channels (int): The number of channels of the input feature map.
        num_codes (int): The number of codewords of the encoding layer.
    """

    def __init__(self, in_channels, num_codes):
        super().__init__()
        self.in_channels = in_channels

        self.encoding_project = layers.ConvBNReLU(in_channels, in_channels, 1)

        codebook = Encoding(channels=in_channels, num_codes=num_codes)
        self.encoding = nn.Sequential(
            codebook,
            nn.BatchNorm1D(num_codes),
            nn.ReLU())

        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels),
            nn.Sigmoid())

    def forward(self, x):
        """
        Apply context encoding to ``x``.

        Args:
            x (Tensor): A 4-D feature map with ``in_channels`` channels.

        Returns:
            tuple: ``(encoding_feat, output)`` where ``encoding_feat`` is the
            encoded feature averaged over the codeword axis and ``output`` is
            ``relu(x + x * gate)``, with ``gate`` the per-channel factors
            reshaped to (batch, in_channels, 1, 1).
        """
        projected = self.encoding_project(x)
        # Average over the codeword axis (axis 1 of the encoding output).
        code_summary = paddle.mean(self.encoding(projected), axis=1)

        channel_gate = self.fc(code_summary)
        num_samples = paddle.shape(x)[0]
        channel_gate = paddle.reshape(
            channel_gate, [num_samples, self.in_channels, 1, 1])

        gated = F.relu(x + x * channel_gate)
        return code_summary, gated