# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from paddle.nn import functional as F
from paddleseg.utils import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager


@manager.MODELS.add_component
class GINet(nn.Layer):
"""
The GINet implementation based on PaddlePaddle.
The original article refers to
Wu, Tianyi, Yu Lu, Yu Zhu, Chuang Zhang, Ming Wu, Zhanyu Ma, and Guodong Guo. "GINet: Graph interaction network for scene parsing." In European Conference on Computer Vision, pp. 34-51. Springer, Cham, 2020.
(https://arxiv.org/pdf/2009.06160).
Args:
num_classes (int): The unique number of target classes.
backbone (Paddle.nn.Layer): Backbone network.
backbone_indices (tuple, optional): Values in the tuple indicate the indices of output of backbone.
enable_auxiliary_loss (bool, optional): A bool value indicates whether adding auxiliary loss.
If true, auxiliary loss will be added after LearningToDownsample module. Default: False.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
is even, e.g. 1024x512, otherwise it is True, e.g. 769x769.. Default: False.
jpu (bool, optional)): whether to use jpu unit in the base forward. Default:True.
pretrained (str, optional): The path or url of pretrained model. Default: None.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices=[0, 1, 2, 3],
enable_auxiliary_loss=True,
align_corners=True,
jpu=True,
pretrained=None):
super().__init__()
        self.nclass = num_classes
        self.aux = enable_auxiliary_loss
        self.backbone = backbone
        self.backbone_indices = backbone_indices
        self.align_corners = align_corners
        # JPU fuses the three deepest backbone features into a refined c4;
        # when disabled, the raw backbone features are used directly.
        self.jpu = layers.JPU([512, 1024, 2048], width=512) if jpu else None
        self.head = GIHead(in_channels=2048, nclass=num_classes)
        if self.aux:
            # Auxiliary head over the 1024-channel c3 feature.
            self.auxlayer = layers.AuxLayer(
                1024, 1024 // 4, num_classes, bias_attr=False)
        self.pretrained = pretrained
        self.init_weight()

    def base_forward(self, x):
        feat_list = self.backbone(x)
        c1, c2, c3, c4 = [feat_list[i] for i in self.backbone_indices]
        if self.jpu:
            # The JPU jointly upsamples and fuses c2-c4 into a refined c4.
            return self.jpu(c1, c2, c3, c4)
        else:
            return c1, c2, c3, c4

    def forward(self, x):
        _, _, h, w = paddle.shape(x)
        _, _, c3, c4 = self.base_forward(x)

        logit_list = []
        x, _ = self.head(c4)
        logit_list.append(x)
        if self.aux:
            auxout = self.auxlayer(c3)
            logit_list.append(auxout)
        # Upsample every logit map back to the input resolution.
        return [
            F.interpolate(
                logit, [h, w],
                mode='bilinear',
                align_corners=self.align_corners) for logit in logit_list
        ]

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class GIHead(nn.Layer):
"""The Graph Interaction Network head."""
def __init__(self, in_channels, nclass):
super().__init__()
self.nclass = nclass
inter_channels = in_channels // 4
        # Frozen (nclass, 300) class-embedding table: registered as a parameter
        # so it is saved/loaded with the state dict, but excluded from gradient
        # updates via stop_gradient.
        self.inp = paddle.zeros(shape=(nclass, 300), dtype='float32')
        self.inp = paddle.create_parameter(
            shape=self.inp.shape,
            dtype=str(self.inp.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(self.inp))
        self.inp.stop_gradient = True
self.fc1 = nn.Sequential(
nn.Linear(300, 128), nn.BatchNorm1D(128), nn.ReLU())
self.fc2 = nn.Sequential(
nn.Linear(128, 256), nn.BatchNorm1D(256), nn.ReLU())
self.conv5 = layers.ConvBNReLU(
in_channels,
inter_channels,
3,
padding=1,
bias_attr=False,
stride=1)
        # Global reasoning over 84 visual nodes that interact with the class nodes.
        self.gloru = GlobalReasonUnit(
            in_channels=inter_channels,
            num_state=256,
            num_node=84,
            nclass=nclass)
        self.conv6 = nn.Sequential(
            nn.Dropout(0.1), nn.Conv2D(inter_channels, nclass, 1))

    def forward(self, x):
        B, C, H, W = paddle.shape(x)
        # Project the frozen class embeddings to 256-d nodes and broadcast
        # them over the batch: (nclass, 300) -> (B, 256, nclass).
        inp = self.fc1(self.inp)
        inp = self.fc2(inp).unsqueeze(axis=0).transpose((0, 2, 1))\
            .expand((B, 256, self.nclass))

        out = self.conv5(x)
        out, se_out = self.gloru(out, inp)
        out = self.conv6(out)
        return out, se_out


class GlobalReasonUnit(nn.Layer):
    """
    The original paper refers to:
        Chen, Yunpeng, et al. "Graph-Based Global Reasoning Networks"
        (https://arxiv.org/abs/1811.12814)
    """
def __init__(self, in_channels, num_state=256, num_node=84, nclass=59):
super().__init__()
self.num_state = num_state
self.conv_theta = nn.Conv2D(
in_channels, num_node, kernel_size=1, stride=1, padding=0)
self.conv_phi = nn.Conv2D(
in_channels, num_state, kernel_size=1, stride=1, padding=0)
self.graph = GraphLayer(num_state, num_node, nclass)
self.extend_dim = nn.Conv2D(
num_state, in_channels, kernel_size=1, bias_attr=False)
self.bn = layers.SyncBatchNorm(in_channels)

    def forward(self, x, inp):
        # B: soft pixel-to-node assignment, flattened to (batch, num_node, H*W).
        B = self.conv_theta(x)
        B = paddle.flatten(B, 2, 3)

        sizex = paddle.shape(x)
        # Reduce the feature dim and flatten to (batch, H*W, num_state).
        x_reduce = self.conv_phi(x)
        x_reduce = paddle.flatten(x_reduce, 2, 3).transpose((0, 2, 1))

        # Pool pixel features into node states, normalized by the pixel count.
        V = paddle.bmm(B, x_reduce).transpose((0, 2, 1))
        V = paddle.divide(V, (sizex[2] * sizex[3]).astype('float32'))

        class_node, new_V = self.graph(inp, V)
        # Scatter the reasoned node states back onto the pixel grid.
        D = B.transpose((0, 2, 1))
        Y = paddle.bmm(D, new_V.transpose((0, 2, 1)))
        Y = Y.transpose((0, 2, 1)).reshape(
            (sizex[0], self.num_state, sizex[2], -1))
        Y = self.extend_dim(Y)
        Y = self.bn(Y)
        out = Y + x
        return out, class_node


class GraphLayer(nn.Layer):
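    """Refines the class-node graph and the visual-node graph with separate
    GCNs, exchanges information between them via GraphTransfer, and blends the
    transferred features back through learned zero-initialized gates."""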
def __init__(self, num_state, num_node, num_class):
super().__init__()
self.vis_gcn = GCN(num_state, num_node)
self.word_gcn = GCN(num_state, num_class)
self.transfer = GraphTransfer(num_state)
        # Zero-initialized learnable gates that control how much transferred
        # information is blended back into each graph.
        self.gamma_vis = paddle.zeros([num_node])
        self.gamma_word = paddle.zeros([num_class])
        self.gamma_vis = paddle.create_parameter(
            shape=paddle.shape(self.gamma_vis),
            dtype=str(self.gamma_vis.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(self.gamma_vis))
        self.gamma_word = paddle.create_parameter(
            shape=paddle.shape(self.gamma_word),
            dtype=str(self.gamma_word.numpy().dtype),
            default_initializer=paddle.nn.initializer.Assign(self.gamma_word))

    def forward(self, inp, vis_node):
        # Refine each graph independently, then exchange information across them.
        inp = self.word_gcn(inp)
        new_V = self.vis_gcn(vis_node)
        class_node, vis_node = self.transfer(inp, new_V)

        # Blend the transferred features back through the learned gates.
        class_node = self.gamma_word * inp + class_node
        new_V = self.gamma_vis * vis_node + new_V
        return class_node, new_V


class GCN(nn.Layer):
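    """A lightweight graph convolution: a 1x1 Conv1D over the node axis
    (message passing, with a residual connection) followed by a 1x1 Conv1D
    over the state axis (per-node feature update)."""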
def __init__(self, num_state=128, num_node=64, bias=False):
super().__init__()
        self.conv1 = nn.Conv1D(
            num_node,
            num_node,
            kernel_size=1,
            padding=0,
            stride=1,
            groups=1)
self.relu = nn.ReLU()
self.conv2 = nn.Conv1D(
num_state,
num_state,
kernel_size=1,
padding=0,
stride=1,
groups=1,
bias_attr=bias)

    def forward(self, x):
        # Message passing along the node axis (with a residual connection),
        # then a per-node state update along the feature axis.
        h = self.conv1(x.transpose((0, 2, 1))).transpose((0, 2, 1))
        h = h + x
        h = self.relu(h)
        h = self.conv2(h)
        return h


class GraphTransfer(nn.Layer):
"""Transfer vis graph to class node, transfer class node to vis feature"""
def __init__(self, in_dim):
super().__init__()
self.channle_in = in_dim
self.query_conv = nn.Conv1D(
in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
self.key_conv = nn.Conv1D(
in_channels=in_dim, out_channels=in_dim // 2, kernel_size=1)
self.value_conv_vis = nn.Conv1D(
in_channels=in_dim, out_channels=in_dim, kernel_size=1)
self.value_conv_word = nn.Conv1D(
in_channels=in_dim, out_channels=in_dim, kernel_size=1)
self.softmax_vis = nn.Softmax(axis=-1)
self.softmax_word = nn.Softmax(axis=-2)
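
        # softmax_vis normalizes each row of the (Nc, Nn) energy matrix over
        # the visual nodes (used to update class nodes); softmax_word
        # normalizes each column over the class nodes (used to update visual
        # nodes).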

    def forward(self, word, vis_node):
        m_batchsize, C, Nc = paddle.shape(word)
        m_batchsize, C, Nn = paddle.shape(vis_node)

        # Cross-attention energies between class nodes (queries) and visual
        # nodes (keys): energy has shape (batch, Nc, Nn).
        proj_query = self.query_conv(word).reshape((m_batchsize, -1, Nc))\
            .transpose((0, 2, 1))
        proj_key = self.key_conv(vis_node).reshape((m_batchsize, -1, Nn))
        energy = paddle.bmm(proj_query, proj_key)
        attention_vis = self.softmax_vis(energy).transpose((0, 2, 1))
        attention_word = self.softmax_word(energy)

        proj_value_vis = self.value_conv_vis(vis_node).reshape(
            (m_batchsize, -1, Nn))
        proj_value_word = self.value_conv_word(word).reshape(
            (m_batchsize, -1, Nc))

        # Class nodes gather from visual nodes; visual nodes gather from
        # class nodes.
        class_out = paddle.bmm(proj_value_vis, attention_vis)
        node_out = paddle.bmm(proj_value_word, attention_word)
        return class_out, node_out