# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
from paddleseg.utils import utils


@manager.MODELS.add_component
class ENCNet(nn.Layer):
    """
    The ENCNet implementation based on PaddlePaddle.

    The original article refers to
    Hang Zhang, Kristin Dana, et al. "Context Encoding for Semantic Segmentation"
    (https://arxiv.org/abs/1803.08904).

    Args:
        num_classes (int): The number of target classes.
        backbone (paddle.nn.Layer): A backbone network.
        backbone_indices (tuple): The indices of the backbone output features
            fed to the head. Default: (1, 2, 3).
        num_codes (int): The number of encoded words. Default: 32.
        mid_channels (int): The channels of middle layers. Default: 512.
        use_se_loss (bool): Whether to use the semantic encoding loss. Default: True.
        add_lateral (bool): Whether to use lateral convolution layers. Default: False.
        pretrained (str, optional): The path or url of the pretrained model. Default: None.
    """

    def __init__(self,
                 num_classes,
                 backbone,
                 backbone_indices=(1, 2, 3),
                 num_codes=32,
                 mid_channels=512,
                 use_se_loss=True,
                 add_lateral=False,
                 pretrained=None):
        super().__init__()
        self.add_lateral = add_lateral
        self.num_codes = num_codes
        self.backbone = backbone
        self.backbone_indices = backbone_indices
        in_channels = [
            self.backbone.feat_channels[index] for index in backbone_indices
        ]

        # 3x3 conv that projects the deepest backbone feature to mid_channels.
        self.bottleneck = layers.ConvBNReLU(
            in_channels[-1], mid_channels, 3, padding=1)

        # Optional FPN-style lateral 1x1 convs on the shallower features,
        # fused with the bottleneck output by a 3x3 conv.
        if self.add_lateral:
            self.lateral_convs = nn.LayerList()
            for in_ch in in_channels[:-1]:
                self.lateral_convs.append(
                    layers.ConvBNReLU(in_ch, mid_channels, 1))
            self.fusion = layers.ConvBNReLU(
                len(in_channels) * mid_channels, mid_channels, 3, padding=1)

        # Context encoding module and the main segmentation head.
        self.enc_module = EncModule(mid_channels, num_codes)
        self.head = nn.Conv2D(mid_channels, num_classes, 1)

        # Auxiliary FCN head on the third backbone feature (training only).
        self.fcn_head = layers.AuxLayer(self.backbone.feat_channels[2],
                                        mid_channels, num_classes)

        # Per-image class-presence logits for the semantic encoding loss.
        self.use_se_loss = use_se_loss
        if use_se_loss:
            self.se_layer = nn.Linear(mid_channels, num_classes)

        self.pretrained = pretrained
        self.init_weight()

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)

    def forward(self, inputs):
        N, C, H, W = paddle.shape(inputs)
        feats = self.backbone(inputs)
        fcn_feat = feats[2]

        feats = [feats[i] for i in self.backbone_indices]
        feat = self.bottleneck(feats[-1])

        if self.add_lateral:
            laterals = []
            for j, lateral_conv in enumerate(self.lateral_convs):
                laterals.append(
                    F.interpolate(
                        lateral_conv(feats[j]),
                        size=paddle.shape(feat)[2:],
                        mode='bilinear',
                        align_corners=False))
            feat = self.fusion(paddle.concat([feat, *laterals], axis=1))

        encode_feat, feat = self.enc_module(feat)
        out = self.head(feat)
        out = F.interpolate(
            out, size=[H, W], mode='bilinear', align_corners=False)

        output = [out]
        if self.training:
            # Auxiliary FCN prediction, supervised only during training.
            fcn_out = self.fcn_head(fcn_feat)
            fcn_out = F.interpolate(
                fcn_out, size=[H, W], mode='bilinear', align_corners=False)
            output.append(fcn_out)
            if self.use_se_loss:
                # Per-image class-presence logits from the encoded statistics.
                se_out = self.se_layer(encode_feat)
                output.append(se_out)
            return output
        return output
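

# The se head above emits one logit vector per image predicting which classes
# appear anywhere in the label map. Below is a hedged sketch of building its
# multi-hot target from a segmentation label; `se_target` is a hypothetical
# helper for illustration, not part of the original file or the PaddleSeg API.
def se_target(label, num_classes, ignore_index=255):
    """label: int64 tensor [N, H, W] -> float32 multi-hot [N, num_classes]."""
    # Map ignored pixels to class 0 so one_hot stays in range, then mask them out.
    safe = paddle.where(label == ignore_index, paddle.zeros_like(label), label)
    onehot = F.one_hot(safe, num_classes)  # [N, H, W, num_classes]
    valid = (label != ignore_index).astype('float32').unsqueeze(-1)
    present = (onehot * valid).sum(axis=[1, 2]) > 0
    return present.astype('float32')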


class Encoding(nn.Layer):
    """Learns a codebook of `num_codes` codewords and soft-assigns every
    spatial descriptor to them, aggregating residuals per codeword."""

    def __init__(self, channels, num_codes):
        super().__init__()
        self.channels, self.num_codes = channels, num_codes

        # Codewords (the learned dictionary) and per-codeword smoothing factors.
        std = 1 / ((channels * num_codes)**0.5)
        self.codewords = self.create_parameter(
            shape=(num_codes, channels),
            default_initializer=nn.initializer.Uniform(-std, std))
        self.scale = self.create_parameter(
            shape=(num_codes, ),
            default_initializer=nn.initializer.Uniform(-1, 0))

    def scaled_l2(self, x, codewords, scale):
        # For every descriptor x_i and codeword d_k, compute s_k * ||x_i - d_k||^2,
        # the logits of the soft assignment (scales are initialized negative).
        num_codes, channels = paddle.shape(codewords)
        reshaped_scale = scale.reshape([1, 1, num_codes])
        expanded_x = paddle.tile(x.unsqueeze(2), [1, 1, num_codes, 1])
        reshaped_codewords = codewords.reshape([1, 1, num_codes, channels])
        scaled_l2_norm = paddle.multiply(
            reshaped_scale,
            (expanded_x - reshaped_codewords).pow(2).sum(axis=3))
        return scaled_l2_norm

    def aggregate(self, assignment_weights, x, codewords):
        # Weighted sum of residuals (x_i - d_k) over all spatial positions i,
        # yielding one aggregated residual per codeword.
        num_codes, channels = paddle.shape(codewords)
        reshaped_codewords = codewords.reshape([1, 1, num_codes, channels])
        expanded_x = paddle.tile(x.unsqueeze(2), [1, 1, num_codes, 1])
        encoded_feat = paddle.multiply(
            assignment_weights.unsqueeze(3),
            (expanded_x - reshaped_codewords)).sum(axis=1)
        encoded_feat = paddle.reshape(encoded_feat,
                                      [-1, self.num_codes, self.channels])
        return encoded_feat

    def forward(self, x):
        assert x.ndim == 4, (
            "The dimension of input tensor must equal 4, but got {}.".format(
                x.ndim))
        assert paddle.shape(x)[1] == self.channels, (
            "Encoding channels error, expected {} but got {}.".format(
                self.channels, paddle.shape(x)[1]))
        batch_size = paddle.shape(x)[0]
        # [N, C, H, W] -> [N, H*W, C]: one C-dim descriptor per spatial position.
        x = x.reshape([batch_size, self.channels, -1]).transpose([0, 2, 1])
        # Soft-assign each descriptor to the codewords, then aggregate residuals.
        assignment_weights = F.softmax(
            self.scaled_l2(x, self.codewords, self.scale), axis=2)
        encoded_feat = self.aggregate(assignment_weights, x, self.codewords)
        return encoded_feat  # [N, num_codes, C]
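

# A hedged shape walk-through (the helper name `_encoding_shape_demo` is
# illustrative only, not part of the original file): Encoding flattens
# [N, C, H, W] into H*W descriptors of dim C, soft-assigns each to num_codes
# codewords, and returns one aggregated residual per codeword.
def _encoding_shape_demo():
    enc = Encoding(channels=64, num_codes=8)
    feat = paddle.randn([2, 64, 16, 16])   # [N, C, H, W]
    out = enc(feat)
    assert tuple(out.shape) == (2, 8, 64)  # [N, num_codes, C]
    return out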


class EncModule(nn.Layer):
    """Context encoding module: summarizes global context into per-channel
    scaling factors and re-weights the input feature map with them."""

    def __init__(self, in_channels, num_codes):
        super().__init__()
        self.encoding_project = layers.ConvBNReLU(in_channels, in_channels, 1)
        self.encoding = nn.Sequential(
            Encoding(channels=in_channels, num_codes=num_codes),
            nn.BatchNorm1D(num_codes),
            nn.ReLU())
        self.fc = nn.Sequential(
            nn.Linear(in_channels, in_channels),
            nn.Sigmoid())
        self.in_channels = in_channels

    def forward(self, x):
        encoding_projection = self.encoding_project(x)
        # [N, num_codes, C] codeword residuals, averaged over codes -> [N, C].
        encoding_feat = self.encoding(encoding_projection).mean(axis=1)
        batch_size, _, _, _ = paddle.shape(x)
        # Sigmoid-gated channel attention, applied residually (SE-style).
        gamma = self.fc(encoding_feat)
        y = gamma.reshape([batch_size, self.in_channels, 1, 1])
        output = F.relu(x + x * y)
        return encoding_feat, output
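

if __name__ == "__main__":
    # Minimal smoke test with a stand-in backbone (an assumption for
    # illustration; real configs pair ENCNet with a ResNet-style backbone from
    # paddleseg.models.backbones). The backbone only needs a `feat_channels`
    # attribute and a forward that returns four feature maps.
    class _DummyBackbone(nn.Layer):
        feat_channels = [32, 64, 128, 256]

        def forward(self, x):
            n = x.shape[0]
            feats, (h, w) = [], (x.shape[2] // 4, x.shape[3] // 4)
            for c in self.feat_channels:
                feats.append(paddle.randn([n, c, h, w]))
                h, w = h // 2, w // 2
            return feats

    model = ENCNet(num_classes=19, backbone=_DummyBackbone())
    model.eval()
    logits = model(paddle.randn([2, 3, 128, 128]))
    # In eval mode only the main prediction is returned: [(2, 19, 128, 128)].
    print([tuple(t.shape) for t in logits])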