Spaces:
Configuration error
Configuration error
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc | |
from itertools import combinations | |
import numpy as np | |
import cv2 | |
import paddle | |
import paddle.nn.functional as F | |
def reverse_transform(pred, trans_info, mode='nearest'):
    """
    Undo the recorded shape transforms so that `pred` regains its origin shape.

    The records in `trans_info` are walked from last to first. Each record is
    indexed as ``record[0]`` (the transform name, or a list whose first
    element is the name) and ``record[1]`` (the ``(h, w)`` to restore).

    Args:
        pred (Tensor): The tensor to restore; it is sliced on its last two of
            four axes for 'padding' records.
        trans_info (list): Recorded transforms, applied in reverse order.
        mode (str): Interpolation mode handed to `F.interpolate` for 'resize'
            records. Default: 'nearest'.

    Returns:
        Tensor: `pred` after every recorded transform has been reverted.

    Raises:
        Exception: If a record names a transform other than 'resize' or
            'padding'.
    """
    integer_dtypes = [paddle.int8, paddle.int16, paddle.int32, paddle.int64]
    original_dtype = pred.dtype
    for record in reversed(trans_info):
        tag = record[0]
        op = tag[0] if isinstance(tag, list) else tag

        if op == 'padding':
            # Padding is reverted by cropping back to the recorded size.
            h, w = record[1][0], record[1][1]
            pred = pred[:, :, 0:h, 0:w]
            continue

        if op != 'resize':
            raise Exception("Unexpected info '{}' in im_info".format(tag))

        target_size = (record[1][0], record[1][1])
        # On CPU an integer tensor is interpolated through float32 and cast
        # back afterwards (presumably because the CPU kernel lacks integer
        # support -- confirm against the Paddle version in use).
        via_float = (paddle.get_device() == 'cpu' and
                     original_dtype in integer_dtypes)
        if via_float:
            pred = paddle.cast(pred, 'float32')
        pred = F.interpolate(pred, target_size, mode=mode)
        if via_float:
            pred = paddle.cast(pred, original_dtype)
    return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
    """
    Enumerate the flip settings to evaluate.

    Args:
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.

    Returns:
        list: List of tuple ``(horizontal, vertical)``. The un-flipped pair
            ``(False, False)`` always comes first; a pair is included only
            when every flip it uses has been enabled.
    """
    # Candidates are listed in the order they must appear in the result.
    candidates = (
        (False, False),
        (True, False),
        (False, True),
        (True, True),
    )
    return [(horizontal, vertical)
            for horizontal, vertical in candidates
            if (flip_horizontal or not horizontal) and
            (flip_vertical or not vertical)]
def tensor_flip(x, flip):
    """
    Flip a 4-axis tensor according to the given directions.

    Args:
        x: The tensor to flip; it is indexed with four axes.
        flip: Indexable pair; a truthy ``flip[0]`` reverses the last axis and
            a truthy ``flip[1]`` reverses the second-to-last axis.

    Returns:
        The flipped tensor, or `x` itself when neither direction is set.
    """
    flipped = x
    if flip[0]:
        # Horizontal flip: reverse the last axis.
        flipped = flipped[:, :, :, ::-1]
    if flip[1]:
        # Vertical flip: reverse the second-to-last axis.
        flipped = flipped[:, :, ::-1, :]
    return flipped
def slide_inference(model, im, crop_size, stride):
    """
    Infer by sliding window.

    The image is covered with windows of `crop_size` placed every `stride`
    pixels; windows that would run past the border are shifted back inside
    the image. The logits of all windows are accumulated and each pixel is
    divided by the number of windows that covered it.

    Args:
        model (paddle.nn.Layer): model to get logits of image. It must return
            a sequence whose first element provides `.numpy()`.
        im (Tensor): the input image. Its last two axes are read as
            (height, width) and it is sliced with four axes.
        crop_size (tuple|list): The size of sliding window, (w, h).
        stride (tuple|list): The size of stride, (w, h).

    Returns:
        Tensor: The logit of input image, with shape
            (1, num_classes, h_im, w_im).

    Raises:
        TypeError: If the model output is not a `collections.abc.Sequence`.
        RuntimeError: If some pixel is not covered by any window.
    """
    h_im, w_im = im.shape[-2:]
    w_crop, h_crop = crop_size
    w_stride, h_stride = stride
    # calculate the crop nums
    # NOTE: the builtin `int` is used because the `np.int` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
    # prevent negative sliding rounds when imgs after scaling << crop_size
    rows = 1 if h_im <= h_crop else rows
    cols = 1 if w_im <= w_crop else cols
    # TODO 'Tensor' object does not support item assignment. If support, use tensor to calculation.
    final_logit = None
    count = np.zeros([1, 1, h_im, w_im])
    for r in range(rows):
        for c in range(cols):
            h1 = r * h_stride
            w1 = c * w_stride
            h2 = min(h1 + h_crop, h_im)
            w2 = min(w1 + w_crop, w_im)
            # Shift the window back so that border windows keep the full
            # crop size whenever the image is large enough.
            h1 = max(h2 - h_crop, 0)
            w1 = max(w2 - w_crop, 0)
            im_crop = im[:, :, h1:h2, w1:w2]
            logits = model(im_crop)
            if not isinstance(logits, collections.abc.Sequence):
                raise TypeError(
                    "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
                    .format(type(logits)))
            logit = logits[0].numpy()
            if final_logit is None:
                # The class count is only known after the first forward pass.
                final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
            # The model output is cropped to the window extent before it is
            # accumulated.
            final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
            count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixel not predicted. It is possible that stride is greater than crop_size'
        )
    # Average the overlapping window predictions.
    final_logit = final_logit / count
    final_logit = paddle.to_tensor(final_logit)
    return final_logit
def inference(model,
              im,
              trans_info=None,
              is_slide=False,
              stride=None,
              crop_size=None):
    """
    Inference for image.

    Args:
        model (paddle.nn.Layer): model to get logits of image. When it has a
            `data_format` attribute equal to 'NHWC', the image is transposed
            to channels-last before the forward pass and the logit is
            transposed back afterwards.
        im (Tensor): the input image.
        trans_info (list): Recorded shape transforms of the image, used to
            restore the logit to the origin shape. Default: None.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        stride (tuple|list): The size of stride, (w, h). It should be provided
            if is_slide is True.
        crop_size (tuple|list): The size of sliding window, (w, h). It should
            be provided if is_slide is True.

    Returns:
        Tensor | tuple: If trans_info is None, the logit alone is returned.
            Otherwise a tuple ``(pred, logit)`` is returned, where `logit`
            has been restored through `reverse_transform` and `pred` is its
            int32 argmax over axis 1 (keepdim=True).

    Raises:
        TypeError: If the model output is not a `collections.abc.Sequence`
            (only checked when is_slide is False).
    """
    channels_last = (hasattr(model, 'data_format') and
                     model.data_format == 'NHWC')
    if channels_last:
        im = im.transpose((0, 2, 3, 1))

    if is_slide:
        logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
    else:
        outputs = model(im)
        if not isinstance(outputs, collections.abc.Sequence):
            raise TypeError(
                "The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
                .format(type(outputs)))
        # Only the first output of the model is used.
        logit = outputs[0]

    if channels_last:
        logit = logit.transpose((0, 3, 1, 2))

    if trans_info is None:
        return logit

    logit = reverse_transform(logit, trans_info, mode='bilinear')
    pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
    return pred, logit
def aug_inference(model,
                  im,
                  trans_info,
                  scales=1.0,
                  flip_horizontal=False,
                  flip_vertical=False,
                  is_slide=False,
                  stride=None,
                  crop_size=None):
    """
    Infer with augmentation.

    For every scale and every enabled flip, the image is resized and flipped,
    run through `inference`, and the resulting logit is flipped back, resized
    to the input size and turned into softmax probabilities. The
    probabilities of all runs are summed (not averaged).

    Args:
        model (paddle.nn.Layer): model to get logits of image.
        im (Tensor): the input image.
        trans_info (list): Transforms for image.
        scales (int|float|tuple|list): Scales for resize. Default: 1.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        stride (tuple|list): The size of stride, (w, h). It should be provided
            if is_slide is True.
        crop_size (tuple|list): The size of sliding window, (w, h). It should
            be provided if is_slide is True.

    Returns:
        tuple: ``(pred, final_logit)``. `final_logit` is the summed softmax
            output restored through `reverse_transform`; `pred` is its int32
            argmax over axis 1 (keepdim=True).

    Raises:
        TypeError: If `scales` is not a number, tuple or list.
        ValueError: If `scales` is an empty tuple or list.
    """
    if isinstance(scales, (int, float)):
        scales = [scales]
    elif not isinstance(scales, (tuple, list)):
        raise TypeError(
            '`scales` expects float/tuple/list type, but received {}'.format(
                type(scales)))
    if not scales:
        # Without any scale there is nothing to accumulate, and the integer
        # placeholder below could not be restored to the origin shape.
        raise ValueError('`scales` must contain at least one scale.')
    final_logit = 0
    h_input, w_input = im.shape[-2], im.shape[-1]
    flip_comb = flip_combination(flip_horizontal, flip_vertical)
    for scale in scales:
        h = int(h_input * scale + 0.5)
        w = int(w_input * scale + 0.5)
        # Always resize from the original input: re-using the previously
        # scaled image would compound interpolation error across scales.
        im_scaled = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scaled, flip)
            logit = inference(
                model,
                im_flip,
                is_slide=is_slide,
                crop_size=crop_size,
                stride=stride)
            # Undo the flip so all predictions are aligned before summing.
            logit = tensor_flip(logit, flip)
            logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
            logit = F.softmax(logit, axis=1)
            final_logit = final_logit + logit
    final_logit = reverse_transform(final_logit, trans_info, mode='bilinear')
    pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
    return pred, final_logit