|
import numpy as np |
|
from PIL import Image |
|
|
|
|
|
def nms(boxes, overlap_threshold=0.5, mode="union"):
    """Greedy non-maximum suppression.

    Boxes are visited from the highest score to the lowest. Each visited
    box is kept, and every remaining box whose overlap with it is strictly
    greater than `overlap_threshold` is discarded.

    Arguments:
        boxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        overlap_threshold: a float number.
        mode: 'union' (intersection / union) or
            'min' (intersection / area of the smaller box).

    Returns:
        list with indices of the selected boxes, best score first.

    Raises:
        ValueError: if `boxes` is non-empty and `mode` is neither
            'union' nor 'min'.
    """
    # Nothing to suppress.
    if len(boxes) == 0:
        return []

    # Fail early with a clear message instead of hitting an unbound
    # `overlap` variable inside the loop.
    if mode not in ("union", "min"):
        raise ValueError(
            "mode must be 'union' or 'min', got {!r}".format(mode)
        )

    x1, y1, x2, y2, score = (boxes[:, i] for i in range(5))

    # Coordinates are treated as inclusive pixel indices, hence the +1.
    area = (x2 - x1 + 1.0) * (y2 - y1 + 1.0)

    # Ascending order: the best remaining box is always the last one.
    ids = np.argsort(score)
    pick = []

    while len(ids) > 0:
        best = ids[-1]
        rest = ids[:-1]
        pick.append(best)

        # Corners of the intersection of `best` with every other box.
        ix1 = np.maximum(x1[best], x1[rest])
        iy1 = np.maximum(y1[best], y1[rest])
        ix2 = np.minimum(x2[best], x2[rest])
        iy2 = np.minimum(y2[best], y2[rest])

        # Clamp at zero so disjoint boxes give an empty intersection.
        inter = (
            np.maximum(0.0, ix2 - ix1 + 1.0)
            * np.maximum(0.0, iy2 - iy1 + 1.0)
        )

        if mode == "min":
            overlap = inter / np.minimum(area[best], area[rest])
        else:
            # Intersection over union.
            overlap = inter / (area[best] + area[rest] - inter)

        # Drop `best` (already picked) and everything overlapping it too
        # much. Negating `>` (rather than using `<=`) keeps boxes whose
        # overlap is NaN, exactly as a plain `overlap > threshold`
        # deletion would.
        ids = rest[~(overlap > overlap_threshold)]

    return pick
|
|
|
|
|
def convert_to_square(bboxes): |
|
"""Convert bounding boxes to a square form. |
|
|
|
Arguments: |
|
bboxes: a float numpy array of shape [n, 5]. |
|
|
|
Returns: |
|
a float numpy array of shape [n, 5], |
|
squared bounding boxes. |
|
""" |
|
|
|
square_bboxes = np.zeros_like(bboxes) |
|
x1, y1, x2, y2 = [bboxes[:, i] for i in range(4)] |
|
h = y2 - y1 + 1.0 |
|
w = x2 - x1 + 1.0 |
|
max_side = np.maximum(h, w) |
|
square_bboxes[:, 0] = x1 + w * 0.5 - max_side * 0.5 |
|
square_bboxes[:, 1] = y1 + h * 0.5 - max_side * 0.5 |
|
square_bboxes[:, 2] = square_bboxes[:, 0] + max_side - 1.0 |
|
square_bboxes[:, 3] = square_bboxes[:, 1] + max_side - 1.0 |
|
return square_bboxes |
|
|
|
|
|
def calibrate_box(bboxes, offsets):
    """Shift box corners by offsets expressed in box-size units.

    'offsets' is one of the outputs of the nets. Every offset is
    multiplied by the box width (x coordinates) or the box height
    (y coordinates) and added to the matching corner coordinate.

    NOTE: `bboxes` is updated in place; the returned array is the very
    same object that was passed in.

    Arguments:
        bboxes: a float numpy array of shape [n, 5].
        offsets: a float numpy array of shape [n, 4].

    Returns:
        a float numpy array of shape [n, 5].
    """
    # Inclusive pixel coordinates, hence the +1.
    widths = bboxes[:, 2] - bboxes[:, 0] + 1.0
    heights = bboxes[:, 3] - bboxes[:, 1] + 1.0

    # One (w, h, w, h) row per box, lined up with (x1, y1, x2, y2).
    scale = np.stack([widths, heights, widths, heights], axis=1)

    bboxes[:, 0:4] = bboxes[:, 0:4] + scale * offsets
    return bboxes
|
|
|
|
|
def get_image_boxes(bounding_boxes, img, size=24):
    """Cut out boxes from the image.

    Each box is copied into its own buffer, resized to `size` x `size`
    with bilinear interpolation and passed through `_preprocess`. Parts
    of a box that are not filled from the image stay zero (black).

    Arguments:
        bounding_boxes: a float numpy array of shape [n, 5].
        img: an instance of PIL.Image.
        size: an integer, size of cutouts.

    Returns:
        a float numpy array of shape [n, 3, size, size].
    """
    num_boxes = len(bounding_boxes)
    width, height = img.size

    dy, edy, dx, edx, y, ey, x, ex, w, h = correct_bboxes(
        bounding_boxes, width, height
    )
    img_boxes = np.zeros((num_boxes, 3, size, size), "float32")

    # The pixel data does not depend on the box, so convert the PIL image
    # to an array once instead of once per box.
    img_array = np.asarray(img, "uint8")

    for i in range(num_boxes):
        # Buffer of the full (unclipped) box size, initially black.
        img_box = np.zeros((h[i], w[i], 3), "uint8")

        # Copy the source region (y..ey, x..ex) into the matching region
        # of the cutout (dy..edy, dx..edx); the end indices are inclusive.
        img_box[dy[i]:(edy[i] + 1), dx[i]:(edx[i] + 1), :] = img_array[
            y[i]:(ey[i] + 1), x[i]:(ex[i] + 1), :
        ]

        # Resize through PIL, then go back to a float array.
        img_box = Image.fromarray(img_box)
        img_box = img_box.resize((size, size), Image.BILINEAR)
        img_box = np.asarray(img_box, "float32")

        img_boxes[i, :, :, :] = _preprocess(img_box)

    return img_boxes
|
|
|
|
|
def correct_bboxes(bboxes, width, height):
    """Crop boxes that are too big and get coordinates
    with respect to cutouts.

    The input array is only read: the clipped coordinates are returned
    as new arrays and `bboxes` itself is left unchanged.

    Arguments:
        bboxes: a float numpy array of shape [n, 5],
            where each row is (xmin, ymin, xmax, ymax, score).
        width: a float number.
        height: a float number.

    Returns:
        dy, dx, edy, edx: a int numpy arrays of shape [n],
            coordinates of the boxes with respect to the cutouts.
        y, x, ey, ex: a int numpy arrays of shape [n],
            corrected ymin, xmin, ymax, xmax.
        h, w: a int numpy arrays of shape [n],
            just heights and widths of boxes.

        in the following order:
            [dy, edy, dx, edx, y, ey, x, ex, w, h].
    """
    x1, y1, x2, y2 = (bboxes[:, i] for i in range(4))

    # Sizes of the original (unclipped) boxes; coordinates are inclusive.
    w = x2 - x1 + 1.0
    h = y2 - y1 + 1.0

    # Which boxes stick out of the image on each side.
    past_right = x2 > width - 1.0
    past_bottom = y2 > height - 1.0
    past_left = x1 < 0.0
    past_top = y1 < 0.0

    # Bottom-right corner. `ex`/`ey` are the clipped image coordinates.
    # `edx`/`edy` are the matching (inclusive) end coordinates inside the
    # cutout, measured from the cutout's own top-left corner.
    edx = np.where(past_right, w + width - 2.0 - x2, w - 1.0)
    ex = np.where(past_right, width - 1.0, x2)
    edy = np.where(past_bottom, h + height - 2.0 - y2, h - 1.0)
    ey = np.where(past_bottom, height - 1.0, y2)

    # Top-left corner. `x`/`y` are clipped at zero, and `dx`/`dy` say how
    # far into the cutout the visible part of the image starts.
    dx = np.where(past_left, 0.0 - x1, 0.0)
    x = np.where(past_left, 0.0, x1)
    dy = np.where(past_top, 0.0 - y1, 0.0)
    y = np.where(past_top, 0.0, y1)

    return [
        values.astype("int32")
        for values in (dy, edy, dx, edx, y, ey, x, ex, w, h)
    ]
|
|
|
|
|
def _preprocess(img): |
|
"""Preprocessing step before feeding the network. |
|
|
|
Arguments: |
|
img: a float numpy array of shape [h, w, c]. |
|
|
|
Returns: |
|
a float numpy array of shape [1, c, h, w]. |
|
""" |
|
img = img.transpose((2, 0, 1)) |
|
img = np.expand_dims(img, 0) |
|
img = (img - 127.5) * 0.0078125 |
|
return img |
|
|