File size: 3,477 Bytes
ec7bf1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import cv2
import cv2 as cv
import numpy as np
from yunet import YuNet


# Valid combinations of backends and targets
# Each entry is [backend_id, target_id], passed straight through to
# YuNet / cv.dnn; kept as lists (not tuples) for backward compatibility.
# NOTE(review): CUDA/TIMVX/CANN entries assume OpenCV was built with the
# corresponding backend support — unsupported pairs fail at inference time.
backend_target_pairs = [
    [cv.dnn.DNN_BACKEND_OPENCV, cv.dnn.DNN_TARGET_CPU],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA],
    [cv.dnn.DNN_BACKEND_CUDA, cv.dnn.DNN_TARGET_CUDA_FP16],
    [cv.dnn.DNN_BACKEND_TIMVX, cv.dnn.DNN_TARGET_NPU],
    [cv.dnn.DNN_BACKEND_CANN, cv.dnn.DNN_TARGET_NPU],
]


class ImageResizer:
    """Detects faces with YuNet and produces square, face-centered crops.

    The typical entry point is :meth:`resize`, which scales an image so its
    shorter side equals ``target_size`` and then crops a
    ``target_size x target_size`` window positioned around the first
    detected face.
    """

    def __init__(
        self,
        modelPath,
        input_size=(320, 320),
        conf_threshold=0.6,
        nms_threshold=0.3,
        top_k=5000,
        backend_id=0,
        target_id=0,
    ):
        """Create the underlying YuNet face detector.

        Args:
            modelPath: Path to the YuNet ONNX model file.
            input_size: Initial (width, height) fed to the detector; it is
                overwritten per-image in :meth:`detect`.
            conf_threshold: Minimum detection confidence to keep a face.
            nms_threshold: Non-maximum-suppression IoU threshold.
            top_k: Maximum number of candidate detections before NMS.
            backend_id: cv.dnn backend id (see ``backend_target_pairs``).
            target_id: cv.dnn target id (see ``backend_target_pairs``).
        """
        self.model = YuNet(
            modelPath=modelPath,
            inputSize=input_size,
            confThreshold=conf_threshold,
            nmsThreshold=nms_threshold,
            topK=top_k,
            backendId=backend_id,
            targetId=target_id,
        )

    def detect(self, image, num_faces=None):
        """Detect faces and draw their bounding boxes on ``image``.

        Args:
            image: BGR image as an HxWxC ndarray, or None.
            num_faces: If truthy, keep at most this many detections.

        Returns:
            (image, bboxes) where ``image`` has red rectangles drawn on it
            and ``bboxes`` is a list of int32 ``[x, y, w, h]`` arrays.
            For a None input, returns ``(None, [])``.
        """
        if image is None:
            # Bug fix: the original implicitly returned None here, which
            # crashed callers that unpack the (image, bboxes) tuple.
            return None, []

        img_h, img_w, _ = image.shape

        # YuNet requires the input size to match the image being inferred.
        self.model.setInputSize([img_w, img_h])
        results = self.model.infer(image)

        faces = results[:num_faces] if num_faces else results

        bboxs = []
        for face in faces:
            bbox = face[0:4].astype(np.int32)  # x, y, w, h
            x, y, w, h = bbox
            # Draw the detection in red (BGR), 2px thick.
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), 2)
            bboxs.append(bbox)

        return image, bboxs

    def resize(self, image, target_size=512, above_head_ratio=0.5):
        """Scale ``image`` and crop a square window around the first face.

        Args:
            image: BGR image as an HxWxC ndarray.
            target_size: Side length of the square crop; falsy values
                (None/0) fall back to 512.
            above_head_ratio: Fraction of the crop height placed above the
                face center (0.5 centers the face vertically).

        Returns:
            (dt_image, cropped_image): the resized image annotated with
            detections, and the cropped square region.
        """
        height, width, _c = image.shape
        ar = width / height
        if not target_size:
            target_size = 512

        # Scale so the SHORTER side equals target_size (the longer side
        # overflows and is removed by the crop below).
        if ar > 1:
            # Landscape: height is the short side.
            new_height = target_size
            new_width = int(target_size * ar)
        elif ar < 1:
            # Portrait: width is the short side.
            new_width = target_size
            new_height = int(target_size / ar)
        else:
            # Square
            new_width = target_size
            new_height = target_size

        resized = cv2.resize(
            image, (new_width, new_height), interpolation=cv2.INTER_AREA
        )

        # Detect on a copy so `resized` stays free of drawn rectangles.
        dt_image, bboxes = self.detect(resized.copy())

        # Crop around the first face; without a face, anchor at the origin.
        if len(bboxes) >= 1:
            x, y, w, h = bboxes[0]
        else:
            x, y, w, h = 0, 0, target_size, target_size

        # Vertical headroom: this many pixels of the crop go above the
        # face center (above_head_ratio of the crop height).
        above_head_max = int(target_size * above_head_ratio)
        x_center = int((x + (x + w)) / 2)
        y_center = int((y + (y + h)) / 2)

        # Calculate cropping box, clamped to the image bounds.
        top = int(max(0, y_center - above_head_max))
        bottom = int(min(top + target_size, resized.shape[0]))

        left = int(max(0, x_center - target_size // 2))
        right = int(min(x_center + target_size // 2, resized.shape[1]))

        # If the window is narrower than target_size (clipped at an edge or
        # an odd target_size), widen it: shift left as far as possible and
        # push right by whatever shift wasn't available.
        _w = right - left
        if _w != target_size:
            dx = target_size - _w  # missing width
            nl = max(0, left - dx)
            # Bug fix: the original computed `dr = dx - nl`, i.e. subtracted
            # the new left COORDINATE instead of the amount actually
            # shifted, corrupting `right` whenever left > dx.
            shifted = left - nl
            left = nl
            right += dx - shifted

        cropped_image = resized[top:bottom, left:right]
        return dt_image, cropped_image