Sanket17 commited on
Commit
a3cda61
·
1 Parent(s): cf691a0

util folder created

Browse files
Files changed (1) hide show
  1. util/box_annotator.py +262 -0
util/box_annotator.py ADDED
@@ -0,0 +1,262 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Union, Tuple
2
+
3
+ import cv2
4
+ import numpy as np
5
+
6
+ from supervision.detection.core import Detections
7
+ from supervision.draw.color import Color, ColorPalette
8
+
9
+
10
+ class BoxAnnotator:
11
+ """
12
+ A class for drawing bounding boxes on an image using detections provided.
13
+
14
+ Attributes:
15
+ color (Union[Color, ColorPalette]): The color to draw the bounding box,
16
+ can be a single color or a color palette
17
+ thickness (int): The thickness of the bounding box lines, default is 2
18
+ text_color (Color): The color of the text on the bounding box, default is white
19
+ text_scale (float): The scale of the text on the bounding box, default is 0.5
20
+ text_thickness (int): The thickness of the text on the bounding box,
21
+ default is 1
22
+ text_padding (int): The padding around the text on the bounding box,
23
+ default is 5
24
+
25
+ """
26
+
27
+ def __init__(
28
+ self,
29
+ color: Union[Color, ColorPalette] = ColorPalette.DEFAULT,
30
+ thickness: int = 3, # 1 for seeclick 2 for mind2web and 3 for demo
31
+ text_color: Color = Color.BLACK,
32
+ text_scale: float = 0.5, # 0.8 for mobile/web, 0.3 for desktop # 0.4 for mind2web
33
+ text_thickness: int = 2, #1, # 2 for demo
34
+ text_padding: int = 10,
35
+ avoid_overlap: bool = True,
36
+ ):
37
+ self.color: Union[Color, ColorPalette] = color
38
+ self.thickness: int = thickness
39
+ self.text_color: Color = text_color
40
+ self.text_scale: float = text_scale
41
+ self.text_thickness: int = text_thickness
42
+ self.text_padding: int = text_padding
43
+ self.avoid_overlap: bool = avoid_overlap
44
+
45
+ def annotate(
46
+ self,
47
+ scene: np.ndarray,
48
+ detections: Detections,
49
+ labels: Optional[List[str]] = None,
50
+ skip_label: bool = False,
51
+ image_size: Optional[Tuple[int, int]] = None,
52
+ ) -> np.ndarray:
53
+ """
54
+ Draws bounding boxes on the frame using the detections provided.
55
+
56
+ Args:
57
+ scene (np.ndarray): The image on which the bounding boxes will be drawn
58
+ detections (Detections): The detections for which the
59
+ bounding boxes will be drawn
60
+ labels (Optional[List[str]]): An optional list of labels
61
+ corresponding to each detection. If `labels` are not provided,
62
+ corresponding `class_id` will be used as label.
63
+ skip_label (bool): Is set to `True`, skips bounding box label annotation.
64
+ Returns:
65
+ np.ndarray: The image with the bounding boxes drawn on it
66
+
67
+ Example:
68
+ ```python
69
+ import supervision as sv
70
+
71
+ classes = ['person', ...]
72
+ image = ...
73
+ detections = sv.Detections(...)
74
+
75
+ box_annotator = sv.BoxAnnotator()
76
+ labels = [
77
+ f"{classes[class_id]} {confidence:0.2f}"
78
+ for _, _, confidence, class_id, _ in detections
79
+ ]
80
+ annotated_frame = box_annotator.annotate(
81
+ scene=image.copy(),
82
+ detections=detections,
83
+ labels=labels
84
+ )
85
+ ```
86
+ """
87
+ font = cv2.FONT_HERSHEY_SIMPLEX
88
+ for i in range(len(detections)):
89
+ x1, y1, x2, y2 = detections.xyxy[i].astype(int)
90
+ class_id = (
91
+ detections.class_id[i] if detections.class_id is not None else None
92
+ )
93
+ idx = class_id if class_id is not None else i
94
+ color = (
95
+ self.color.by_idx(idx)
96
+ if isinstance(self.color, ColorPalette)
97
+ else self.color
98
+ )
99
+ cv2.rectangle(
100
+ img=scene,
101
+ pt1=(x1, y1),
102
+ pt2=(x2, y2),
103
+ color=color.as_bgr(),
104
+ thickness=self.thickness,
105
+ )
106
+ if skip_label:
107
+ continue
108
+
109
+ text = (
110
+ f"{class_id}"
111
+ if (labels is None or len(detections) != len(labels))
112
+ else labels[i]
113
+ )
114
+
115
+ text_width, text_height = cv2.getTextSize(
116
+ text=text,
117
+ fontFace=font,
118
+ fontScale=self.text_scale,
119
+ thickness=self.text_thickness,
120
+ )[0]
121
+
122
+ if not self.avoid_overlap:
123
+ text_x = x1 + self.text_padding
124
+ text_y = y1 - self.text_padding
125
+
126
+ text_background_x1 = x1
127
+ text_background_y1 = y1 - 2 * self.text_padding - text_height
128
+
129
+ text_background_x2 = x1 + 2 * self.text_padding + text_width
130
+ text_background_y2 = y1
131
+ # text_x = x1 - self.text_padding - text_width
132
+ # text_y = y1 + self.text_padding + text_height
133
+ # text_background_x1 = x1 - 2 * self.text_padding - text_width
134
+ # text_background_y1 = y1
135
+ # text_background_x2 = x1
136
+ # text_background_y2 = y1 + 2 * self.text_padding + text_height
137
+ else:
138
+ text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2 = get_optimal_label_pos(self.text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size)
139
+
140
+ cv2.rectangle(
141
+ img=scene,
142
+ pt1=(text_background_x1, text_background_y1),
143
+ pt2=(text_background_x2, text_background_y2),
144
+ color=color.as_bgr(),
145
+ thickness=cv2.FILLED,
146
+ )
147
+ # import pdb; pdb.set_trace()
148
+ box_color = color.as_rgb()
149
+ luminance = 0.299 * box_color[0] + 0.587 * box_color[1] + 0.114 * box_color[2]
150
+ text_color = (0,0,0) if luminance > 160 else (255,255,255)
151
+ cv2.putText(
152
+ img=scene,
153
+ text=text,
154
+ org=(text_x, text_y),
155
+ fontFace=font,
156
+ fontScale=self.text_scale,
157
+ # color=self.text_color.as_rgb(),
158
+ color=text_color,
159
+ thickness=self.text_thickness,
160
+ lineType=cv2.LINE_AA,
161
+ )
162
+ return scene
163
+
164
+
165
+ def box_area(box):
166
+ return (box[2] - box[0]) * (box[3] - box[1])
167
+
168
+ def intersection_area(box1, box2):
169
+ x1 = max(box1[0], box2[0])
170
+ y1 = max(box1[1], box2[1])
171
+ x2 = min(box1[2], box2[2])
172
+ y2 = min(box1[3], box2[3])
173
+ return max(0, x2 - x1) * max(0, y2 - y1)
174
+
175
+ def IoU(box1, box2, return_max=True):
176
+ intersection = intersection_area(box1, box2)
177
+ union = box_area(box1) + box_area(box2) - intersection
178
+ if box_area(box1) > 0 and box_area(box2) > 0:
179
+ ratio1 = intersection / box_area(box1)
180
+ ratio2 = intersection / box_area(box2)
181
+ else:
182
+ ratio1, ratio2 = 0, 0
183
+ if return_max:
184
+ return max(intersection / union, ratio1, ratio2)
185
+ else:
186
+ return intersection / union
187
+
188
+
189
+ def get_optimal_label_pos(text_padding, text_width, text_height, x1, y1, x2, y2, detections, image_size):
190
+ """ check overlap of text and background detection box, and get_optimal_label_pos,
191
+ pos: str, position of the text, must be one of 'top left', 'top right', 'outer left', 'outer right' TODO: if all are overlapping, return the last one, i.e. outer right
192
+ Threshold: default to 0.3
193
+ """
194
+
195
+ def get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size):
196
+ is_overlap = False
197
+ for i in range(len(detections)):
198
+ detection = detections.xyxy[i].astype(int)
199
+ if IoU([text_background_x1, text_background_y1, text_background_x2, text_background_y2], detection) > 0.3:
200
+ is_overlap = True
201
+ break
202
+ # check if the text is out of the image
203
+ if text_background_x1 < 0 or text_background_x2 > image_size[0] or text_background_y1 < 0 or text_background_y2 > image_size[1]:
204
+ is_overlap = True
205
+ return is_overlap
206
+
207
+ # if pos == 'top left':
208
+ text_x = x1 + text_padding
209
+ text_y = y1 - text_padding
210
+
211
+ text_background_x1 = x1
212
+ text_background_y1 = y1 - 2 * text_padding - text_height
213
+
214
+ text_background_x2 = x1 + 2 * text_padding + text_width
215
+ text_background_y2 = y1
216
+ is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
217
+ if not is_overlap:
218
+ return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2
219
+
220
+ # elif pos == 'outer left':
221
+ text_x = x1 - text_padding - text_width
222
+ text_y = y1 + text_padding + text_height
223
+
224
+ text_background_x1 = x1 - 2 * text_padding - text_width
225
+ text_background_y1 = y1
226
+
227
+ text_background_x2 = x1
228
+ text_background_y2 = y1 + 2 * text_padding + text_height
229
+ is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
230
+ if not is_overlap:
231
+ return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2
232
+
233
+
234
+ # elif pos == 'outer right':
235
+ text_x = x2 + text_padding
236
+ text_y = y1 + text_padding + text_height
237
+
238
+ text_background_x1 = x2
239
+ text_background_y1 = y1
240
+
241
+ text_background_x2 = x2 + 2 * text_padding + text_width
242
+ text_background_y2 = y1 + 2 * text_padding + text_height
243
+
244
+ is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
245
+ if not is_overlap:
246
+ return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2
247
+
248
+ # elif pos == 'top right':
249
+ text_x = x2 - text_padding - text_width
250
+ text_y = y1 - text_padding
251
+
252
+ text_background_x1 = x2 - 2 * text_padding - text_width
253
+ text_background_y1 = y1 - 2 * text_padding - text_height
254
+
255
+ text_background_x2 = x2
256
+ text_background_y2 = y1
257
+
258
+ is_overlap = get_is_overlap(detections, text_background_x1, text_background_y1, text_background_x2, text_background_y2, image_size)
259
+ if not is_overlap:
260
+ return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2
261
+
262
+ return text_x, text_y, text_background_x1, text_background_y1, text_background_x2, text_background_y2