banao-tech committed · verified
Commit 281c395 · Parent(s): c0fcac1

Update utils.py

Files changed (1):
  utils.py (+22, -34)
utils.py CHANGED
@@ -227,43 +227,31 @@ def load_image(image_path: str) -> Tuple[np.array, torch.Tensor]:
 
 def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str], text_scale: float,
              text_padding=5, text_thickness=2, thickness=3) -> np.ndarray:
+    """
+    Annotate an image with bounding boxes and labels.
+
+    Parameters:
+        image_source (np.ndarray): The source image to be annotated.
+        boxes (torch.Tensor): Bounding boxes in cxcywh format, normalized to [0, 1].
+        logits (torch.Tensor): Confidence scores for each bounding box.
+        phrases (List[str]): A label for each bounding box.
+        text_scale (float): Scale of the label text: 0.8 for mobile/web, 0.3 for desktop, 0.4 for mind2web.
+
+    Returns:
+        Tuple[np.ndarray, dict]: The annotated image and a dict mapping each phrase to its pixel-space xywh box.
+    """
     h, w, _ = image_source.shape
-
-    # Convert to CPU first
-    boxes_cpu = boxes.cpu()
-    logits_cpu = logits.cpu()
-
-    # Process on CPU
-    boxes_scaled = boxes_cpu * torch.Tensor([w, h, w, h])
-    xyxy = box_convert(boxes=boxes_scaled, in_fmt="cxcywh", out_fmt="xyxy").numpy()
-
-    detections = sv.Detections(
-        xyxy=xyxy,
-        confidence=logits_cpu.numpy(),
-        class_id=np.zeros(len(xyxy))
-    )
-
-    box_annotator = sv.BoxAnnotator(
-        text_scale=text_scale,
-        text_padding=text_padding,
-        text_thickness=text_thickness,
-        thickness=thickness
-    )
-
-    labels = [
-        f"{phrases[i]} ({logits_cpu[i]:.2f})"
-        for i in range(len(phrases))
-    ]
-
-    annotated_frame = box_annotator.annotate(
-        scene=image_source.copy(),
-        detections=detections,
-        labels=labels
-    )
-
-    xywh = box_convert(boxes=boxes_scaled, in_fmt="cxcywh", out_fmt="xywh").numpy()
-    label_coordinates = {phrases[i]: xywh[i] for i in range(len(phrases))}
-
+    # Scale normalized cxcywh boxes up to pixel coordinates, then derive corner (xyxy) and corner+size (xywh) forms.
+    boxes = boxes * torch.Tensor([w, h, w, h])
+    xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
+    xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
+    detections = sv.Detections(xyxy=xyxy)
+
+    # Draw each box's index rather than its phrase, so on-screen labels stay short.
+    labels = [f"{idx}" for idx in range(boxes.shape[0])]
+
+    from util.box_annotator import BoxAnnotator
+    box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
+                                 text_thickness=text_thickness, thickness=thickness)
+    annotated_frame = image_source.copy()
+    annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels, image_size=(w, h))
+
+    # Phrases still key the pixel-space xywh boxes, in detection order.
+    label_coordinates = {f"{phrase}": coords for phrase, coords in zip(phrases, xywh)}
     return annotated_frame, label_coordinates
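
For reference, a minimal sketch of calling the updated annotate(). The image, box, and phrase below are made-up illustrative values, not from this repo, and the import assumes the utils.py edited above is on the Python path.

import numpy as np
import torch

from utils import annotate  # the utils.py updated in this commit

# Blank 800x600 canvas standing in for a real screenshot.
image = np.zeros((600, 800, 3), dtype=np.uint8)

# One detection: a cxcywh box normalized to [0, 1], its confidence, and its phrase.
boxes = torch.tensor([[0.5, 0.5, 0.25, 0.2]])
logits = torch.tensor([0.92])
phrases = ["submit button"]

annotated, label_coordinates = annotate(
    image_source=image,
    boxes=boxes,
    logits=logits,
    phrases=phrases,
    text_scale=0.8,  # the docstring suggests 0.8 for mobile/web screenshots
)

# With w=800, h=600 the box scales to cxcywh = (400, 300, 200, 120), so
# label_coordinates comes back as {"submit button": array([300., 240., 200., 120.])}
# in xywh form, while the drawn frame shows the box labeled "0" (its index).

One consequence of the new index-based labels is that callers who need the numbers drawn on screen must map them back to phrases by position; label_coordinates preserves that ordering, since it zips phrases with the converted boxes.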