banao-tech committed · verified
Commit 281c395 · Parent(s): c0fcac1

Update utils.py

Files changed (1):
  utils.py (+22, -34)
utils.py CHANGED
@@ -227,43 +227,31 @@ def load_image(image_path: str) -> Tuple[np.array, torch.Tensor]:
 
 def annotate(image_source: np.ndarray, boxes: torch.Tensor, logits: torch.Tensor, phrases: List[str], text_scale: float,
              text_padding=5, text_thickness=2, thickness=3) -> np.ndarray:
+    """
+    Annotate an image with bounding boxes and labels.
+
+    Parameters:
+        image_source (np.ndarray): The source image to be annotated.
+        boxes (torch.Tensor): Bounding boxes in cxcywh format, normalized to [0, 1].
+        logits (torch.Tensor): Confidence scores for each bounding box.
+        phrases (List[str]): A label for each bounding box.
+        text_scale (float): Scale of the label text: 0.8 for mobile/web, 0.3 for desktop, 0.4 for mind2web.
+
+    Returns:
+        Tuple[np.ndarray, dict]: The annotated image and a dict mapping each phrase to its pixel-space xywh box.
+    """
     h, w, _ = image_source.shape
-
-    # Convert to CPU first
-    boxes_cpu = boxes.cpu()
-    logits_cpu = logits.cpu()
-
-    # Process on CPU
-    boxes_scaled = boxes_cpu * torch.Tensor([w, h, w, h])
-    xyxy = box_convert(boxes=boxes_scaled, in_fmt="cxcywh", out_fmt="xyxy").numpy()
-
-    detections = sv.Detections(
-        xyxy=xyxy,
-        confidence=logits_cpu.numpy(),
-        class_id=np.zeros(len(xyxy))
-    )
-
-    box_annotator = sv.BoxAnnotator(
-        text_scale=text_scale,
-        text_padding=text_padding,
-        text_thickness=text_thickness,
-        thickness=thickness
-    )
-
-    labels = [
-        f"{phrases[i]} ({logits_cpu[i]:.2f})"
-        for i in range(len(phrases))
-    ]
-
-    annotated_frame = box_annotator.annotate(
-        scene=image_source.copy(),
-        detections=detections,
-        labels=labels
-    )
-
-    xywh = box_convert(boxes=boxes_scaled, in_fmt="cxcywh", out_fmt="xywh").numpy()
-    label_coordinates = {phrases[i]: xywh[i] for i in range(len(phrases))}
-
+    # Scale normalized cxcywh boxes up to pixel coordinates, then derive corner (xyxy) and corner+size (xywh) forms.
+    boxes = boxes * torch.Tensor([w, h, w, h])
+    xyxy = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xyxy").numpy()
+    xywh = box_convert(boxes=boxes, in_fmt="cxcywh", out_fmt="xywh").numpy()
+    detections = sv.Detections(xyxy=xyxy)
+
+    # Draw each box's index rather than its phrase, so on-screen labels stay short.
+    labels = [f"{idx}" for idx in range(boxes.shape[0])]
+
+    from util.box_annotator import BoxAnnotator
+    box_annotator = BoxAnnotator(text_scale=text_scale, text_padding=text_padding,
+                                 text_thickness=text_thickness, thickness=thickness)
+    annotated_frame = image_source.copy()
+    annotated_frame = box_annotator.annotate(scene=annotated_frame, detections=detections, labels=labels, image_size=(w, h))
+
+    # Phrases still key the pixel-space xywh boxes, in detection order.
+    label_coordinates = {f"{phrase}": coords for phrase, coords in zip(phrases, xywh)}
     return annotated_frame, label_coordinates
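
For reference, a minimal sketch of calling the updated annotate(). The image, box, and phrase below are made-up illustrative values, not from this repo, and the import assumes the utils.py edited above is on the Python path.

import numpy as np
import torch

from utils import annotate  # the utils.py updated in this commit

# Blank 800x600 canvas standing in for a real screenshot.
image = np.zeros((600, 800, 3), dtype=np.uint8)

# One detection: a cxcywh box normalized to [0, 1], its confidence, and its phrase.
boxes = torch.tensor([[0.5, 0.5, 0.25, 0.2]])
logits = torch.tensor([0.92])
phrases = ["submit button"]

annotated, label_coordinates = annotate(
    image_source=image,
    boxes=boxes,
    logits=logits,
    phrases=phrases,
    text_scale=0.8,  # the docstring suggests 0.8 for mobile/web screenshots
)

# With w=800, h=600 the box scales to cxcywh = (400, 300, 200, 120), so
# label_coordinates comes back as {"submit button": array([300., 240., 200., 120.])}
# in xywh form, while the drawn frame shows the box labeled "0" (its index).

One consequence of the new index-based labels is that callers who need the numbers drawn on screen must map them back to phrases by position; label_coordinates preserves that ordering, since it zips phrases with the converted boxes.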