merve (HF staff) committed
Commit d2b4796
1 Parent(s): d4be87c

Update app.py

Files changed (1)
  1. app.py +21 -13
app.py CHANGED
@@ -1,6 +1,7 @@
 from transformers import pipeline, SamModel, SamProcessor
 import torch
 import numpy as np
+import gradio as gr
 import spaces
 
 checkpoint = "google/owlv2-base-patch16-ensemble"
@@ -8,8 +9,9 @@ detector = pipeline(model=checkpoint, task="zero-shot-object-detection", device=
 sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to("cuda")
 sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")
 
+
 @spaces.GPU
-def query(image, texts, threshold):
+def query(image, texts, threshold, sam_threshold):
     texts = texts.split(",")
 
     predictions = detector(
@@ -20,42 +22,48 @@ def query(image, texts, threshold):
 
     result_labels = []
     for pred in predictions:
-
+
         box = pred["box"]
         score = pred["score"]
        label = pred["label"]
-        box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
+        box = [round(pred["box"]["xmin"], 2), round(pred["box"]["ymin"], 2),
               round(pred["box"]["xmax"], 2), round(pred["box"]["ymax"], 2)]
-
        inputs = sam_processor(
            image,
-            input_boxes=[[[box]]],
+            input_boxes=[[box]],
            return_tensors="pt"
        ).to("cuda")
-
        with torch.no_grad():
-            outputs = sam_model(**inputs)
+            outputs = sam_model(**inputs)
 
        mask = sam_processor.image_processor.post_process_masks(
            outputs.pred_masks.cpu(),
            inputs["original_sizes"].cpu(),
            inputs["reshaped_input_sizes"].cpu()
-        )[0][0][0].numpy()
-        mask = mask[np.newaxis, ...]
-        result_labels.append((mask, label))
+        )
+        iou_scores = outputs["iou_scores"]
+
+        masks, iou_scores, boxes = sam_processor.image_processor.filter_masks(
+            mask[0],
+            iou_scores[0].cpu(),
+            inputs["original_sizes"][0].cpu(),
+            box,
+            pred_iou_thresh=sam_threshold,
+        )
+
+        result_labels.append((mask[0][0][0].numpy(), label))
    return image, result_labels
 
-import gradio as gr
 
 description = "This Space combines OWLv2, the state-of-the-art zero-shot object detection model with SAM, the state-of-the-art mask generation model. SAM normally doesn't accept text input. Combining SAM with OWLv2 makes SAM text promptable. Try the example or input an image and comma separated candidate labels to segment."
 demo = gr.Interface(
    query,
-    inputs=[gr.Image(type="pil", label="Image Input"), gr.Textbox(label = "Candidate Labels"), gr.Slider(0, 1, value=0.05, label="Confidence Threshold")],
+    inputs=[gr.Image(type="pil", label="Image Input"), gr.Textbox(label = "Candidate Labels"), gr.Slider(0, 1, value=0.05, label="Confidence Threshold for OWL"), gr.Slider(0, 1, value=0.88, label="IoU threshold for SAM")],
    outputs="annotatedimage",
    title="OWL 🤝 SAM",
    description=description,
    examples=[
-        ["./cats.png", "cat", 0.1],
+        ["./cats.png", "cat", 0.1, 0.88],
    ],
    cache_examples=True
 )
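For context, below is a minimal standalone sketch of the OWLv2 → SAM pipeline this commit adjusts: OWLv2 turns the text prompt into boxes, and SAM segments each box. The checkpoints, processor calls, and post-processing mirror the diff above; the device fallback, the PIL import, the hard-coded "cat" prompt, and the final print are assumptions added only so the snippet runs on its own.

from PIL import Image
import torch
from transformers import pipeline, SamModel, SamProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"  # assumption: fall back to CPU outside the Space

# Text-conditioned detector (OWLv2) and box-conditioned segmenter (SAM), as in the Space.
detector = pipeline(model="google/owlv2-base-patch16-ensemble",
                    task="zero-shot-object-detection", device=device)
sam_model = SamModel.from_pretrained("facebook/sam-vit-base").to(device)
sam_processor = SamProcessor.from_pretrained("facebook/sam-vit-base")

image = Image.open("./cats.png")  # example image shipped with the Space

# 1. OWLv2 turns the text prompt into candidate boxes above the confidence threshold.
predictions = detector(image, candidate_labels=["cat"], threshold=0.1)

for pred in predictions:
    box = [pred["box"]["xmin"], pred["box"]["ymin"],
           pred["box"]["xmax"], pred["box"]["ymax"]]

    # 2. SAM segments each detected box; input_boxes is nested per image, then per box.
    inputs = sam_processor(image, input_boxes=[[box]], return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = sam_model(**inputs)

    # 3. Resize the predicted masks back to the original image resolution.
    #    The commit above additionally filters these masks by predicted IoU via
    #    sam_processor.image_processor.filter_masks(..., pred_iou_thresh=sam_threshold).
    masks = sam_processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )
    print(pred["label"], pred["score"], masks[0].shape)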