Spaces:

mrdbourke
/

trashify_demo_v2

Running

App Files Files Community

mrdbourke committed on Nov 18, 2024

Commit

dd1fd86

verified ·

1 Parent(s): 4845fdd

Uploading Trashify V2 box detection model (with data augmentation) app.py

Browse files

Files changed (2) hide show

README.md +16 -5
app.py +79 -18

README.md CHANGED Viewed

@@ -1,13 +1,24 @@
 ---
-title: Trashify Demo V2
-emoji: 👁
 colorFrom: purple
-colorTo: purple
 sdk: gradio
-sdk_version: 4.41.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Trashify Demo V2 🚮
+emoji: 🗑️
 colorFrom: purple
+colorTo: blue
 sdk: gradio
+sdk_version: 4.40.0
 app_file: app.py
 pinned: false
 license: apache-2.0
 ---
+# 🚮 Trashify Object Detector Demo V2
+Object detection demo to detect `trash`, `bin`, `hand`, `trash_arm`, `not_trash`, `not_bin`, `not_hand`.
+Used as an example to encourage people to clean up their local area.
+If `trash`, `hand` and `bin` are all detected = +1 point.
+* V1 = model trained *without* data augmentation
+* V2 = model trained *with* data augmentation
+TODO - finish the README.md and update it with links to materials

app.py CHANGED Viewed

@@ -1,29 +1,45 @@
 import gradio as gr
 import torch
-from PIL import Image, ImageDraw
 from transformers import AutoImageProcessor
 from transformers import AutoModelForObjectDetection
-from PIL import Image
-model_save_path = "mrdbourke/detr_finetuned_trashify_box_detector_synthetic_data_only"
 image_processor = AutoImageProcessor.from_pretrained(model_save_path)
 model = AutoModelForObjectDetection.from_pretrained(model_save_path)
 id2label = model.config.id2label
-color_dict = {
-    "not_trash": "red",
     "bin": "green",
     "trash": "blue",
-    "hand": "purple"
 }
-device = "cuda" if torch.cuda.is_available() else "cpu"
-model = model.to(device)
-def predict_on_image(image, conf_threshold=0.25):
     with torch.no_grad():
         inputs = image_processor(images=[image], return_tensors="pt")
         outputs = model(**inputs.to(device))
@@ -43,6 +59,12 @@ def predict_on_image(image, conf_threshold=0.25):
     # Can return results as plotted on a PIL image (then display the image)
     draw = ImageDraw.Draw(image)
     for box, score, label in zip(results["boxes"], results["scores"], results["labels"]):
         # Create coordinates
         x, y, x2, y2 = tuple(box.tolist())
@@ -50,6 +72,7 @@ def predict_on_image(image, conf_threshold=0.25):
         # Get label_name
         label_name = id2label[label.item()]
         targ_color = color_dict[label_name]
         # Draw the rectangle
         draw.rectangle(xy=(x, y, x2, y2),
@@ -62,23 +85,61 @@ def predict_on_image(image, conf_threshold=0.25):
         # Draw the text on the image
         draw.text(xy=(x, y),
                   text=text_string_to_show,
-                  fill="white")
     # Remove the draw each time
     del draw
-    return image
 demo = gr.Interface(
     fn=predict_on_image,
     inputs=[
-        gr.Image(type="pil", label="Upload Target Image"),
         gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
     ],
-    outputs=gr.Image(type="pil"),
-    title="🚮 Trashify Object Detection Demo",
-    description="Upload an image to detect whether there's a bin, a hand or trash in it. Model trained on synthetically generated images by Flux and labels creating by GroundingDINO."
 )
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
+from PIL import Image, ImageDraw, ImageFont
 from transformers import AutoImageProcessor
 from transformers import AutoModelForObjectDetection
+# Note: Can load from Hugging Face or can load from local.
+# You will have to replace {mrdbourke} with your own username if the model is on your Hugging Face account.
+model_save_path = "mrdbourke/detr_finetuned_trashify_box_detector_with_data_aug"
+# Load the model and preprocessor
 image_processor = AutoImageProcessor.from_pretrained(model_save_path)
 model = AutoModelForObjectDetection.from_pretrained(model_save_path)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = model.to(device)
+# Get the id2label dictionary from the model
 id2label = model.config.id2label
+# Set up a colour dictionary for plotting boxes with different colours
+color_dict = {
     "bin": "green",
     "trash": "blue",
+    "hand": "purple",
+    "trash_arm": "yellow",
+    "not_trash": "red",
+    "not_bin": "red",
+    "not_hand": "red",
 }
+# Create helper functions for seeing if items from one list are in another
def any_in_list(list_a, list_b) -> bool:
    """Return True if any item from list_a is in list_b, otherwise False.

    Args:
        list_a: Iterable of items to look for.
        list_b: Container to search in. A one-shot iterator or generator is
            also accepted; it is materialised once before searching.

    Returns:
        True as soon as one item of list_a is found in list_b. False when
        nothing matches, which includes the case of an empty list_a.
    """
    # Each `item in iterator` test consumes the iterator, so every later item
    # would be compared against an already-exhausted sequence. Snapshot it
    # once so all membership tests see the full contents.
    if hasattr(list_b, "__next__"):
        list_b = tuple(list_b)
    return any(item in list_b for item in list_a)
def all_in_list(list_a, list_b) -> bool:
    """Return True if all items from list_a are in list_b, otherwise False.

    Args:
        list_a: Iterable of items that must all be present.
        list_b: Container to search in. A one-shot iterator or generator is
            also accepted; it is materialised once before searching.

    Returns:
        True when every item of list_a is found in list_b, which includes
        the case of an empty list_a. False as soon as one item is missing.
    """
    # Each `item in iterator` test consumes the iterator, so a later item
    # that appeared earlier in the sequence would wrongly be reported as
    # missing. Snapshot it once so all membership tests see the full contents.
    if hasattr(list_b, "__next__"):
        list_b = tuple(list_b)
    return all(item in list_b for item in list_a)
+def predict_on_image(image, conf_threshold):
     with torch.no_grad():
         inputs = image_processor(images=[image], return_tensors="pt")
         outputs = model(**inputs.to(device))
     # Can return results as plotted on a PIL image (then display the image)
     draw = ImageDraw.Draw(image)
+    # Get a font from ImageFont
+    font = ImageFont.load_default(size=20)
+    # Get class names as text for print out
+    class_name_text_labels = []
     for box, score, label in zip(results["boxes"], results["scores"], results["labels"]):
         # Create coordinates
         x, y, x2, y2 = tuple(box.tolist())
         # Get label_name
         label_name = id2label[label.item()]
         targ_color = color_dict[label_name]
+        class_name_text_labels.append(label_name)
         # Draw the rectangle
         draw.rectangle(xy=(x, y, x2, y2),
         # Draw the text on the image
         draw.text(xy=(x, y),
                   text=text_string_to_show,
+                  fill="white",
+                  font=font)
     # Remove the draw each time
     del draw
+    # Setup blank string to print out
+    return_string = ""
+    # Setup list of target items to discover
+    target_items = ["trash", "bin", "hand"]
+    # If no items detected or trash, bin, hand not in list, return notification
+    if (len(class_name_text_labels) == 0) or not (any_in_list(list_a=target_items, list_b=class_name_text_labels)):
+        return_string = f"No trash, bin or hand detected at confidence threshold {conf_threshold}. Try another image or lowering the confidence threshold."
+        return image, return_string
+    # If there are some missing, print the ones which are missing
+    elif not all_in_list(list_a=target_items, list_b=class_name_text_labels):
+        missing_items = []
+        for item in target_items:
+            if item not in class_name_text_labels:
+                missing_items.append(item)
+        return_string = f"Detected the following items: {class_name_text_labels}. But missing the following in order to get +1: {missing_items}. If this is an error, try another image or altering the confidence threshold. Otherwise, the model may need to be updated with better data."
+    # If all 3 trash, bin, hand occur = + 1
+    if all_in_list(list_a=target_items, list_b=class_name_text_labels):
+        return_string = f"+1! Found the following items: {class_name_text_labels}, thank you for cleaning up the area!"
+    print(return_string)
+    return image, return_string
+# Create the interface
 demo = gr.Interface(
     fn=predict_on_image,
     inputs=[
+        gr.Image(type="pil", label="Target Image"),
         gr.Slider(minimum=0, maximum=1, value=0.25, label="Confidence Threshold")
     ],
+    outputs=[
+        gr.Image(type="pil", label="Image Output"),
+        gr.Text(label="Text Output")
+    ],
+    title="🚮 Trashify Object Detection Demo V2",
+    description="""Help clean up your local area! Upload an image and get +1 if there is all of the following items detected: trash, bin, hand.
+    Model in V2 has been trained with data augmentation (tk - add link to model).
+    """,
+    # Examples come in the form of a list of lists, where each inner list contains elements to prefill the `inputs` parameter with
+    examples=[
+        ["examples/trashify_example_1.jpeg", 0.25],
+        ["examples/trashify_example_2.jpeg", 0.25]
+    ],
+    cache_examples=True
 )
+# Launch the demo
+demo.launch()