Commit d73d6bf · verified · committed by nikigoli
Parent(s): 2f1d1a1

add-demo-notebook (#5)

- Refactor app.py - extract reusable functions (aedd89b11c7db4e19dbd7d72566a0fbccea3bd85)
- Add sample notebook (96f9e24b2cf04f41c38a00d4706abb6b38ec88e4)
- Remove notebook output (eb3994e008d734612930f2bdd5a091882ba18603)

Files changed (4)
  1. .gitignore +2 -2
  2. app.py +118 -148
  3. notebooks/demo.ipynb +492 -0
  4. requirements.txt +2 -0
.gitignore CHANGED
@@ -2,7 +2,7 @@
 env/
 __pycache__
 .python-version
-
+*.py[od]
 
 # vim
-*.sw[op]
+*.sw[op]
app.py CHANGED
@@ -14,11 +14,6 @@ import matplotlib.pyplot as plt
 import io
 from enum import Enum
 import os
-import subprocess
-from subprocess import call
-import shlex
-import shutil
-#os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
 cwd = os.getcwd()
 # Suppress warnings to avoid overflowing the log.
 import warnings
@@ -145,22 +140,6 @@ def build_model_and_transforms(args):
 
     return model, data_transform
 
-examples = [
-    ["strawberry.jpg", "strawberry", {"image": "strawberry.jpg"}],
-    ["strawberry.jpg", "blueberry", {"image": "strawberry.jpg"}],
-    ["bird-1.JPG", "bird", {"image": "bird-2.JPG"}],
-    ["fish.jpg", "fish", {"image": "fish.jpg"}],
-    ["women.jpg", "girl", {"image": "women.jpg"}],
-    ["women.jpg", "boy", {"image": "women.jpg"}],
-    ["balloon.jpg", "hot air balloon", {"image": "balloon.jpg"}],
-    ["deer.jpg", "deer", {"image": "deer.jpg"}],
-    ["apple.jpg", "apple", {"image": "apple.jpg"}],
-    ["egg.jpg", "egg", {"image": "egg.jpg"}],
-    ["stamp.jpg", "stamp", {"image": "stamp.jpg"}],
-    ["green-pea.jpg", "green pea", {"image": "green-pea.jpg"}],
-    ["lego.jpg", "lego", {"image": "lego.jpg"}]
-]
-
 # APP:
 def get_box_inputs(prompts):
     box_inputs = []
@@ -197,6 +176,107 @@ def get_ind_to_filter(text, word_ids, keywords):
 
     return inds_to_filter
 
+def generate_heatmap(image, boxes):
+    # Plot results.
+    (w, h) = image.size
+    det_map = np.zeros((h, w))
+    det_map[(h * boxes[:, 1]).astype(int), (w * boxes[:, 0]).astype(int)] = 1
+    det_map = ndimage.gaussian_filter(
+        det_map, sigma=(w // 200, w // 200), order=0
+    )
+    plt.imshow(image)
+    plt.imshow(det_map[None, :].transpose(1, 2, 0), 'jet', interpolation='none', alpha=0.7)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, format='png', bbox_inches='tight')
+    plt.close()
+
+    output_img = Image.open(img_buf)
+    return output_img
+
+def generate_output_label(text, num_exemplars):
+    out_label = "Detected instances predicted with"
+    if len(text.strip()) > 0:
+        out_label += " text"
+        if num_exemplars == 1:
+            out_label += " and " + str(num_exemplars) + " visual exemplar."
+        elif num_exemplars > 1:
+            out_label += " and " + str(num_exemplars) + " visual exemplars."
+        else:
+            out_label += "."
+    elif num_exemplars > 0:
+        if num_exemplars == 1:
+            out_label += " " + str(num_exemplars) + " visual exemplar."
+        else:
+            out_label += " " + str(num_exemplars) + " visual exemplars."
+    else:
+        out_label = "Nothing specified to detect."
+
+    return out_label
+
+def preprocess(transform, image, input_prompts = None):
+    if input_prompts == None:
+        prompts = { "image": image, "points": []}
+    else:
+        prompts = input_prompts
+
+    input_image, _ = transform(image, None)
+    exemplar = get_box_inputs(prompts["points"])
+    # Wrapping exemplar in a dictionary to apply only relevant transforms
+    input_image_exemplar, exemplar = transform(prompts['image'], {"exemplars": torch.tensor(exemplar)})
+    exemplar = exemplar["exemplars"]
+
+    return input_image, input_image_exemplar, exemplar
+
+def get_boxes_from_prediction(model_output, text, keywords = ""):
+    ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
+    logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
+    boxes = model_output["pred_boxes"][0]
+    if len(keywords.strip()) > 0:
+        box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
+    else:
+        box_mask = logits.max(dim=-1).values > CONF_THRESH
+    boxes = boxes[box_mask, :].cpu().numpy()
+    logits = logits[box_mask, :].cpu().numpy()
+    return boxes, logits
+
+def predict(model, transform, image, text, prompts, device):
+    keywords = "" # do not handle this for now
+    input_image, input_image_exemplar, exemplar = preprocess(transform, image, prompts)
+
+    input_images = input_image.unsqueeze(0).to(device)
+    input_image_exemplars = input_image_exemplar.unsqueeze(0).to(device)
+    exemplars = [exemplar.to(device)]
+
+    with torch.no_grad():
+        model_output = model(
+            nested_tensor_from_tensor_list(input_images),
+            nested_tensor_from_tensor_list(input_image_exemplars),
+            exemplars,
+            [torch.tensor([0]).to(device) for _ in range(len(input_images))],
+            captions=[text + " ."] * len(input_images),
+        )
+
+    keywords = ""
+    return get_boxes_from_prediction(model_output, text, keywords)
+
+examples = [
+    ["strawberry.jpg", "strawberry", {"image": "strawberry.jpg"}],
+    ["strawberry.jpg", "blueberry", {"image": "strawberry.jpg"}],
+    ["bird-1.JPG", "bird", {"image": "bird-2.JPG"}],
+    ["fish.jpg", "fish", {"image": "fish.jpg"}],
+    ["women.jpg", "girl", {"image": "women.jpg"}],
+    ["women.jpg", "boy", {"image": "women.jpg"}],
+    ["balloon.jpg", "hot air balloon", {"image": "balloon.jpg"}],
+    ["deer.jpg", "deer", {"image": "deer.jpg"}],
+    ["apple.jpg", "apple", {"image": "apple.jpg"}],
+    ["egg.jpg", "egg", {"image": "egg.jpg"}],
+    ["stamp.jpg", "stamp", {"image": "stamp.jpg"}],
+    ["green-pea.jpg", "green pea", {"image": "green-pea.jpg"}],
+    ["lego.jpg", "lego", {"image": "lego.jpg"}]
+]
+
+
 if __name__ == '__main__':
 
     parser = argparse.ArgumentParser("Counting Application", parents=[get_args_parser()])
@@ -205,56 +285,19 @@ if __name__ == '__main__':
     model, transform = build_model_and_transforms(args)
     model = model.to(device)
 
+    _predict = lambda image, text, prompts: predict(model, transform, image, text, prompts, device)
+
     @spaces.GPU(duration=120)
     def count(image, text, prompts, state, device):
-
-        keywords = "" # do not handle this for now
-
-        # Handle no prompt case.
         if prompts is None:
             prompts = {"image": image, "points": []}
-        input_image, _ = transform(image, {"exemplars": torch.tensor([])})
-        input_image = input_image.unsqueeze(0).to(device)
-        exemplars = get_box_inputs(prompts["points"])
-
-        input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
-        input_image_exemplars = input_image_exemplars.unsqueeze(0).to(device)
-        exemplars = [exemplars["exemplars"].to(device)]
-
-        with torch.no_grad():
-            model_output = model(
-                nested_tensor_from_tensor_list(input_image),
-                nested_tensor_from_tensor_list(input_image_exemplars),
-                exemplars,
-                [torch.tensor([0]).to(device) for _ in range(len(input_image))],
-                captions=[text + " ."] * len(input_image),
-            )
+
+        boxes, _ = _predict(image, text, prompts)
+        count = len(boxes)
+        output_img = generate_heatmap(image, boxes)
 
-        ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
-        logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
-        boxes = model_output["pred_boxes"][0]
-        if len(keywords.strip()) > 0:
-            box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
-        else:
-            box_mask = logits.max(dim=-1).values > CONF_THRESH
-        logits = logits[box_mask, :].cpu().numpy()
-        boxes = boxes[box_mask, :].cpu().numpy()
-
-        # Plot results.
-        (w, h) = image.size
-        det_map = np.zeros((h, w))
-        det_map[(h * boxes[:, 1]).astype(int), (w * boxes[:, 0]).astype(int)] = 1
-        det_map = ndimage.gaussian_filter(
-            det_map, sigma=(w // 200, w // 200), order=0
-        )
-        plt.imshow(image)
-        plt.imshow(det_map[None, :].transpose(1, 2, 0), 'jet', interpolation='none', alpha=0.7)
-        plt.axis('off')
-        img_buf = io.BytesIO()
-        plt.savefig(img_buf, format='png', bbox_inches='tight')
-        plt.close()
-
-        output_img = Image.open(img_buf)
+        num_exemplars = len(get_box_inputs(prompts["points"]))
+        out_label = generate_output_label(text, num_exemplars)
 
         if AppSteps.TEXT_AND_EXEMPLARS not in state:
             exemplar_image = ImagePrompter(type='pil', label='Visual Exemplar Image', value=prompts, interactive=True, visible=True)
@@ -274,92 +317,19 @@ if __name__ == '__main__':
             main_instructions_comp = gr.Markdown(visible=True)
             step_3 = gr.Tab(visible=True)
 
-        out_label = "Detected instances predicted with"
-        if len(text.strip()) > 0:
-            out_label += " text"
-            if exemplars[0].size()[0] == 1:
-                out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplar."
-            elif exemplars[0].size()[0] > 1:
-                out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplars."
-            else:
-                out_label += "."
-        elif exemplars[0].size()[0] > 0:
-            if exemplars[0].size()[0] == 1:
-                out_label += " " + str(exemplars[0].size()[0]) + " visual exemplar."
-            else:
-                out_label += " " + str(exemplars[0].size()[0]) + " visual exemplars."
-        else:
-            out_label = "Nothing specified to detect."
-
-        return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=boxes.shape[0]), new_submit_btn, gr.Tab(visible=True), step_3, state)
+        return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=count), new_submit_btn, gr.Tab(visible=True), step_3, state)
 
     @spaces.GPU
     def count_main(image, text, prompts, device):
-        keywords = "" # do not handle this for now
-        # Handle no prompt case.
         if prompts is None:
             prompts = {"image": image, "points": []}
-        input_image, _ = transform(image, {"exemplars": torch.tensor([])})
-        input_image = input_image.unsqueeze(0).to(device)
-        exemplars = get_box_inputs(prompts["points"])
-
-        input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
-        input_image_exemplars = input_image_exemplars.unsqueeze(0).to(device)
-        exemplars = [exemplars["exemplars"].to(device)]
-
-        with torch.no_grad():
-            model_output = model(
-                nested_tensor_from_tensor_list(input_image),
-                nested_tensor_from_tensor_list(input_image_exemplars),
-                exemplars,
-                [torch.tensor([0]).to(device) for _ in range(len(input_image))],
-                captions=[text + " ."] * len(input_image),
-            )
-
-        ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
-        logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
-        boxes = model_output["pred_boxes"][0]
-        if len(keywords.strip()) > 0:
-            box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
-        else:
-            box_mask = logits.max(dim=-1).values > CONF_THRESH
-        logits = logits[box_mask, :].cpu().numpy()
-        boxes = boxes[box_mask, :].cpu().numpy()
-
-        # Plot results.
-        (w, h) = image.size
-        det_map = np.zeros((h, w))
-        det_map[(h * boxes[:, 1]).astype(int), (w * boxes[:, 0]).astype(int)] = 1
-        det_map = ndimage.gaussian_filter(
-            det_map, sigma=(w // 200, w // 200), order=0
-        )
-        plt.imshow(image)
-        plt.imshow(det_map[None, :].transpose(1, 2, 0), 'jet', interpolation='none', alpha=0.7)
-        plt.axis('off')
-        img_buf = io.BytesIO()
-        plt.savefig(img_buf, format='png', bbox_inches='tight')
-        plt.close()
-
-        output_img = Image.open(img_buf)
-
-        out_label = "Detected instances predicted with"
-        if len(text.strip()) > 0:
-            out_label += " text"
-            if exemplars[0].size()[0] == 1:
-                out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplar."
-            elif exemplars[0].size()[0] > 1:
-                out_label += " and " + str(exemplars[0].size()[0]) + " visual exemplars."
-            else:
-                out_label += "."
-        elif exemplars[0].size()[0] > 0:
-            if exemplars[0].size()[0] == 1:
-                out_label += " " + str(exemplars[0].size()[0]) + " visual exemplar."
-            else:
-                out_label += " " + str(exemplars[0].size()[0]) + " visual exemplars."
-        else:
-            out_label = "Nothing specified to detect."
+        boxes, _ = _predict(image, text, prompts)
+        count = len(boxes)
+        output_img = generate_heatmap(image, boxes)
+        num_exemplars = len(get_box_inputs(prompts["points"]))
+        out_label = generate_output_label(text, num_exemplars)
 
-        return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=boxes.shape[0]))
+        return (gr.Image(output_img, visible=True, label=out_label, show_label=True), gr.Number(label="Predicted Count", visible=True, value=count))
 
     def remove_label(image):
         return gr.Image(show_label=False)
@@ -401,12 +371,12 @@
             with gr.Accordion("Open for Further Information", open=False):
                 gr.Markdown(exemplar_img_drawing_instructions_part_2)
             with gr.Tab("Step 1", visible=True) as step_1:
-                input_image = gr.Image(type='pil', label='Input Image', show_label='True', value="strawberry.jpg", interactive=False, width="30vw")
+                input_image = gr.Image(type='pil', label='Input Image', show_label='True', value="strawberry.jpg", interactive=False)
                 gr.Markdown('# Click "Count" to count the strawberries.')
 
         with gr.Column():
             with gr.Tab("Output Image"):
-                detected_instances = gr.Image(label="Detected Instances", show_label='True', interactive=False, visible=True, width="40vw")
+                detected_instances = gr.Image(label="Detected Instances", show_label='True', interactive=False, visible=True)
 
             with gr.Row():
                 input_text = gr.Textbox(label="What would you like to count?", value="strawberry", interactive=True)
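Editor's note: the point of this refactor is that the model call no longer lives inside the Gradio handlers, so the same helpers can be reused outside the UI (which is what the new notebook does). A minimal sketch of how the extracted functions compose for a single text-only query is shown below; the image path and prompt are placeholders, and the snippet itself is not part of the commit.

```python
from PIL import Image

from app import (
    build_model_and_transforms,
    get_args_parser,
    get_device,
    predict,
    generate_heatmap,
    generate_output_label,
)

# Build the model once, mirroring app.py's __main__ block and the demo notebook.
args = get_args_parser().parse_args([])
device = get_device()
model, transform = build_model_and_transforms(args)
model = model.to(device)

# Text-only counting on one image; passing prompts=None lets preprocess()
# fall back to an empty exemplar set.
image = Image.open("strawberry.jpg")  # placeholder path
boxes, _ = predict(model, transform, image, "strawberry", None, device)

count = len(boxes)
heatmap = generate_heatmap(image, boxes)        # PIL image with the density overlay
label = generate_output_label("strawberry", 0)  # no visual exemplars in this sketch
print(count, label)
```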
notebooks/demo.ipynb ADDED
@@ -0,0 +1,492 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {
6
+ "id": "yxig5CdZuHb9"
7
+ },
8
+ "source": [
9
+ "# CountGD - Multimodela open-world object counting\n",
10
+ "\n"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "markdown",
15
+ "metadata": {
16
+ "id": "9wyM6J2HuHb-"
17
+ },
18
+ "source": [
19
+ "## Setup\n",
20
+ "\n",
21
+ "The following cells will setup the runtime environment with the following\n",
22
+ "\n",
23
+ "- Mount Google Drive\n",
24
+ "- Install dependencies for running the model\n",
25
+ "- Load the model into memory"
26
+ ]
27
+ },
28
+ {
29
+ "cell_type": "markdown",
30
+ "metadata": {
31
+ "id": "jn061Tl8uHb-"
32
+ },
33
+ "source": [
34
+ "### Mount Google Drive (if running on colab)\n",
35
+ "\n",
36
+ "The following bit of code will mount your Google Drive folder at `/content/drive`, allowing you to process files directly from it as well as store the results alongside it.\n",
37
+ "\n",
38
+ "Once you execute the next cell, you will be requested to share access with the notebook. Please follow the instructions on screen to do so.\n",
39
+ "If you are not running this on colab, you will still be able to use the files available on your environment."
40
+ ]
41
+ },
42
+ {
43
+ "cell_type": "code",
44
+ "execution_count": null,
45
+ "metadata": {
46
+ "colab": {
47
+ "base_uri": "https://localhost:8080/"
48
+ },
49
+ "collapsed": true,
50
+ "id": "DkSUXqMPuHb-",
51
+ "outputId": "6b82521e-3afd-4545-b13f-8cfea0975d95"
52
+ },
53
+ "outputs": [],
54
+ "source": [
55
+ "# Check if running colab\n",
56
+ "import logging\n",
57
+ "\n",
58
+ "logging.basicConfig(\n",
59
+ " level=logging.INFO,\n",
60
+ " format='%(asctime)s %(levelname)-8s %(name)s %(message)s'\n",
61
+ ")\n",
62
+ "try:\n",
63
+ " import google.colab\n",
64
+ " RUNNING_IN_COLAB = True\n",
65
+ "except:\n",
66
+ " RUNNING_IN_COLAB = False\n",
67
+ "\n",
68
+ "if RUNNING_IN_COLAB:\n",
69
+ " from google.colab import drive\n",
70
+ " drive.mount('/content/drive')\n",
71
+ "\n",
72
+ "from IPython.core.magic import register_cell_magic\n",
73
+ "from IPython import get_ipython\n",
74
+ "@register_cell_magic\n",
75
+ "def skip_if(line, cell):\n",
76
+ " if eval(line):\n",
77
+ " return\n",
78
+ " get_ipython().run_cell(cell)\n",
79
+ "\n",
80
+ "\n",
81
+ "%env RUNNING_IN_COLAB {RUNNING_IN_COLAB}\n"
82
+ ]
83
+ },
84
+ {
85
+ "cell_type": "markdown",
86
+ "metadata": {
87
+ "id": "kas5YtyluHb_"
88
+ },
89
+ "source": [
90
+ "### Install Dependencies\n",
91
+ "\n",
92
+ "The environment will be setup with the code, models and required dependencies."
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "metadata": {
99
+ "colab": {
100
+ "base_uri": "https://localhost:8080/"
101
+ },
102
+ "id": "982Yiv5tuHb_",
103
+ "outputId": "2f570d1a-c6cc-49c3-c336-1d784d33a169"
104
+ },
105
+ "outputs": [],
106
+ "source": [
107
+ "%%bash\n",
108
+ "\n",
109
+ "set -euxo pipefail\n",
110
+ "\n",
111
+ "if [ \"${RUNNING_IN_COLAB}\" == \"True\" ]; then\n",
112
+ " echo \"Downloading the repository...\"\n",
113
+ " if [ ! -d /content/countgd ]; then\n",
114
+ " git clone \"https://huggingface.co/spaces/nikigoli/countgd\" /content/countgd\n",
115
+ " fi\n",
116
+ " cd /content/countgd\n",
117
+ " git fetch origin refs/pr/5:refs/remotes/origin/pr/5\n",
118
+ " git checkout pr/5\n",
119
+ "else\n",
120
+ " # TODO check if cwd is the correct git repo\n",
121
+ " # If users use vscode, then we set the default start directory to root of the repo\n",
122
+ " echo \"Running in $(pwd)\"\n",
123
+ "fi\n",
124
+ "\n",
125
+ "# TODO check for gcc-11 or above\n",
126
+ "\n",
127
+ "# Install pip packages\n",
128
+ "pip install --upgrade pip setuptools wheel\n",
129
+ "pip install -r requirements.txt\n",
130
+ "\n",
131
+ "# Compile modules\n",
132
+ "export CUDA_HOME=/usr/local/cuda/\n",
133
+ "cd models/GroundingDINO/ops\n",
134
+ "python3 setup.py build\n",
135
+ "pip install .\n",
136
+ "python3 test.py"
137
+ ]
138
+ },
139
+ {
140
+ "cell_type": "code",
141
+ "execution_count": null,
142
+ "metadata": {
143
+ "colab": {
144
+ "base_uri": "https://localhost:8080/"
145
+ },
146
+ "id": "58iD_HGnvcRJ",
147
+ "outputId": "fe356a68-dced-4f6f-93cc-d83da2f84e28"
148
+ },
149
+ "outputs": [],
150
+ "source": [
151
+ "%cd {\"/content/countgd\" if RUNNING_IN_COLAB else '.'}"
152
+ ]
153
+ },
154
+ {
155
+ "cell_type": "markdown",
156
+ "metadata": {
157
+ "id": "gH7A8zthuHb_"
158
+ },
159
+ "source": [
160
+ "## Inference"
161
+ ]
162
+ },
163
+ {
164
+ "cell_type": "markdown",
165
+ "metadata": {
166
+ "id": "IspbBV0XuHb_"
167
+ },
168
+ "source": [
169
+ "### Loading the model"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": null,
175
+ "metadata": {
176
+ "colab": {
177
+ "base_uri": "https://localhost:8080/"
178
+ },
179
+ "id": "5nBT_HCUuHb_",
180
+ "outputId": "95ceb6c6-bee8-4921-8bff-d28937045f78"
181
+ },
182
+ "outputs": [],
183
+ "source": [
184
+ "import app\n",
185
+ "import importlib\n",
186
+ "importlib.reload(app)\n",
187
+ "from app import (\n",
188
+ " build_model_and_transforms,\n",
189
+ " get_device,\n",
190
+ " get_args_parser,\n",
191
+ " generate_heatmap,\n",
192
+ " predict,\n",
193
+ ")\n",
194
+ "args = get_args_parser().parse_args([])\n",
195
+ "device = get_device()\n",
196
+ "model, transform = build_model_and_transforms(args)\n",
197
+ "model = model.to(device)\n",
198
+ "\n",
199
+ "run = lambda image, text: predict(model, transform, image, text, None, device)\n",
200
+ "get_output = lambda image, boxes: (len(boxes), generate_heatmap(image, boxes))\n"
201
+ ]
202
+ },
203
+ {
204
+ "cell_type": "markdown",
205
+ "metadata": {
206
+ "id": "gfjraK3vuHb_"
207
+ },
208
+ "source": [
209
+ "### Input / Output Utils\n",
210
+ "\n",
211
+ "Helper functions for reading / writing to zipfiles and csv"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 17,
217
+ "metadata": {
218
+ "id": "qg0g5B-fuHb_"
219
+ },
220
+ "outputs": [],
221
+ "source": [
222
+ "import io\n",
223
+ "import csv\n",
224
+ "from pathlib import Path\n",
225
+ "from contextlib import contextmanager\n",
226
+ "import zipfile\n",
227
+ "import filetype\n",
228
+ "from PIL import Image\n",
229
+ "logger = logging.getLogger()\n",
230
+ "\n",
231
+ "def images_from_zipfile(p: Path):\n",
232
+ " if not zipfile.is_zipfile(p):\n",
233
+ " raise ValueError(f'{p} is not a zipfile!')\n",
234
+ "\n",
235
+ " with zipfile.ZipFile(p, 'r') as zipf:\n",
236
+ " def process_entry(info: zipfile.ZipInfo):\n",
237
+ " with zipf.open(info) as f:\n",
238
+ " if not filetype.is_image(f):\n",
239
+ " logger.debug(f'Skipping file - {info.filename} as it is not an image')\n",
240
+ " return\n",
241
+ " # Try loading the file\n",
242
+ " try:\n",
243
+ " with Image.open(f) as im:\n",
244
+ " im.load()\n",
245
+ " return (info.filename, im)\n",
246
+ " except:\n",
247
+ " logger.exception(f'Error reading file {info.filename}')\n",
248
+ "\n",
249
+ " num_files = sum(1 for info in zipf.infolist() if info.is_dir() == False)\n",
250
+ " logger.info(f'Found {num_files} file(s) in the zip')\n",
251
+ " yield from (process_entry(info) for info in zipf.infolist() if info.is_dir() == False)\n",
252
+ "\n",
253
+ "@contextmanager\n",
254
+ "def zipfile_writer(p: Path):\n",
255
+ " with zipfile.ZipFile(p, 'w') as zipf:\n",
256
+ " def write_output(image, image_filename):\n",
257
+ " buf = io.BytesIO()\n",
258
+ " image.save(buf, 'PNG')\n",
259
+ " zipf.writestr(image_filename, buf.getvalue())\n",
260
+ " yield write_output\n",
261
+ "\n",
262
+ "@contextmanager\n",
263
+ "def csvfile_writer(p: Path):\n",
264
+ " with p.open('w', newline='') as csvfile:\n",
265
+ " fieldnames = ['filename', 'count']\n",
266
+ " csv_writer = csv.DictWriter(csvfile, fieldnames = fieldnames)\n",
267
+ " csv_writer.writeheader()\n",
268
+ "\n",
269
+ " yield csv_writer.writerow"
270
+ ]
271
+ },
272
+ {
273
+ "cell_type": "code",
274
+ "execution_count": 15,
275
+ "metadata": {
276
+ "id": "rFXRk-_uuHb_"
277
+ },
278
+ "outputs": [],
279
+ "source": [
280
+ "from tqdm import tqdm\n",
281
+ "import os\n",
282
+ "def process_zipfile(input_zipfile: Path, text: str):\n",
283
+ " if not input_zipfile.exists() or not input_zipfile.is_file() or not os.access(input_zipfile, os.R_OK):\n",
284
+ " logger.error(f'Cannot open / read zipfile: {input_zipfile}. Please check if it exists')\n",
285
+ " return\n",
286
+ "\n",
287
+ " if text == \"\":\n",
288
+ " logger.error('Please provide the object you would like to count')\n",
289
+ " return\n",
290
+ "\n",
291
+ " output_zipfile = input_zipfile.parent / f'{input_zipfile.stem}_countgd.zip'\n",
292
+ " output_csvfile = input_zipfile.parent / f'{input_zipfile.stem}.csv'\n",
293
+ "\n",
294
+ " logger.info(f'Writing outputs to {output_zipfile.name} and {output_csvfile.name} in {input_zipfile.parent} folder')\n",
295
+ " with zipfile_writer(output_zipfile) as add_to_zip, csvfile_writer(output_csvfile) as write_row:\n",
296
+ " for filename, im in tqdm(images_from_zipfile(input_zipfile)):\n",
297
+ " boxes, _ = run(im, text)\n",
298
+ " count, heatmap = get_output(im, boxes)\n",
299
+ " write_row({'filename': filename, 'count': count})\n",
300
+ " add_to_zip(heatmap, filename)"
301
+ ]
302
+ },
303
+ {
304
+ "cell_type": "markdown",
305
+ "metadata": {
306
+ "id": "TmqsSxrsuHb_"
307
+ },
308
+ "source": [
309
+ "### Run\n",
310
+ "\n",
311
+ "Use the form on colab to set the parameters, providing the zipfile with input images and a promt text representing the object you want to count.\n",
312
+ "\n",
313
+ "If you are not running on colab, change the values in the next cell\n",
314
+ "\n",
315
+ "Make sure to run the cell once you change the value."
316
+ ]
317
+ },
318
+ {
319
+ "cell_type": "code",
320
+ "execution_count": 8,
321
+ "metadata": {
322
+ "id": "ZaN918EkuHb_"
323
+ },
324
+ "outputs": [],
325
+ "source": [
326
+ "# @title ## Parameters { display-mode: \"form\", run: \"auto\" }\n",
327
+ "# @markdown Set the following options to pass to the CountGD Model\n",
328
+ "\n",
329
+ "# @markdown ---\n",
330
+ "# @markdown ### Enter a file path to a zip:\n",
331
+ "zipfile_path = \"test_images.zip\" # @param {type:\"string\"}\n",
332
+ "# @markdown\n",
333
+ "# @markdown ### Which object would you like to count?\n",
334
+ "prompt = \"strawberry\" # @param {type:\"string\"}\n",
335
+ "# @markdown ---"
336
+ ]
337
+ },
338
+ {
339
+ "cell_type": "code",
340
+ "execution_count": null,
341
+ "metadata": {
342
+ "colab": {
343
+ "base_uri": "https://localhost:8080/",
344
+ "height": 66,
345
+ "referenced_widgets": [
346
+ "b14c910dd2594285bb4ad4740099e70c",
347
+ "01631442369e43138c2c5c4a9fe38ceb",
348
+ "ff84907ef88a431bab4bd3d1567cc42a"
349
+ ]
350
+ },
351
+ "id": "fd-ShBCsuHb_",
352
+ "outputId": "5b36bb90-ac6e-46fe-a853-ff11d43dd9f6"
353
+ },
354
+ "outputs": [],
355
+ "source": [
356
+ "import ipywidgets as widgets\n",
357
+ "from IPython.display import display\n",
358
+ "button = widgets.Button(description=\"Run\")\n",
359
+ "\n",
360
+ "def on_button_clicked(b):\n",
361
+ " # Display the message within the output widget.\n",
362
+ " process_zipfile(Path(zipfile_path), prompt)\n",
363
+ "\n",
364
+ "button.on_click(on_button_clicked)\n",
365
+ "display(button)"
366
+ ]
367
+ }
368
+ ],
369
+ "metadata": {
370
+ "accelerator": "GPU",
371
+ "colab": {
372
+ "collapsed_sections": [
373
+ "gfjraK3vuHb_"
374
+ ],
375
+ "gpuType": "T4",
376
+ "provenance": []
377
+ },
378
+ "kernelspec": {
379
+ "display_name": "env",
380
+ "language": "python",
381
+ "name": "python3"
382
+ },
383
+ "language_info": {
384
+ "codemirror_mode": {
385
+ "name": "ipython",
386
+ "version": 3
387
+ },
388
+ "file_extension": ".py",
389
+ "mimetype": "text/x-python",
390
+ "name": "python",
391
+ "nbconvert_exporter": "python",
392
+ "pygments_lexer": "ipython3",
393
+ "version": "3.12.7"
394
+ },
395
+ "widgets": {
396
+ "application/vnd.jupyter.widget-state+json": {
397
+ "01631442369e43138c2c5c4a9fe38ceb": {
398
+ "model_module": "@jupyter-widgets/base",
399
+ "model_module_version": "1.2.0",
400
+ "model_name": "LayoutModel",
401
+ "state": {
402
+ "_model_module": "@jupyter-widgets/base",
403
+ "_model_module_version": "1.2.0",
404
+ "_model_name": "LayoutModel",
405
+ "_view_count": null,
406
+ "_view_module": "@jupyter-widgets/base",
407
+ "_view_module_version": "1.2.0",
408
+ "_view_name": "LayoutView",
409
+ "align_content": null,
410
+ "align_items": null,
411
+ "align_self": null,
412
+ "border": null,
413
+ "bottom": null,
414
+ "display": null,
415
+ "flex": null,
416
+ "flex_flow": null,
417
+ "grid_area": null,
418
+ "grid_auto_columns": null,
419
+ "grid_auto_flow": null,
420
+ "grid_auto_rows": null,
421
+ "grid_column": null,
422
+ "grid_gap": null,
423
+ "grid_row": null,
424
+ "grid_template_areas": null,
425
+ "grid_template_columns": null,
426
+ "grid_template_rows": null,
427
+ "height": null,
428
+ "justify_content": null,
429
+ "justify_items": null,
430
+ "left": null,
431
+ "margin": null,
432
+ "max_height": null,
433
+ "max_width": null,
434
+ "min_height": null,
435
+ "min_width": null,
436
+ "object_fit": null,
437
+ "object_position": null,
438
+ "order": null,
439
+ "overflow": null,
440
+ "overflow_x": null,
441
+ "overflow_y": null,
442
+ "padding": null,
443
+ "right": null,
444
+ "top": null,
445
+ "visibility": null,
446
+ "width": null
447
+ }
448
+ },
449
+ "b14c910dd2594285bb4ad4740099e70c": {
450
+ "model_module": "@jupyter-widgets/controls",
451
+ "model_module_version": "1.5.0",
452
+ "model_name": "ButtonModel",
453
+ "state": {
454
+ "_dom_classes": [],
455
+ "_model_module": "@jupyter-widgets/controls",
456
+ "_model_module_version": "1.5.0",
457
+ "_model_name": "ButtonModel",
458
+ "_view_count": null,
459
+ "_view_module": "@jupyter-widgets/controls",
460
+ "_view_module_version": "1.5.0",
461
+ "_view_name": "ButtonView",
462
+ "button_style": "",
463
+ "description": "Run",
464
+ "disabled": false,
465
+ "icon": "",
466
+ "layout": "IPY_MODEL_01631442369e43138c2c5c4a9fe38ceb",
467
+ "style": "IPY_MODEL_ff84907ef88a431bab4bd3d1567cc42a",
468
+ "tooltip": ""
469
+ }
470
+ },
471
+ "ff84907ef88a431bab4bd3d1567cc42a": {
472
+ "model_module": "@jupyter-widgets/controls",
473
+ "model_module_version": "1.5.0",
474
+ "model_name": "ButtonStyleModel",
475
+ "state": {
476
+ "_model_module": "@jupyter-widgets/controls",
477
+ "_model_module_version": "1.5.0",
478
+ "_model_name": "ButtonStyleModel",
479
+ "_view_count": null,
480
+ "_view_module": "@jupyter-widgets/base",
481
+ "_view_module_version": "1.2.0",
482
+ "_view_name": "StyleView",
483
+ "button_color": null,
484
+ "font_weight": ""
485
+ }
486
+ }
487
+ }
488
+ }
489
+ },
490
+ "nbformat": 4,
491
+ "nbformat_minor": 0
492
+ }
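Editor's note: the final notebook cell wires `process_zipfile` to an ipywidgets button, but the batch run can also be triggered directly, e.g. from a plain script or outside Colab. A minimal sketch, assuming the notebook cells above have already been executed so `process_zipfile` is defined; the zip path and prompt are the notebook's default placeholders.

```python
from pathlib import Path

# Writes <stem>_countgd.zip (heatmaps) and <stem>.csv (per-image counts)
# next to the input zip, exactly as the button handler does.
process_zipfile(Path("test_images.zip"), "strawberry")
```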
requirements.txt CHANGED
@@ -12,6 +12,8 @@ ushlex
 gradio>=4.0.0,<5
 gradio_image_prompter-0.1.0-py3-none-any.whl
 spaces
+filetype
+tqdm
 --extra-index-url https://download.pytorch.org/whl/cu121
 torch<2.6
 torchvision
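Editor's note: the two new entries back the notebook's zip handling (`filetype` skips non-image entries) and its progress reporting (`tqdm`). A quick post-install sanity check might look like the sketch below; the file path is a placeholder and the snippet is not part of the commit.

```python
import filetype
from tqdm import tqdm

# filetype sniffs magic bytes, so non-image files inside the zip are skipped cheaply.
print(filetype.is_image("strawberry.jpg"))  # True for a real image file

# tqdm wraps any iterable with a progress bar, as in the notebook's batch loop.
for _ in tqdm(range(3), desc="demo"):
    pass
```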