{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "final text_encoder_type: bert-base-uncased\n"
     ]
    },
    {
     "data": {
      "application/json": {
       "ascii": false,
       "bar_format": null,
       "colour": null,
       "elapsed": 0.014210224151611328,
       "initial": 0,
       "n": 0,
       "ncols": null,
       "nrows": null,
       "postfix": null,
       "prefix": "Downloading model.safetensors",
       "rate": null,
       "total": 440449768,
       "unit": "B",
       "unit_divisor": 1000,
       "unit_scale": true
      },
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5922f34578364d36afa13de9f01254bd",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Downloading model.safetensors: 0%| | 0.00/440M [00:00<?, ?B/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/root/miniconda3/lib/python3.8/site-packages/transformers/modeling_utils.py:881: FutureWarning: The `device` argument is deprecated and will be removed in v5 of Transformers.\n",
      " warnings.warn(\n",
      "/root/miniconda3/lib/python3.8/site-packages/torch/utils/checkpoint.py:31: UserWarning: None of the inputs have requires_grad=True. Gradients will be None\n",
      " warnings.warn(\"None of the inputs have requires_grad=True. Gradients will be None\")\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "True"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Run GroundingDINO inference on one image with a text prompt, draw the\n",
    "# predicted boxes on the image, and write the annotated image to disk.\n",
    "from groundingdino.util.inference import load_model, load_image, predict, annotate\n",
    "import cv2\n",
    "\n",
    "# --- Configuration -----------------------------------------------------\n",
    "# All paths are relative, so they resolve against the kernel's working\n",
    "# directory. NOTE(review): the weights live in a sibling project folder;\n",
    "# confirm that folder exists before running on a fresh checkout.\n",
    "CONFIG_PATH = \"groundingdino/config/GroundingDINO_SwinT_OGC.py\"\n",
    "WEIGHTS_PATH = \"../04-06-segment-anything/weights/groundingdino_swint_ogc.pth\"\n",
    "IMAGE_PATH = \".asset/cat_dog.jpeg\"\n",
    "OUTPUT_PATH = \"annotated_image.jpg\"\n",
    "\n",
    "# Caption handed to predict(); presumably each ' . '-separated phrase is one\n",
    "# category to look for (verify against the GroundingDINO docs).\n",
    "TEXT_PROMPT = \"chair . person . dog .\"\n",
    "# Passed to predict() as box_threshold / text_threshold respectively.\n",
    "BOX_THRESHOLD = 0.35\n",
    "TEXT_THRESHOLD = 0.25\n",
    "\n",
    "# --- Inference ---------------------------------------------------------\n",
    "model = load_model(CONFIG_PATH, WEIGHTS_PATH)\n",
    "\n",
    "# load_image returns two values: the first is what annotate() draws on,\n",
    "# the second is what predict() consumes.\n",
    "image_source, image = load_image(IMAGE_PATH)\n",
    "\n",
    "boxes, logits, phrases = predict(\n",
    "    model=model,\n",
    "    image=image,\n",
    "    caption=TEXT_PROMPT,\n",
    "    box_threshold=BOX_THRESHOLD,\n",
    "    text_threshold=TEXT_THRESHOLD,\n",
    ")\n",
    "\n",
    "# --- Save the result ---------------------------------------------------\n",
    "annotated_frame = annotate(\n",
    "    image_source=image_source,\n",
    "    boxes=boxes,\n",
    "    logits=logits,\n",
    "    phrases=phrases,\n",
    ")\n",
    "# Left as the last expression on purpose: the cell then displays imwrite's\n",
    "# return value (True in the recorded run).\n",
    "cv2.imwrite(OUTPUT_PATH, annotated_frame)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}