{ "cells": [ { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# kMaX-DeepLab Demo\n", "This notebook is modified by Qihang Yu, with reference from [Mask2Former's script](https://colab.research.google.com/drive/1uIWE5KbGFSjrxey2aRd5pWkKNY1_SaNq)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Install detectron2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Install detectron2\n", "import torch\n", "TORCH_VERSION = \".\".join(torch.__version__.split(\".\")[:2])\n", "CUDA_VERSION = torch.__version__.split(\"+\")[-1]\n", "print(\"torch: \", TORCH_VERSION, \"; cuda: \", CUDA_VERSION)\n", "# Install detectron2 that matches the above pytorch version\n", "# See https://detectron2.readthedocs.io/tutorials/install.html for instructions\n", "!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Install kMaX-DeepLab" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# clone and install kMaX-DeepLab\n", "!git clone https://github.com/yucornetto/kmaxdeeplab_detectron2.git\n", "%cd kmaxdeeplab_detectron2\n", "!pip install -U opencv-python\n", "!pip install git+https://github.com/cocodataset/panopticapi.git\n", "!pip install -r requirements.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# You may need to restart your runtime prior to this, to let your installation take effect\n", "%cd /content/kmaxdeeplab_detectron2\n", "# Some basic setup:\n", "# Setup detectron2 logger\n", "import detectron2\n", "from detectron2.utils.logger import setup_logger\n", "setup_logger()\n", "setup_logger(name=\"kmax_deeplab\")\n", "\n", "# import some common libraries\n", "import numpy as np\n", "import cv2\n", "import torch\n", "from 
google.colab.patches import cv2_imshow\n", "\n", "# import some common detectron2 utilities\n", "from detectron2 import model_zoo\n", "from detectron2.engine import DefaultPredictor\n", "from detectron2.config import get_cfg\n", "from detectron2.utils.visualizer import Visualizer, ColorMode\n", "from detectron2.data import MetadataCatalog\n", "from detectron2.projects.deeplab import add_deeplab_config\n", "coco_metadata = MetadataCatalog.get(\"coco_2017_val_panoptic\")\n", "\n", "# import kMaX-DeepLab project\n", "from kmax_deeplab import add_kmax_deeplab_config" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "# Run a pre-trained kMaX-DeepLab model\n", "We first download an image from the COCO dataset:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!wget http://images.cocodataset.org/val2017/000000005477.jpg -q -O input.jpg\n", "im = cv2.imread(\"./input.jpg\")\n", "cv2_imshow(im)" ] }, { "attachments": {}, "cell_type": "markdown", "metadata": {}, "source": [ "Then, we create a detectron2 config and a detectron2 `DefaultPredictor` to run inference on this image." 
] },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the config: detectron2 defaults, then the DeepLab and kMaX-DeepLab keys, then the model yaml\n",
    "# (the yaml path is relative to the repository directory entered by the earlier %cd).\n",
    "cfg = get_cfg()\n",
    "add_deeplab_config(cfg)\n",
    "add_kmax_deeplab_config(cfg)\n",
    "cfg.merge_from_file(\"configs/coco/panoptic-segmentation/kmax_convnext_large.yaml\")\n",
    "cfg.MODEL.WEIGHTS = 'https://drive.google.com/uc?id=1b6rEnKw4PNTdqSdWpmb0P9dsvN0pkOiN&export=download'\n",
    "# Turn on all three test-time flags; the visualization below reads the panoptic, instance and semantic outputs.\n",
    "cfg.MODEL.KMAX_DEEPLAB.TEST.SEMANTIC_ON = True\n",
    "cfg.MODEL.KMAX_DEEPLAB.TEST.INSTANCE_ON = True\n",
    "cfg.MODEL.KMAX_DEEPLAB.TEST.PANOPTIC_ON = True\n",
    "predictor = DefaultPredictor(cfg)\n",
    "outputs = predictor(im)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def show_predictions(image, predictions):\n",
    "    \"\"\"Print a caption and display panoptic, instance and semantic results stacked top to bottom.\n",
    "\n",
    "    Args:\n",
    "        image: the array loaded with cv2.imread; its channel axis is reversed before drawing.\n",
    "        predictions: the dict returned by predictor(image); the keys \"panoptic_seg\",\n",
    "            \"instances\" and \"sem_seg\" are read.\n",
    "    \"\"\"\n",
    "    def fresh_visualizer():\n",
    "        # One Visualizer per drawing, exactly as the original cells did\n",
    "        # (NOTE(review): presumably so the three results do not overlay each other - confirm).\n",
    "        return Visualizer(image[:, :, ::-1], coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW)\n",
    "\n",
    "    panoptic = predictions[\"panoptic_seg\"]\n",
    "    panoptic_result = fresh_visualizer().draw_panoptic_seg(panoptic[0].to(\"cpu\"), panoptic[1]).get_image()\n",
    "    instance_result = fresh_visualizer().draw_instance_predictions(predictions[\"instances\"].to(\"cpu\")).get_image()\n",
    "    semantic_result = fresh_visualizer().draw_sem_seg(predictions[\"sem_seg\"].argmax(0).to(\"cpu\")).get_image()\n",
    "    print(\"Panoptic segmentation (top), instance segmentation (middle), semantic segmentation (bottom)\")\n",
    "    stacked = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)\n",
    "    # Reverse the channel axis again before handing the stacked image to cv2_imshow.\n",
    "    cv2_imshow(stacked[:, :, ::-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Show panoptic/instance/semantic predictions:\n",
    "show_predictions(im, outputs)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's try an image not from COCO as well:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download a sample image and display. Replace the URL here to try your own images!\n",
    "# -O always saves to desk.jpg (overwriting any earlier download), so the imread below\n",
    "# picks up the new image even after a re-run or a URL change.\n",
    "!wget https://web.eecs.umich.edu/~fouhey/fun/desk/desk.jpg -q -O desk.jpg\n",
    "im = cv2.imread(\"./desk.jpg\")\n",
    "cv2_imshow(im)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "outputs = predictor(im)\n",
    "# Show panoptic/instance/semantic predictions:\n",
    "show_predictions(im, outputs)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.6 (default, Oct 18 2022, 12:41:40) \n[Clang 14.0.0 (clang-1400.0.29.202)]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}