{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# kMaX-DeepLab Demo\n",
    "This notebook was adapted by Qihang Yu, based on [Mask2Former's demo notebook](https://colab.research.google.com/drive/1uIWE5KbGFSjrxey2aRd5pWkKNY1_SaNq)."
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install detectron2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install detectron2\n",
    "import torch\n",
    "\n",
    "# The wheel index URL is built from the torch major.minor version and a CUDA tag.\n",
    "TORCH_VERSION = \".\".join(torch.__version__.split(\".\")[:2])\n",
    "if \"+\" in torch.__version__:\n",
    "    # Version carries a local suffix, e.g. \"1.10.0+cu113\" -> \"cu113\"\n",
    "    CUDA_VERSION = torch.__version__.split(\"+\")[-1]\n",
    "elif torch.version.cuda is not None:\n",
    "    # No suffix (e.g. \"1.10.0\"): build the tag from the CUDA version torch was compiled with\n",
    "    CUDA_VERSION = \"cu\" + torch.version.cuda.replace(\".\", \"\")\n",
    "else:\n",
    "    # CPU-only build of torch\n",
    "    CUDA_VERSION = \"cpu\"\n",
    "print(\"torch: \", TORCH_VERSION, \"; cuda: \", CUDA_VERSION)\n",
    "# Install detectron2 that matches the above pytorch version\n",
    "# See https://detectron2.readthedocs.io/tutorials/install.html for instructions\n",
    "# (%pip installs into the environment of the running kernel)\n",
    "%pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install kMaX-DeepLab"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Clone kMaX-DeepLab and install its dependencies.\n",
    "# The clone is skipped when the checkout already exists, so the cell can be re-run without a git error.\n",
    "![ -d kmaxdeeplab_detectron2 ] || git clone https://github.com/yucornetto/kmaxdeeplab_detectron2.git\n",
    "%cd kmaxdeeplab_detectron2\n",
    "# %pip installs into the environment of the running kernel\n",
    "%pip install -U opencv-python\n",
    "%pip install git+https://github.com/cocodataset/panopticapi.git\n",
    "%pip install -r requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# You may need to restart your runtime prior to this, to let your installation take effect\n",
    "%cd /content/kmaxdeeplab_detectron2\n",
    "# Some basic setup:\n",
    "# Setup detectron2 logger (default logger plus one named \"kmax_deeplab\")\n",
    "import detectron2\n",
    "from detectron2.utils.logger import setup_logger\n",
    "setup_logger()\n",
    "setup_logger(name=\"kmax_deeplab\")\n",
    "\n",
    "# import some common libraries\n",
    "import numpy as np\n",
    "import cv2\n",
    "import torch\n",
    "# cv2_imshow comes from google.colab, so this notebook is meant to run on Colab\n",
    "from google.colab.patches import cv2_imshow\n",
    "\n",
    "# import some common detectron2 utilities\n",
    "from detectron2 import model_zoo\n",
    "from detectron2.engine import DefaultPredictor\n",
    "from detectron2.config import get_cfg\n",
    "from detectron2.utils.visualizer import Visualizer, ColorMode\n",
    "from detectron2.data import MetadataCatalog\n",
    "from detectron2.projects.deeplab import add_deeplab_config\n",
    "# Metadata of the COCO panoptic val split, passed to the Visualizer when drawing predictions\n",
    "coco_metadata = MetadataCatalog.get(\"coco_2017_val_panoptic\")\n",
    "\n",
    "# import kMaX-DeepLab project\n",
    "from kmax_deeplab import add_kmax_deeplab_config"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Run a pre-trained kMaX-DeepLab model\n",
    "We first download an image from the COCO dataset:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fetch a COCO val2017 image (quietly, always writing to input.jpg) and display it\n",
    "!wget http://images.cocodataset.org/val2017/000000005477.jpg -q -O input.jpg\n",
    "im = cv2.imread(\"./input.jpg\")\n",
    "# cv2.imread returns None instead of raising when the file is missing or unreadable\n",
    "assert im is not None, \"Failed to load ./input.jpg - check that the download succeeded\"\n",
    "cv2_imshow(im)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Then, we create a detectron2 config and a detectron2 `DefaultPredictor` to run inference on this image."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Config file and pre-trained weights used for this demo\n",
    "CONFIG_FILE = \"configs/coco/panoptic-segmentation/kmax_convnext_large.yaml\"\n",
    "WEIGHTS_URL = 'https://drive.google.com/uc?id=1b6rEnKw4PNTdqSdWpmb0P9dsvN0pkOiN&export=download'\n",
    "\n",
    "# Start from the detectron2 config, then add the DeepLab and kMaX-DeepLab config entries\n",
    "cfg = get_cfg()\n",
    "add_deeplab_config(cfg)\n",
    "add_kmax_deeplab_config(cfg)\n",
    "cfg.merge_from_file(CONFIG_FILE)\n",
    "cfg.MODEL.WEIGHTS = WEIGHTS_URL\n",
    "\n",
    "# Switch on the semantic, instance and panoptic test flags\n",
    "test_options = cfg.MODEL.KMAX_DEEPLAB.TEST\n",
    "test_options.SEMANTIC_ON = True\n",
    "test_options.INSTANCE_ON = True\n",
    "test_options.PANOPTIC_ON = True\n",
    "\n",
    "# Build the predictor and run it on the image loaded earlier\n",
    "predictor = DefaultPredictor(cfg)\n",
    "outputs = predictor(im)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw the panoptic, instance and semantic predictions, each with its own Visualizer\n",
    "rgb_image = im[:, :, ::-1]  # channel-reversed view of the cv2 image\n",
    "viz_options = dict(scale=1.2, instance_mode=ColorMode.IMAGE_BW)\n",
    "\n",
    "panoptic_output = outputs[\"panoptic_seg\"]\n",
    "panoptic_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_panoptic_seg(panoptic_output[0].to(\"cpu\"), panoptic_output[1])\n",
    "    .get_image()\n",
    ")\n",
    "instance_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_instance_predictions(outputs[\"instances\"].to(\"cpu\"))\n",
    "    .get_image()\n",
    ")\n",
    "semantic_labels = outputs[\"sem_seg\"].argmax(0).to(\"cpu\")\n",
    "semantic_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_sem_seg(semantic_labels)\n",
    "    .get_image()\n",
    ")\n",
    "\n",
    "print(\"Panoptic segmentation (top), instance segmentation (middle), semantic segmentation (bottom)\")\n",
    "# Stack the three renderings vertically and reverse the channels again for cv2_imshow\n",
    "stacked = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)\n",
    "cv2_imshow(stacked[:, :, ::-1])"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's try an image not from COCO as well:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download a sample image and display. Replace path here to try your own images!\n",
    "# -O makes re-runs overwrite desk.jpg instead of saving a new desk.jpg.1 copy\n",
    "!wget https://web.eecs.umich.edu/~fouhey/fun/desk/desk.jpg -q -O desk.jpg\n",
    "im = cv2.imread(\"./desk.jpg\")\n",
    "# cv2.imread returns None instead of raising when the file is missing or unreadable\n",
    "assert im is not None, \"Failed to load ./desk.jpg - check that the download succeeded\"\n",
    "cv2_imshow(im)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Run the predictor on the new image\n",
    "outputs = predictor(im)\n",
    "\n",
    "# Draw the panoptic, instance and semantic predictions, each with its own Visualizer\n",
    "rgb_image = im[:, :, ::-1]  # channel-reversed view of the cv2 image\n",
    "viz_options = dict(scale=1.2, instance_mode=ColorMode.IMAGE_BW)\n",
    "\n",
    "panoptic_output = outputs[\"panoptic_seg\"]\n",
    "panoptic_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_panoptic_seg(panoptic_output[0].to(\"cpu\"), panoptic_output[1])\n",
    "    .get_image()\n",
    ")\n",
    "instance_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_instance_predictions(outputs[\"instances\"].to(\"cpu\"))\n",
    "    .get_image()\n",
    ")\n",
    "semantic_labels = outputs[\"sem_seg\"].argmax(0).to(\"cpu\")\n",
    "semantic_result = (\n",
    "    Visualizer(rgb_image, coco_metadata, **viz_options)\n",
    "    .draw_sem_seg(semantic_labels)\n",
    "    .get_image()\n",
    ")\n",
    "\n",
    "print(\"Panoptic segmentation (top), instance segmentation (middle), semantic segmentation (bottom)\")\n",
    "# Stack the three renderings vertically and reverse the channels again for cv2_imshow\n",
    "stacked = np.concatenate((panoptic_result, instance_result, semantic_result), axis=0)\n",
    "cv2_imshow(stacked[:, :, ::-1])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.9.6 (default, Oct 18 2022, 12:41:40) \n[Clang 14.0.0 (clang-1400.0.29.202)]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "31f2aee4e71d21fbe5cf8b01ff0e069b9275f58929596ceb00d14d90e3e16cd6"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}