Spaces:
Runtime error
Runtime error
import itertools | |
import config as config | |
import cv2 | |
import gradio as gr | |
import matplotlib.patches as patches | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import torch | |
import torchvision | |
import utils | |
from loss import YoloLoss | |
from model import YOLOv3 | |
from PIL import Image | |
from torch.utils.data import DataLoader | |
from torchvision import transforms | |
from utils import get_loaders | |
# Load the trained checkpoint on CPU. The checkpoint's parameter names carry
# a 'model.' segment that the bare YOLOv3 module does not use, so it is
# stripped from every key before the weights are loaded.
state_dict = torch.load(
    'Yolov3_Padmanabh.pth',
    map_location=torch.device('cpu'),
)
new_state_dict = {
    name.replace('model.', ''): tensor for name, tensor in state_dict.items()
}

# strict=True makes any missing or unexpected key fail loudly at start-up.
model = YOLOv3(in_channels=3, num_classes=config.NUM_CLASSES)
model.load_state_dict(new_state_dict, strict=True)
model.eval()  # inference mode for the whole lifetime of the app
# Class names, in the order listed here (20 entries).
classes = (
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
)
import grad_cam_func as gcf | |
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients | |
from pytorch_grad_cam.utils.image import show_cam_on_image | |
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget | |
# NMS thresholds used only when picking Grad-CAM target classes. They keep the
# original hard-coded values and are not tied to the UI sliders.
_GRADCAM_IOU_THRESHOLD = 0.5
_GRADCAM_CONF_THRESHOLD = 0.4
# Side length (pixels) the raw input is resized to before the Grad-CAM overlay.
_GRADCAM_IMAGE_SIZE = 416


def _decode_boxes(outputs, batch_size, anchors_as_numpy):
    """Merge the three per-scale model outputs into one box list per image."""
    bboxes = [[] for _ in range(batch_size)]
    for scale_idx in range(3):
        # Each scale output is 5-D; the third dimension is the grid size S.
        _, _, grid_size, _, _ = outputs[scale_idx].shape
        anchors = config.SCALED_ANCHORS[scale_idx]
        if anchors_as_numpy:
            # The detection pass converted anchors to NumPy while the Grad-CAM
            # pass did not; that difference is preserved as-is.
            anchors = np.array(anchors)
        boxes_scale = utils.cells_to_bboxes(
            outputs[scale_idx], anchors, S=grid_size, is_preds=True
        )
        for img_idx, boxes in enumerate(boxes_scale):
            bboxes[img_idx] += boxes
    return bboxes


def _draw_boxes(image, boxes):
    """Plot predicted bounding boxes on the image and return it as an RGB array."""
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    try:
        ax.imshow(im)
        for box in boxes:
            assert len(box) == 6, "box should contain class pred, confidence, x, y, width, height"
            class_idx = int(box[0])
            # box[2:] is (x midpoint, y midpoint, width, height), scaled here
            # by the image width/height to get pixel coordinates.
            x_mid, y_mid, box_w, box_h = box[2:]
            upper_left_x = x_mid - box_w / 2
            upper_left_y = y_mid - box_h / 2
            rect = patches.Rectangle(
                (upper_left_x * width, upper_left_y * height),
                box_w * width,
                box_h * height,
                linewidth=2,
                edgecolor=colors[class_idx],
                facecolor="none",
            )
            ax.add_patch(rect)
            # Draw on `ax` directly instead of pyplot's global "current axes".
            ax.text(
                upper_left_x * width,
                upper_left_y * height,
                s=class_labels[class_idx],
                color="white",
                verticalalignment="top",
                bbox={"color": colors[class_idx], "pad": 0},
            )
        ax.axis('off')
        fig.canvas.draw()
        # `tostring_rgb()` was deprecated in Matplotlib 3.8 and later removed;
        # read the RGBA buffer and drop alpha. Copy so the array does not
        # reference the canvas buffer after the figure is closed.
        return np.asarray(fig.canvas.buffer_rgba())[..., :3].copy()
    finally:
        # Always close the figure so a failed request does not leak it.
        plt.close(fig)


def _gradcam_overlay(input_img, transform_img, gc_trans):
    """Compute the CAM for the detected classes and blend it over the input."""
    target_layer = [model.layers[-2]]
    cam = gcf.BaseCAM(model, target_layer)
    act_and_grads = ActivationsAndGradients(model, target_layer, None)
    try:
        # This forward pass runs outside no_grad.
        outputs = act_and_grads(transform_img)
        bboxes = _decode_boxes(outputs, batch_size=1, anchors_as_numpy=False)
        nms_boxes = utils.non_max_suppression(
            bboxes[0],
            iou_threshold=_GRADCAM_IOU_THRESHOLD,
            threshold=_GRADCAM_CONF_THRESHOLD,
            box_format="midpoint",
        )
        # box[0] is the predicted class of each surviving box.
        targets = [ClassifierOutputTarget(box[0]) for box in nms_boxes]
        per_layer_cam = cam.compute_cam_per_layer(transform_img, targets, False)
        heatmap = cam.aggregate_multi_layers(per_layer_cam)[0, :, :]
    finally:
        # The hooks are registered on the shared global model; without an
        # explicit release every request would stack another set of hooks.
        act_and_grads.release()
        # NOTE(review): assumes gcf.BaseCAM mirrors pytorch_grad_cam's BaseCAM
        # and keeps its own hooks in `activations_and_grads` - confirm.
        cam_hooks = getattr(cam, "activations_and_grads", None)
        if cam_hooks is not None and hasattr(cam_hooks, "release"):
            cam_hooks.release()

    img = cv2.resize(input_img, (_GRADCAM_IMAGE_SIZE, _GRADCAM_IMAGE_SIZE))
    img = np.float32(img) / 255
    return show_cam_on_image(img, heatmap, use_rgb=True, image_weight=gc_trans)


def inference(input_img=None, iou_threshold=0.6, conf_threshold=0.5, gc_trans=0.3):
    """Run YOLOv3 detection and Grad-CAM on one image.

    Args:
        input_img: Image passed to ``config.infer_transforms`` and
            ``cv2.resize``; ``None`` short-circuits the call.
        iou_threshold: IoU threshold forwarded to ``utils.non_max_suppression``
            for the displayed boxes.
        conf_threshold: Confidence threshold forwarded to
            ``utils.non_max_suppression`` for the displayed boxes.
        gc_trans: Passed as ``image_weight`` to ``show_cam_on_image``.

    Returns:
        ``(boxes_image, gradcam_image)``, or ``(None, None)`` when
        ``input_img`` is ``None``.
    """
    if input_img is None:
        return None, None

    model_input = config.infer_transforms(image=input_img)['image'].unsqueeze(0)
    visual_img = config.infer_transforms_visualization(image=input_img)['image']

    # Detection pass: gradients are not needed here.
    with torch.no_grad():
        outputs = model(model_input)
        bboxes = _decode_boxes(
            outputs, batch_size=model_input.shape[0], anchors_as_numpy=True
        )
    nms_boxes = utils.non_max_suppression(
        bboxes[0],
        iou_threshold=iou_threshold,
        threshold=conf_threshold,
        box_format="midpoint",
    )

    # permute(1, 2, 0) moves the first axis last before plotting.
    outputs_inference_bb = _draw_boxes(visual_img.permute(1, 2, 0), nms_boxes)
    outputs_inference_gc = _gradcam_overlay(input_img, model_input, gc_trans)
    return outputs_inference_bb, outputs_inference_gc
# Text shown at the top of the Gradio page.
title = "PASCAL VOC trained on Yolov3"
description = "A simple Gradio interface to infer on Yolov3 model, and get GradCAM results"

# Ten example rows: image path followed by the three slider values, in the
# same order as the interface inputs.
examples = [[f"examples/test_{i}.jpg", 0.6, 0.5, 0.3] for i in range(10)]

# One image input plus three sliders feed `inference`; its two return values
# fill the two output image panels.
demo = gr.Interface(
    inference,
    inputs=[
        gr.Image(label="Input image"),
        gr.Slider(0, 1, value=0.6, label="IOU Threshold"),
        gr.Slider(0, 1, value=0.4, label="Threshold"),
        gr.Slider(0, 1, value=0.5, label="GradCAM Transparency"),
    ],
    outputs=[
        gr.Image(label="Yolov3 Prediction"),
        gr.Image(label="GradCAM Output"),
    ],
    title=title,
    description=description,
    examples=examples,
)

demo.launch()