Spaces:
Sleeping
Sleeping
ravi.naik
commited on
Commit
•
3129974
1
Parent(s):
adde10a
Directory name bug fix
Browse files- utils/gradcam.py +21 -52
- utils/utils.py +74 -0
- utils/utils/common.py +0 -185
- utils/utils/data.py +0 -294
- utils/utils/gradcam.py +0 -36
- utils/utils/loss.py +0 -90
- utils/utils/utils.py +0 -668
utils/gradcam.py
CHANGED
@@ -1,67 +1,36 @@
|
|
1 |
import numpy as np
|
2 |
-
from pytorch_grad_cam import
|
3 |
-
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
|
4 |
from pytorch_grad_cam.utils.image import show_cam_on_image
|
5 |
|
6 |
import matplotlib.pyplot as plt
|
7 |
|
8 |
|
9 |
-
def generate_gradcam(model, target_layers, images,
|
10 |
results = []
|
11 |
-
cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
|
12 |
|
13 |
-
|
14 |
-
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
)
|
20 |
-
|
21 |
-
# In this example grayscale_cam has only one image in the batch:
|
22 |
grayscale_cam = grayscale_cam[0, :]
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
|
|
|
|
25 |
)
|
26 |
-
results.append(
|
27 |
return results
|
28 |
|
29 |
|
30 |
-
def visualize_gradcam(
|
31 |
-
fig, axes = plt.subplots(len(misimgs) // 2, 2)
|
32 |
-
fig.tight_layout()
|
33 |
-
for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
|
34 |
-
ax.imshow(img)
|
35 |
-
ax.set_title(f"{classes[tgt]} | {classes[pred]}")
|
36 |
-
ax.grid(False)
|
37 |
-
ax.set_axis_off()
|
38 |
-
plt.show()
|
39 |
-
|
40 |
-
def plot_gradcam(model, data, classes, target_layers, number_of_samples, inv_normalize=None, targets=None, transparency = 0.60, figsize=(10,10), rows=2, cols=5):
|
41 |
-
|
42 |
fig = plt.figure(figsize=figsize)
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
# Get the activations of the layer for the images
|
50 |
-
grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
|
51 |
-
grayscale_cam = grayscale_cam[0, :]
|
52 |
-
|
53 |
-
# Get back the original image
|
54 |
-
img = input_tensor.squeeze(0).to('cpu')
|
55 |
-
if inv_normalize is not None:
|
56 |
-
img = inv_normalize(img)
|
57 |
-
rgb_img = np.transpose(img, (1, 2, 0))
|
58 |
-
rgb_img = rgb_img.numpy()
|
59 |
-
|
60 |
-
# Mix the activations on the original image
|
61 |
-
visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency)
|
62 |
-
|
63 |
-
# Display the images on the plot
|
64 |
-
plt.imshow(visualization)
|
65 |
-
plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
|
66 |
-
plt.xticks([])
|
67 |
-
plt.yticks([])
|
|
|
1 |
import numpy as np
|
2 |
+
from pytorch_grad_cam import EigenCAM
|
|
|
3 |
from pytorch_grad_cam.utils.image import show_cam_on_image
|
4 |
|
5 |
import matplotlib.pyplot as plt
|
6 |
|
7 |
|
8 |
+
def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
|
9 |
results = []
|
|
|
10 |
|
11 |
+
targets = None
|
12 |
+
cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
|
13 |
|
14 |
+
for image in images:
|
15 |
+
input_tensor = image.unsqueeze(0)
|
16 |
+
grayscale_cam = cam(input_tensor, targets=targets)
|
|
|
|
|
|
|
17 |
grayscale_cam = grayscale_cam[0, :]
|
18 |
+
|
19 |
+
img = input_tensor.squeeze(0).to("cpu")
|
20 |
+
rgb_img = np.transpose(img, (1, 2, 0))
|
21 |
+
rgb_img = rgb_img.numpy()
|
22 |
+
|
23 |
+
cam_image = show_cam_on_image(
|
24 |
+
rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
|
25 |
)
|
26 |
+
results.append(cam_image)
|
27 |
return results
|
28 |
|
29 |
|
30 |
+
def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
fig = plt.figure(figsize=figsize)
|
32 |
+
for i in range(len(images)):
|
33 |
+
plt.subplot(rows, cols, i + 1)
|
34 |
+
plt.imshow(images[i])
|
35 |
+
plt.xticks([])
|
36 |
+
plt.yticks([])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/utils.py
CHANGED
@@ -592,3 +592,77 @@ def clip_boxes(boxes, shape):
|
|
592 |
else: # np.array (faster grouped)
|
593 |
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
|
594 |
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
592 |
else: # np.array (faster grouped)
|
593 |
boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
|
594 |
boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
|
595 |
+
|
596 |
+
|
597 |
+
def save_result(image, boxes, index):
|
598 |
+
"""Plots predicted bounding boxes on the image"""
|
599 |
+
cmap = plt.get_cmap("tab20b")
|
600 |
+
class_labels = config.PASCAL_CLASSES
|
601 |
+
|
602 |
+
colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
|
603 |
+
im = np.array(image)
|
604 |
+
height, width, _ = im.shape
|
605 |
+
|
606 |
+
# Create figure and axes
|
607 |
+
fig, ax = plt.subplots(1)
|
608 |
+
# Display the image
|
609 |
+
ax.imshow(im)
|
610 |
+
|
611 |
+
# box[0] is x midpoint, box[2] is width
|
612 |
+
# box[1] is y midpoint, box[3] is height
|
613 |
+
|
614 |
+
# Create a Rectangle patch
|
615 |
+
for box in boxes:
|
616 |
+
assert (
|
617 |
+
len(box) == 6
|
618 |
+
), "box should contain class pred, confidence, x, y, width, height"
|
619 |
+
class_pred = box[0]
|
620 |
+
box = box[2:]
|
621 |
+
upper_left_x = box[0] - box[2] / 2
|
622 |
+
upper_left_y = box[1] - box[3] / 2
|
623 |
+
rect = patches.Rectangle(
|
624 |
+
(upper_left_x * width, upper_left_y * height),
|
625 |
+
box[2] * width,
|
626 |
+
box[3] * height,
|
627 |
+
linewidth=2,
|
628 |
+
edgecolor=colors[int(class_pred)],
|
629 |
+
facecolor="none",
|
630 |
+
)
|
631 |
+
# Add the patch to the Axes
|
632 |
+
ax.add_patch(rect)
|
633 |
+
plt.text(
|
634 |
+
upper_left_x * width,
|
635 |
+
upper_left_y * height,
|
636 |
+
s=class_labels[int(class_pred)],
|
637 |
+
color="white",
|
638 |
+
verticalalignment="top",
|
639 |
+
bbox={"color": colors[int(class_pred)], "pad": 0},
|
640 |
+
)
|
641 |
+
ax.grid(False)
|
642 |
+
ax.set_axis_off()
|
643 |
+
|
644 |
+
plt.savefig(f"output/img{index}.png")
|
645 |
+
|
646 |
+
|
647 |
+
def generate_result(model, data, thresh, iou_thresh, anchors):
|
648 |
+
model.eval()
|
649 |
+
x = data
|
650 |
+
# x = x.to("cuda")
|
651 |
+
with torch.no_grad():
|
652 |
+
out = model(x)
|
653 |
+
bboxes = [[] for _ in range(x.shape[0])]
|
654 |
+
for i in range(3):
|
655 |
+
batch_size, A, S, _, _ = out[i].shape
|
656 |
+
anchor = anchors[i]
|
657 |
+
boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
|
658 |
+
for idx, (box) in enumerate(boxes_scale_i):
|
659 |
+
bboxes[idx] += box
|
660 |
+
|
661 |
+
for i in range(batch_size):
|
662 |
+
nms_boxes = non_max_suppression(
|
663 |
+
bboxes[i],
|
664 |
+
iou_threshold=iou_thresh,
|
665 |
+
threshold=thresh,
|
666 |
+
box_format="midpoint",
|
667 |
+
)
|
668 |
+
save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
|
utils/utils/common.py
DELETED
@@ -1,185 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
import random
|
3 |
-
import matplotlib.pyplot as plt
|
4 |
-
|
5 |
-
import torch
|
6 |
-
import torchvision
|
7 |
-
from torchinfo import summary
|
8 |
-
from torch_lr_finder import LRFinder
|
9 |
-
|
10 |
-
|
11 |
-
def find_lr(model, optimizer, criterion, device, trainloader, numiter, startlr, endlr):
|
12 |
-
lr_finder = LRFinder(
|
13 |
-
model=model, optimizer=optimizer, criterion=criterion, device=device
|
14 |
-
)
|
15 |
-
|
16 |
-
lr_finder.range_test(
|
17 |
-
train_loader=trainloader,
|
18 |
-
start_lr=startlr,
|
19 |
-
end_lr=endlr,
|
20 |
-
num_iter=numiter,
|
21 |
-
step_mode="exp",
|
22 |
-
)
|
23 |
-
|
24 |
-
lr_finder.plot()
|
25 |
-
|
26 |
-
lr_finder.reset()
|
27 |
-
|
28 |
-
|
29 |
-
def one_cycle_lr(optimizer, maxlr, steps, epochs):
|
30 |
-
scheduler = torch.optim.lr_scheduler.OneCycleLR(
|
31 |
-
optimizer=optimizer,
|
32 |
-
max_lr=maxlr,
|
33 |
-
steps_per_epoch=steps,
|
34 |
-
epochs=epochs,
|
35 |
-
pct_start=5 / epochs,
|
36 |
-
div_factor=100,
|
37 |
-
three_phase=False,
|
38 |
-
final_div_factor=100,
|
39 |
-
anneal_strategy="linear",
|
40 |
-
)
|
41 |
-
return scheduler
|
42 |
-
|
43 |
-
|
44 |
-
def show_random_images_for_each_class(train_data, num_images_per_class=16):
|
45 |
-
for c, cls in enumerate(train_data.classes):
|
46 |
-
rand_targets = random.sample(
|
47 |
-
[n for n, x in enumerate(train_data.targets) if x == c],
|
48 |
-
k=num_images_per_class,
|
49 |
-
)
|
50 |
-
show_img_grid(np.transpose(train_data.data[rand_targets], axes=(0, 3, 1, 2)))
|
51 |
-
plt.title(cls)
|
52 |
-
|
53 |
-
|
54 |
-
def show_img_grid(data):
|
55 |
-
try:
|
56 |
-
grid_img = torchvision.utils.make_grid(data.cpu().detach())
|
57 |
-
except:
|
58 |
-
data = torch.from_numpy(data)
|
59 |
-
grid_img = torchvision.utils.make_grid(data)
|
60 |
-
|
61 |
-
plt.figure(figsize=(10, 10))
|
62 |
-
plt.imshow(grid_img.permute(1, 2, 0))
|
63 |
-
|
64 |
-
|
65 |
-
def show_random_images(data_loader):
|
66 |
-
data, target = next(iter(data_loader))
|
67 |
-
show_img_grid(data)
|
68 |
-
|
69 |
-
|
70 |
-
def show_model_summary(model, batch_size):
|
71 |
-
summary(
|
72 |
-
model=model,
|
73 |
-
input_size=(batch_size, 3, 32, 32),
|
74 |
-
col_names=["input_size", "output_size", "num_params", "kernel_size"],
|
75 |
-
verbose=1,
|
76 |
-
)
|
77 |
-
|
78 |
-
|
79 |
-
def lossacc_plots(results):
|
80 |
-
plt.plot(results["epoch"], results["trainloss"])
|
81 |
-
plt.plot(results["epoch"], results["testloss"])
|
82 |
-
plt.legend(["Train Loss", "Validation Loss"])
|
83 |
-
plt.xlabel("Epochs")
|
84 |
-
plt.ylabel("Loss")
|
85 |
-
plt.title("Loss vs Epochs")
|
86 |
-
plt.show()
|
87 |
-
|
88 |
-
plt.plot(results["epoch"], results["trainacc"])
|
89 |
-
plt.plot(results["epoch"], results["testacc"])
|
90 |
-
plt.legend(["Train Acc", "Validation Acc"])
|
91 |
-
plt.xlabel("Epochs")
|
92 |
-
plt.ylabel("Accuracy")
|
93 |
-
plt.title("Accuracy vs Epochs")
|
94 |
-
plt.show()
|
95 |
-
|
96 |
-
|
97 |
-
def lr_plots(results, length):
|
98 |
-
plt.plot(range(length), results["lr"])
|
99 |
-
plt.xlabel("Epochs")
|
100 |
-
plt.ylabel("Learning Rate")
|
101 |
-
plt.title("Learning Rate vs Epochs")
|
102 |
-
plt.show()
|
103 |
-
|
104 |
-
|
105 |
-
def get_misclassified(model, testloader, device, mis_count=10):
|
106 |
-
misimgs, mistgts, mispreds = [], [], []
|
107 |
-
with torch.no_grad():
|
108 |
-
for data, target in testloader:
|
109 |
-
data, target = data.to(device), target.to(device)
|
110 |
-
output = model(data)
|
111 |
-
pred = output.argmax(dim=1, keepdim=True)
|
112 |
-
misclassified = torch.argwhere(pred.squeeze() != target).squeeze()
|
113 |
-
for idx in misclassified:
|
114 |
-
if len(misimgs) >= mis_count:
|
115 |
-
break
|
116 |
-
misimgs.append(data[idx])
|
117 |
-
mistgts.append(target[idx])
|
118 |
-
mispreds.append(pred[idx].squeeze())
|
119 |
-
return misimgs, mistgts, mispreds
|
120 |
-
|
121 |
-
|
122 |
-
# def plot_misclassified(misimgs, mistgts, mispreds, classes):
|
123 |
-
# fig, axes = plt.subplots(len(misimgs) // 2, 2)
|
124 |
-
# fig.tight_layout()
|
125 |
-
# for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
|
126 |
-
# ax.imshow((img / img.max()).permute(1, 2, 0).cpu())
|
127 |
-
# ax.set_title(f"{classes[tgt]} | {classes[pred]}")
|
128 |
-
# ax.grid(False)
|
129 |
-
# ax.set_axis_off()
|
130 |
-
# plt.show()
|
131 |
-
|
132 |
-
def get_misclassified_data(model, device, test_loader, count):
|
133 |
-
"""
|
134 |
-
Function to run the model on test set and return misclassified images
|
135 |
-
:param model: Network Architecture
|
136 |
-
:param device: CPU/GPU
|
137 |
-
:param test_loader: DataLoader for test set
|
138 |
-
"""
|
139 |
-
# Prepare the model for evaluation i.e. drop the dropout layer
|
140 |
-
model.eval()
|
141 |
-
|
142 |
-
# List to store misclassified Images
|
143 |
-
misclassified_data = []
|
144 |
-
|
145 |
-
# Reset the gradients
|
146 |
-
with torch.no_grad():
|
147 |
-
# Extract images, labels in a batch
|
148 |
-
for data, target in test_loader:
|
149 |
-
|
150 |
-
# Migrate the data to the device
|
151 |
-
data, target = data.to(device), target.to(device)
|
152 |
-
|
153 |
-
# Extract single image, label from the batch
|
154 |
-
for image, label in zip(data, target):
|
155 |
-
|
156 |
-
# Add batch dimension to the image
|
157 |
-
image = image.unsqueeze(0)
|
158 |
-
|
159 |
-
# Get the model prediction on the image
|
160 |
-
output = model(image)
|
161 |
-
|
162 |
-
# Convert the output from one-hot encoding to a value
|
163 |
-
pred = output.argmax(dim=1, keepdim=True)
|
164 |
-
|
165 |
-
# If prediction is incorrect, append the data
|
166 |
-
if pred != label:
|
167 |
-
misclassified_data.append((image, label, pred))
|
168 |
-
if len(misclassified_data) >= count:
|
169 |
-
break
|
170 |
-
|
171 |
-
return misclassified_data[:count]
|
172 |
-
|
173 |
-
def plot_misclassified(data, classes, size=(10, 10), rows=2, cols=5, inv_normalize=None):
|
174 |
-
fig = plt.figure(figsize=size)
|
175 |
-
number_of_samples = len(data)
|
176 |
-
for i in range(number_of_samples):
|
177 |
-
plt.subplot(rows, cols, i + 1)
|
178 |
-
img = data[i][0].squeeze().to('cpu')
|
179 |
-
if inv_normalize is not None:
|
180 |
-
img = inv_normalize(img)
|
181 |
-
plt.imshow(np.transpose(img, (1, 2, 0)))
|
182 |
-
plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
|
183 |
-
plt.xticks([])
|
184 |
-
plt.yticks([])
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/utils/data.py
DELETED
@@ -1,294 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
|
3 |
-
"""
|
4 |
-
|
5 |
-
import numpy as np
|
6 |
-
import os
|
7 |
-
import pandas as pd
|
8 |
-
import torch
|
9 |
-
import random
|
10 |
-
from PIL import Image, ImageFile
|
11 |
-
|
12 |
-
import lightning as L
|
13 |
-
from torch.utils.data import Dataset, DataLoader
|
14 |
-
import config as config
|
15 |
-
|
16 |
-
from utils.utils import xywhn2xyxy, xyxy2xywhn
|
17 |
-
|
18 |
-
from utils.utils import (
|
19 |
-
cells_to_bboxes,
|
20 |
-
iou_width_height as iou,
|
21 |
-
non_max_suppression as nms,
|
22 |
-
plot_image,
|
23 |
-
)
|
24 |
-
|
25 |
-
|
26 |
-
ImageFile.LOAD_TRUNCATED_IMAGES = True
|
27 |
-
|
28 |
-
|
29 |
-
class YOLODataset(Dataset):
|
30 |
-
def __init__(
|
31 |
-
self,
|
32 |
-
csv_file,
|
33 |
-
img_dir,
|
34 |
-
label_dir,
|
35 |
-
anchors,
|
36 |
-
image_size=416,
|
37 |
-
S=[13, 26, 52],
|
38 |
-
C=20,
|
39 |
-
transform=None,
|
40 |
-
):
|
41 |
-
self.annotations = pd.read_csv(csv_file)
|
42 |
-
self.img_dir = img_dir
|
43 |
-
self.label_dir = label_dir
|
44 |
-
self.image_size = image_size
|
45 |
-
self.mosaic_border = [image_size // 2, image_size // 2]
|
46 |
-
self.transform = transform
|
47 |
-
self.S = S
|
48 |
-
self.anchors = torch.tensor(
|
49 |
-
anchors[0] + anchors[1] + anchors[2]
|
50 |
-
) # for all 3 scales
|
51 |
-
self.num_anchors = self.anchors.shape[0]
|
52 |
-
self.num_anchors_per_scale = self.num_anchors // 3
|
53 |
-
self.C = C
|
54 |
-
self.ignore_iou_thresh = 0.5
|
55 |
-
|
56 |
-
def __len__(self):
|
57 |
-
return len(self.annotations)
|
58 |
-
|
59 |
-
def load_mosaic(self, index):
|
60 |
-
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
|
61 |
-
labels4 = []
|
62 |
-
s = self.image_size
|
63 |
-
yc, xc = (
|
64 |
-
int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
|
65 |
-
) # mosaic center x, y
|
66 |
-
indices = [index] + random.choices(
|
67 |
-
range(len(self)), k=3
|
68 |
-
) # 3 additional image indices
|
69 |
-
random.shuffle(indices)
|
70 |
-
for i, index in enumerate(indices):
|
71 |
-
# Load image
|
72 |
-
label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
|
73 |
-
bboxes = np.roll(
|
74 |
-
np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
|
75 |
-
).tolist()
|
76 |
-
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
|
77 |
-
img = np.array(Image.open(img_path).convert("RGB"))
|
78 |
-
|
79 |
-
h, w = img.shape[0], img.shape[1]
|
80 |
-
labels = np.array(bboxes)
|
81 |
-
|
82 |
-
# place img in img4
|
83 |
-
if i == 0: # top left
|
84 |
-
img4 = np.full(
|
85 |
-
(s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
|
86 |
-
) # base image with 4 tiles
|
87 |
-
x1a, y1a, x2a, y2a = (
|
88 |
-
max(xc - w, 0),
|
89 |
-
max(yc - h, 0),
|
90 |
-
xc,
|
91 |
-
yc,
|
92 |
-
) # xmin, ymin, xmax, ymax (large image)
|
93 |
-
x1b, y1b, x2b, y2b = (
|
94 |
-
w - (x2a - x1a),
|
95 |
-
h - (y2a - y1a),
|
96 |
-
w,
|
97 |
-
h,
|
98 |
-
) # xmin, ymin, xmax, ymax (small image)
|
99 |
-
elif i == 1: # top right
|
100 |
-
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
|
101 |
-
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
|
102 |
-
elif i == 2: # bottom left
|
103 |
-
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
|
104 |
-
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
|
105 |
-
elif i == 3: # bottom right
|
106 |
-
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
|
107 |
-
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
|
108 |
-
|
109 |
-
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
|
110 |
-
padw = x1a - x1b
|
111 |
-
padh = y1a - y1b
|
112 |
-
|
113 |
-
# Labels
|
114 |
-
if labels.size:
|
115 |
-
labels[:, :-1] = xywhn2xyxy(
|
116 |
-
labels[:, :-1], w, h, padw, padh
|
117 |
-
) # normalized xywh to pixel xyxy format
|
118 |
-
labels4.append(labels)
|
119 |
-
|
120 |
-
# Concat/clip labels
|
121 |
-
labels4 = np.concatenate(labels4, 0)
|
122 |
-
for x in (labels4[:, :-1],):
|
123 |
-
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
|
124 |
-
# img4, labels4 = replicate(img4, labels4) # replicate
|
125 |
-
labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
|
126 |
-
labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
|
127 |
-
labels4 = labels4[labels4[:, 2] > 0]
|
128 |
-
labels4 = labels4[labels4[:, 3] > 0]
|
129 |
-
return img4, labels4
|
130 |
-
|
131 |
-
def __getitem__(self, index):
|
132 |
-
if random.random() >= config.P_MOSAIC:
|
133 |
-
image, bboxes = self.load_mosaic(index)
|
134 |
-
else:
|
135 |
-
label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
|
136 |
-
bboxes = np.roll(
|
137 |
-
np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
|
138 |
-
).tolist()
|
139 |
-
img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
|
140 |
-
image = np.array(Image.open(img_path).convert("RGB"))
|
141 |
-
|
142 |
-
if self.transform:
|
143 |
-
augmentations = self.transform(image=image, bboxes=bboxes)
|
144 |
-
image = augmentations["image"]
|
145 |
-
bboxes = augmentations["bboxes"]
|
146 |
-
|
147 |
-
# Below assumes 3 scale predictions (as paper) and same num of anchors per scale
|
148 |
-
targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
|
149 |
-
for box in bboxes:
|
150 |
-
iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
|
151 |
-
anchor_indices = iou_anchors.argsort(descending=True, dim=0)
|
152 |
-
x, y, width, height, class_label = box
|
153 |
-
has_anchor = [False] * 3 # each scale should have one anchor
|
154 |
-
for anchor_idx in anchor_indices:
|
155 |
-
scale_idx = anchor_idx // self.num_anchors_per_scale
|
156 |
-
anchor_on_scale = anchor_idx % self.num_anchors_per_scale
|
157 |
-
S = self.S[scale_idx]
|
158 |
-
i, j = int(S * y), int(S * x) # which cell
|
159 |
-
anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
|
160 |
-
if not anchor_taken and not has_anchor[scale_idx]:
|
161 |
-
targets[scale_idx][anchor_on_scale, i, j, 0] = 1
|
162 |
-
x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
|
163 |
-
width_cell, height_cell = (
|
164 |
-
width * S,
|
165 |
-
height * S,
|
166 |
-
) # can be greater than 1 since it's relative to cell
|
167 |
-
box_coordinates = torch.tensor(
|
168 |
-
[x_cell, y_cell, width_cell, height_cell]
|
169 |
-
)
|
170 |
-
targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
|
171 |
-
targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
|
172 |
-
has_anchor[scale_idx] = True
|
173 |
-
|
174 |
-
elif (
|
175 |
-
not anchor_taken
|
176 |
-
and iou_anchors[anchor_idx] > self.ignore_iou_thresh
|
177 |
-
):
|
178 |
-
targets[scale_idx][
|
179 |
-
anchor_on_scale, i, j, 0
|
180 |
-
] = -1 # ignore prediction
|
181 |
-
|
182 |
-
return image, tuple(targets)
|
183 |
-
|
184 |
-
|
185 |
-
def test():
|
186 |
-
anchors = config.ANCHORS
|
187 |
-
|
188 |
-
transform = config.test_transforms
|
189 |
-
|
190 |
-
dataset = YOLODataset(
|
191 |
-
"COCO/train.csv",
|
192 |
-
"COCO/images/images/",
|
193 |
-
"COCO/labels/labels_new/",
|
194 |
-
S=[13, 26, 52],
|
195 |
-
anchors=anchors,
|
196 |
-
transform=transform,
|
197 |
-
)
|
198 |
-
S = [13, 26, 52]
|
199 |
-
scaled_anchors = torch.tensor(anchors) / (
|
200 |
-
1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
|
201 |
-
)
|
202 |
-
loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
|
203 |
-
for x, y in loader:
|
204 |
-
boxes = []
|
205 |
-
|
206 |
-
for i in range(y[0].shape[1]):
|
207 |
-
anchor = scaled_anchors[i]
|
208 |
-
print(anchor.shape)
|
209 |
-
print(y[i].shape)
|
210 |
-
boxes += cells_to_bboxes(
|
211 |
-
y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
|
212 |
-
)[0]
|
213 |
-
boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
|
214 |
-
print(boxes)
|
215 |
-
plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
|
216 |
-
|
217 |
-
|
218 |
-
class PascalDataModule(L.LightningDataModule):
|
219 |
-
def __init__(
|
220 |
-
self,
|
221 |
-
train_csv_path=None,
|
222 |
-
test_csv_path=None,
|
223 |
-
batch_size=512,
|
224 |
-
shuffle=True,
|
225 |
-
num_workers=4,
|
226 |
-
) -> None:
|
227 |
-
super().__init__()
|
228 |
-
self.train_csv_path = train_csv_path
|
229 |
-
self.test_csv_path = test_csv_path
|
230 |
-
self.batch_size = batch_size
|
231 |
-
self.shuffle = shuffle
|
232 |
-
self.num_workers = num_workers
|
233 |
-
self.IMAGE_SIZE = config.IMAGE_SIZE
|
234 |
-
|
235 |
-
def prepare_data(self) -> None:
|
236 |
-
pass
|
237 |
-
|
238 |
-
def setup(self, stage=None):
|
239 |
-
self.train_dataset = YOLODataset(
|
240 |
-
self.train_csv_path,
|
241 |
-
transform=config.train_transforms,
|
242 |
-
S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
|
243 |
-
img_dir=config.IMG_DIR,
|
244 |
-
label_dir=config.LABEL_DIR,
|
245 |
-
anchors=config.ANCHORS,
|
246 |
-
)
|
247 |
-
|
248 |
-
self.val_dataset = YOLODataset(
|
249 |
-
self.test_csv_path,
|
250 |
-
transform=config.test_transforms,
|
251 |
-
S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
|
252 |
-
img_dir=config.IMG_DIR,
|
253 |
-
label_dir=config.LABEL_DIR,
|
254 |
-
anchors=config.ANCHORS,
|
255 |
-
)
|
256 |
-
|
257 |
-
self.test_dataset = YOLODataset(
|
258 |
-
self.test_csv_path,
|
259 |
-
transform=config.test_transforms,
|
260 |
-
S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
|
261 |
-
img_dir=config.IMG_DIR,
|
262 |
-
label_dir=config.LABEL_DIR,
|
263 |
-
anchors=config.ANCHORS,
|
264 |
-
)
|
265 |
-
|
266 |
-
def train_dataloader(self):
|
267 |
-
return DataLoader(
|
268 |
-
dataset=self.train_dataset,
|
269 |
-
batch_size=config.BATCH_SIZE,
|
270 |
-
num_workers=config.NUM_WORKERS,
|
271 |
-
pin_memory=config.PIN_MEMORY,
|
272 |
-
shuffle=True,
|
273 |
-
drop_last=False,
|
274 |
-
)
|
275 |
-
|
276 |
-
def val_dataloader(self):
|
277 |
-
return DataLoader(
|
278 |
-
dataset=self.val_dataset,
|
279 |
-
batch_size=config.BATCH_SIZE,
|
280 |
-
num_workers=config.NUM_WORKERS,
|
281 |
-
pin_memory=config.PIN_MEMORY,
|
282 |
-
shuffle=False,
|
283 |
-
drop_last=False,
|
284 |
-
)
|
285 |
-
|
286 |
-
def test_dataloader(self):
|
287 |
-
return DataLoader(
|
288 |
-
dataset=self.test_dataset,
|
289 |
-
batch_size=config.BATCH_SIZE,
|
290 |
-
num_workers=config.NUM_WORKERS,
|
291 |
-
pin_memory=config.PIN_MEMORY,
|
292 |
-
shuffle=False,
|
293 |
-
drop_last=False,
|
294 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/utils/gradcam.py
DELETED
@@ -1,36 +0,0 @@
|
|
1 |
-
import numpy as np
|
2 |
-
from pytorch_grad_cam import EigenCAM
|
3 |
-
from pytorch_grad_cam.utils.image import show_cam_on_image
|
4 |
-
|
5 |
-
import matplotlib.pyplot as plt
|
6 |
-
|
7 |
-
|
8 |
-
def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
|
9 |
-
results = []
|
10 |
-
|
11 |
-
targets = None
|
12 |
-
cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
|
13 |
-
|
14 |
-
for image in images:
|
15 |
-
input_tensor = image.unsqueeze(0)
|
16 |
-
grayscale_cam = cam(input_tensor, targets=targets)
|
17 |
-
grayscale_cam = grayscale_cam[0, :]
|
18 |
-
|
19 |
-
img = input_tensor.squeeze(0).to("cpu")
|
20 |
-
rgb_img = np.transpose(img, (1, 2, 0))
|
21 |
-
rgb_img = rgb_img.numpy()
|
22 |
-
|
23 |
-
cam_image = show_cam_on_image(
|
24 |
-
rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
|
25 |
-
)
|
26 |
-
results.append(cam_image)
|
27 |
-
return results
|
28 |
-
|
29 |
-
|
30 |
-
def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
|
31 |
-
fig = plt.figure(figsize=figsize)
|
32 |
-
for i in range(len(images)):
|
33 |
-
plt.subplot(rows, cols, i + 1)
|
34 |
-
plt.imshow(images[i])
|
35 |
-
plt.xticks([])
|
36 |
-
plt.yticks([])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/utils/loss.py
DELETED
@@ -1,90 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
|
3 |
-
the difference from what I can tell is I use CrossEntropy for the classes
|
4 |
-
instead of BinaryCrossEntropy.
|
5 |
-
"""
|
6 |
-
import random
|
7 |
-
import torch
|
8 |
-
import torch.nn as nn
|
9 |
-
|
10 |
-
from utils.utils import intersection_over_union
|
11 |
-
|
12 |
-
|
13 |
-
class YoloLoss(nn.Module):
|
14 |
-
def __init__(self):
|
15 |
-
super().__init__()
|
16 |
-
self.mse = nn.MSELoss()
|
17 |
-
self.bce = nn.BCEWithLogitsLoss()
|
18 |
-
self.entropy = nn.CrossEntropyLoss()
|
19 |
-
self.sigmoid = nn.Sigmoid()
|
20 |
-
|
21 |
-
# Constants signifying how much to pay for each respective part of the loss
|
22 |
-
self.lambda_class = 1
|
23 |
-
self.lambda_noobj = 10
|
24 |
-
self.lambda_obj = 1
|
25 |
-
self.lambda_box = 10
|
26 |
-
|
27 |
-
def forward(self, predictions, target, anchors):
|
28 |
-
# Check where obj and noobj (we ignore if target == -1)
|
29 |
-
obj = target[..., 0] == 1 # in paper this is Iobj_i
|
30 |
-
noobj = target[..., 0] == 0 # in paper this is Inoobj_i
|
31 |
-
|
32 |
-
# ======================= #
|
33 |
-
# FOR NO OBJECT LOSS #
|
34 |
-
# ======================= #
|
35 |
-
|
36 |
-
no_object_loss = self.bce(
|
37 |
-
(predictions[..., 0:1][noobj]),
|
38 |
-
(target[..., 0:1][noobj]),
|
39 |
-
)
|
40 |
-
|
41 |
-
# ==================== #
|
42 |
-
# FOR OBJECT LOSS #
|
43 |
-
# ==================== #
|
44 |
-
anchors = anchors.reshape(1, 3, 1, 1, 2)
|
45 |
-
|
46 |
-
box_preds = torch.cat(
|
47 |
-
[
|
48 |
-
self.sigmoid(predictions[..., 1:3]),
|
49 |
-
torch.exp(predictions[..., 3:5]) * anchors,
|
50 |
-
],
|
51 |
-
dim=-1,
|
52 |
-
)
|
53 |
-
ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
|
54 |
-
# ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj])
|
55 |
-
object_loss = self.mse(
|
56 |
-
self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj]
|
57 |
-
)
|
58 |
-
|
59 |
-
# ======================== #
|
60 |
-
# FOR BOX COORDINATES #
|
61 |
-
# ======================== #
|
62 |
-
|
63 |
-
predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3]) # x,y coordinates
|
64 |
-
target[..., 3:5] = torch.log(
|
65 |
-
(1e-16 + target[..., 3:5] / anchors)
|
66 |
-
) # width, height coordinates
|
67 |
-
box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
|
68 |
-
|
69 |
-
# ================== #
|
70 |
-
# FOR CLASS LOSS #
|
71 |
-
# ================== #
|
72 |
-
|
73 |
-
class_loss = self.entropy(
|
74 |
-
(predictions[..., 5:][obj]),
|
75 |
-
(target[..., 5][obj].long()),
|
76 |
-
)
|
77 |
-
|
78 |
-
# print("__________________________________")
|
79 |
-
# print(self.lambda_box * box_loss)
|
80 |
-
# print(self.lambda_obj * object_loss)
|
81 |
-
# print(self.lambda_noobj * no_object_loss)
|
82 |
-
# print(self.lambda_class * class_loss)
|
83 |
-
# print("\n")
|
84 |
-
|
85 |
-
return (
|
86 |
-
self.lambda_box * box_loss
|
87 |
-
+ self.lambda_obj * object_loss
|
88 |
-
+ self.lambda_noobj * no_object_loss
|
89 |
-
+ self.lambda_class * class_loss
|
90 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/utils/utils.py
DELETED
@@ -1,668 +0,0 @@
|
|
1 |
-
import config
|
2 |
-
import matplotlib.pyplot as plt
|
3 |
-
import matplotlib.patches as patches
|
4 |
-
import numpy as np
|
5 |
-
import os
|
6 |
-
import random
|
7 |
-
import torch
|
8 |
-
|
9 |
-
from collections import Counter
|
10 |
-
from torch.utils.data import DataLoader
|
11 |
-
from tqdm import tqdm
|
12 |
-
|
13 |
-
|
14 |
-
def iou_width_height(boxes1, boxes2):
|
15 |
-
"""
|
16 |
-
Parameters:
|
17 |
-
boxes1 (tensor): width and height of the first bounding boxes
|
18 |
-
boxes2 (tensor): width and height of the second bounding boxes
|
19 |
-
Returns:
|
20 |
-
tensor: Intersection over union of the corresponding boxes
|
21 |
-
"""
|
22 |
-
intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
|
23 |
-
boxes1[..., 1], boxes2[..., 1]
|
24 |
-
)
|
25 |
-
union = (
|
26 |
-
boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
|
27 |
-
)
|
28 |
-
return intersection / union
|
29 |
-
|
30 |
-
|
31 |
-
def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
    """
    Video explanation of this function:
    https://youtu.be/XXYG5ZWtjj0

    Calculate intersection over union (IoU) between predicted boxes and
    target boxes.

    Parameters:
        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
        box_format (str): "midpoint" if boxes are (x, y, w, h),
            "corners" if boxes are (x1, y1, x2, y2)

    Returns:
        tensor: Intersection over union for all examples; the last dimension
        is kept with size 1 because the coordinates are sliced, not indexed.

    Raises:
        ValueError: if box_format is neither "midpoint" nor "corners".
    """
    if box_format == "midpoint":
        # Convert centre/size to corner coordinates.
        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
    elif box_format == "corners":
        box1_x1 = boxes_preds[..., 0:1]
        box1_y1 = boxes_preds[..., 1:2]
        box1_x2 = boxes_preds[..., 2:3]
        box1_y2 = boxes_preds[..., 3:4]
        box2_x1 = boxes_labels[..., 0:1]
        box2_y1 = boxes_labels[..., 1:2]
        box2_x2 = boxes_labels[..., 2:3]
        box2_y2 = boxes_labels[..., 3:4]
    else:
        # Previously an unknown format fell through to an UnboundLocalError.
        raise ValueError(
            f"box_format must be 'midpoint' or 'corners', got {box_format!r}"
        )

    x1 = torch.max(box1_x1, box2_x1)
    y1 = torch.max(box1_y1, box2_y1)
    x2 = torch.min(box1_x2, box2_x2)
    y2 = torch.min(box1_y2, box2_y2)

    # clamp(0) handles boxes that do not overlap at all.
    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))

    # 1e-6 guards against division by zero for degenerate boxes.
    return intersection / (box1_area + box2_area - intersection + 1e-6)
|
78 |
-
|
79 |
-
|
80 |
-
def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
    """
    Video explanation of this function:
    https://youtu.be/YDkjWEN8jNA

    Does Non Max Suppression given bboxes

    Parameters:
        bboxes (list): list of lists containing all bboxes with each bboxes
        specified as [class_pred, prob_score, x1, y1, x2, y2]
        iou_threshold (float): threshold where predicted bboxes is correct
        threshold (float): threshold to remove predicted bboxes (independent of IoU)
        box_format (str): "midpoint" or "corners" used to specify bboxes

    Returns:
        list: bboxes after performing NMS given a specific IoU threshold

    Raises:
        TypeError: if bboxes is not a list.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not isinstance(bboxes, list):
        raise TypeError(f"bboxes must be a list, got {type(bboxes).__name__}")

    # Drop low-confidence boxes, then process from most to least confident.
    candidates = sorted(
        (box for box in bboxes if box[1] > threshold),
        key=lambda box: box[1],
        reverse=True,
    )
    bboxes_after_nms = []

    while candidates:
        chosen_box = candidates.pop(0)
        # Build the tensor once per chosen box rather than once per comparison.
        chosen_coords = torch.tensor(chosen_box[2:])

        # Keep a box if it belongs to another class or overlaps too little
        # with the chosen box to be considered a duplicate.
        candidates = [
            box
            for box in candidates
            if box[0] != chosen_box[0]
            or intersection_over_union(
                chosen_coords,
                torch.tensor(box[2:]),
                box_format=box_format,
            )
            < iou_threshold
        ]

        bboxes_after_nms.append(chosen_box)

    return bboxes_after_nms
|
122 |
-
|
123 |
-
|
124 |
-
def mean_average_precision(
    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
):
    """
    Video explanation of this function:
    https://youtu.be/FppOzcDvaDI

    This function calculates mean average precision (mAP)

    Parameters:
        pred_boxes (list): list of lists containing all bboxes with each bboxes
        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
        true_boxes (list): Similar as pred_boxes except all the correct ones
        iou_threshold (float): threshold where predicted bboxes is correct
        box_format (str): "midpoint" or "corners" used to specify bboxes
        num_classes (int): number of classes

    Returns:
        tensor: scalar mAP value across all classes that have at least one
        ground-truth box, given a specific IoU threshold. Returns a zero
        tensor when no class has any ground truth.
    """

    # list storing all AP for respective classes
    average_precisions = []

    # used for numerical stability later on
    epsilon = 1e-6

    for c in range(num_classes):
        # Only keep predictions and targets that belong to the current class.
        detections = [det for det in pred_boxes if det[1] == c]
        ground_truths = [gt for gt in true_boxes if gt[1] == c]
        total_true_bboxes = len(ground_truths)

        # If none exists for this class then we can safely skip
        if total_true_bboxes == 0:
            continue

        # Group ground truths by image index once, instead of re-filtering
        # the whole list for every detection.
        gts_by_image = {}
        for gt in ground_truths:
            gts_by_image.setdefault(gt[0], []).append(gt)

        # One 0/1 flag per ground-truth box, marking whether it has already
        # been matched by a detection.
        matched = {
            img_idx: torch.zeros(len(img_gts))
            for img_idx, img_gts in gts_by_image.items()
        }

        # sort by box probabilities which is index 2
        detections.sort(key=lambda x: x[2], reverse=True)
        TP = torch.zeros((len(detections)))
        FP = torch.zeros((len(detections)))

        for detection_idx, detection in enumerate(detections):
            image_gts = gts_by_image.get(detection[0], [])

            best_iou = 0
            best_gt_idx = None  # reset per detection so it can never be stale

            for idx, gt in enumerate(image_gts):
                iou = intersection_over_union(
                    torch.tensor(detection[3:]),
                    torch.tensor(gt[3:]),
                    box_format=box_format,
                )

                if iou > best_iou:
                    best_iou = iou
                    best_gt_idx = idx

            is_match = best_gt_idx is not None and best_iou > iou_threshold
            # A ground truth may only be claimed once; later detections that
            # hit the same box count as false positives.
            if is_match and matched[detection[0]][best_gt_idx] == 0:
                TP[detection_idx] = 1
                matched[detection[0]][best_gt_idx] = 1
            else:
                FP[detection_idx] = 1

        TP_cumsum = torch.cumsum(TP, dim=0)
        FP_cumsum = torch.cumsum(FP, dim=0)
        recalls = TP_cumsum / (total_true_bboxes + epsilon)
        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
        # Prepend the (recall=0, precision=1) starting point of the curve.
        precisions = torch.cat((torch.tensor([1]), precisions))
        recalls = torch.cat((torch.tensor([0]), recalls))
        # torch.trapz for numerical integration
        average_precisions.append(torch.trapz(precisions, recalls))

    # Without any ground truth there is nothing to average; avoid dividing
    # by zero.
    if not average_precisions:
        return torch.tensor(0.0)

    return sum(average_precisions) / len(average_precisions)
|
233 |
-
|
234 |
-
|
235 |
-
def plot_image(image, boxes):
    """Show an image with its predicted bounding boxes and class labels.

    Each entry of ``boxes`` must hold six values: class prediction,
    confidence, x midpoint, y midpoint, width and height. Coordinates are
    scaled by the image width/height before drawing.
    """
    if config.DATASET == "COCO":
        class_labels = config.COCO_LABELS
    else:
        class_labels = config.PASCAL_CLASSES

    palette = plt.get_cmap("tab20b")
    # One colour per class, spread evenly over the colormap.
    colors = [palette(t) for t in np.linspace(0, 1, len(class_labels))]

    im = np.array(image)
    height, width, _ = im.shape

    fig, ax = plt.subplots(1)
    ax.imshow(im)

    for box in boxes:
        assert (
            len(box) == 6
        ), "box should contain class pred, confidence, x, y, width, height"
        class_pred, _, x_mid, y_mid, box_w, box_h = box

        # Matplotlib rectangles are anchored at a corner, not the midpoint.
        left = x_mid - box_w / 2
        top = y_mid - box_h / 2

        outline = patches.Rectangle(
            (left * width, top * height),
            box_w * width,
            box_h * height,
            linewidth=2,
            edgecolor=colors[int(class_pred)],
            facecolor="none",
        )
        ax.add_patch(outline)

        plt.text(
            left * width,
            top * height,
            s=class_labels[int(class_pred)],
            color="white",
            verticalalignment="top",
            bbox={"color": colors[int(class_pred)], "pad": 0},
        )

    plt.show()
|
282 |
-
|
283 |
-
|
284 |
-
def get_evaluation_bboxes(
    loader,
    model,
    iou_threshold,
    anchors,
    threshold,
    box_format="midpoint",
    device="cuda",
):
    """Collect NMS-filtered predictions and ground-truth boxes for a loader.

    Every box in both returned lists is prefixed with a running image index
    so predictions and targets of the same image can be matched later.
    The model is put in eval mode while running and switched to train mode
    before returning.

    Returns:
        tuple: (all_pred_boxes, all_true_boxes)
    """
    # make sure model is in eval before get bboxes
    model.eval()
    train_idx = 0
    all_pred_boxes, all_true_boxes = [], []

    for x, labels in tqdm(loader):
        x = x.to(device)

        with torch.no_grad():
            predictions = model(x)

        batch_size = x.shape[0]
        bboxes = [[] for _ in range(batch_size)]

        # Gather the boxes of all three prediction scales per image.
        for i in range(3):
            S = predictions[i].shape[2]
            anchor = torch.tensor([*anchors[i]]).to(device) * S
            scale_boxes = cells_to_bboxes(predictions[i], anchor, S=S, is_preds=True)
            for idx, box in enumerate(scale_boxes):
                bboxes[idx] += box

        # we just want one bbox for each label, not one for each scale;
        # `anchor` and `S` deliberately carry over from the last scale above.
        true_bboxes = cells_to_bboxes(labels[2], anchor, S=S, is_preds=False)

        for idx in range(batch_size):
            nms_boxes = non_max_suppression(
                bboxes[idx],
                iou_threshold=iou_threshold,
                threshold=threshold,
                box_format=box_format,
            )

            all_pred_boxes.extend([train_idx] + nms_box for nms_box in nms_boxes)
            all_true_boxes.extend(
                [train_idx] + box for box in true_bboxes[idx] if box[1] > threshold
            )

            train_idx += 1

    model.train()
    return all_pred_boxes, all_true_boxes
|
335 |
-
|
336 |
-
|
337 |
-
def cells_to_bboxes(predictions, anchors, S, is_preds=True):
    """
    Scales the predictions coming from the model to
    be relative to the entire image such that they for example later
    can be plotted or evaluated.

    The input tensor is left untouched; earlier versions overwrote the box
    coordinates of `predictions` in place, so a second call on the same
    tensor produced wrong results.

    INPUT:
    predictions: tensor of size (N, num_anchors, S, S, num_classes+5)
    anchors: the anchors used for the predictions; must be a tensor of
             shape (num_anchors, 2) when is_preds is True
    S: the number of cells the image is divided in on the width (and height)
    is_preds: whether the input is predictions or the true bounding boxes
    OUTPUT:
    converted_bboxes: nested list of shape (N, num_anchors * S * S, 6) where
                      each box is [class index, object score, x, y, w, h]
    """
    BATCH_SIZE = predictions.shape[0]
    num_anchors = len(anchors)
    box_predictions = predictions[..., 1:5]

    if is_preds:
        anchors = anchors.reshape(1, num_anchors, 1, 1, 2)
        # Build new tensors instead of assigning into the view, so the
        # caller's `predictions` is not modified.
        xy = torch.sigmoid(box_predictions[..., 0:2])
        wh = torch.exp(box_predictions[..., 2:]) * anchors
        scores = torch.sigmoid(predictions[..., 0:1])
        best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
    else:
        xy = box_predictions[..., 0:2]
        wh = box_predictions[..., 2:4]
        scores = predictions[..., 0:1]
        best_class = predictions[..., 5:6]

    # Column index of every cell; permuting the two grid axes below turns it
    # into the row index. Uses num_anchors rather than a hard-coded 3.
    cell_indices = (
        torch.arange(S)
        .repeat(BATCH_SIZE, num_anchors, S, 1)
        .unsqueeze(-1)
        .to(predictions.device)
    )
    x = 1 / S * (xy[..., 0:1] + cell_indices)
    y = 1 / S * (xy[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
    w_h = 1 / S * wh
    converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(
        BATCH_SIZE, num_anchors * S * S, 6
    )
    return converted_bboxes.tolist()
|
377 |
-
|
378 |
-
|
379 |
-
def check_class_accuracy(model, loader, threshold):
    """Print and return class, no-object and object accuracy in percent.

    Runs the model over every batch of ``loader`` in eval mode and switches
    it to train mode before returning. ``threshold`` is applied to the
    sigmoid of the objectness output to decide whether a cell predicts an
    object.

    Returns:
        tuple: (class_acc, noobj_acc, obj_acc)
    """
    model.eval()
    tot_class_preds, correct_class = 0, 0
    tot_noobj, correct_noobj = 0, 0
    tot_obj, correct_obj = 0, 0

    for x, y in tqdm(loader):
        x = x.to(config.DEVICE)
        with torch.no_grad():
            out = model(x)

        for i in range(3):
            # Use a local instead of writing back into `y`, so the batch
            # object handed out by the loader is not modified.
            target = y[i].to(config.DEVICE)
            obj = target[..., 0] == 1  # in paper this is Iobj_i
            noobj = target[..., 0] == 0  # in paper this is Inoobj_i

            correct_class += torch.sum(
                torch.argmax(out[i][..., 5:][obj], dim=-1) == target[..., 5][obj]
            )
            tot_class_preds += torch.sum(obj)

            obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
            correct_obj += torch.sum(obj_preds[obj] == target[..., 0][obj])
            tot_obj += torch.sum(obj)
            correct_noobj += torch.sum(obj_preds[noobj] == target[..., 0][noobj])
            tot_noobj += torch.sum(noobj)

    # 1e-16 avoids division by zero when a category never occurs.
    class_acc = (correct_class / (tot_class_preds + 1e-16)) * 100
    noobj_acc = (correct_noobj / (tot_noobj + 1e-16)) * 100
    obj_acc = (correct_obj / (tot_obj + 1e-16)) * 100

    # `.2f` (two decimals) rather than `2f`, which only set a minimum width.
    print(f"Class accuracy is: {class_acc:.2f}%")
    print(f"No obj accuracy is: {noobj_acc:.2f}%")
    print(f"Obj accuracy is: {obj_acc:.2f}%")
    model.train()
    return class_acc, noobj_acc, obj_acc
|
415 |
-
|
416 |
-
|
417 |
-
def get_mean_std(loader):
    """Estimate per-channel mean and standard deviation over a loader.

    Averages the per-batch channel means of ``data`` and ``data**2``
    (reducing over dims 0, 2 and 3) and applies
    var[X] = E[X**2] - E[X]**2.

    Returns:
        tuple: (mean, std)
    """
    mean_total = 0
    sq_mean_total = 0
    batch_count = 0

    for data, _ in tqdm(loader):
        batch_count += 1
        mean_total = mean_total + torch.mean(data, dim=[0, 2, 3])
        sq_mean_total = sq_mean_total + torch.mean(data**2, dim=[0, 2, 3])

    mean = mean_total / batch_count
    variance = sq_mean_total / batch_count - mean**2
    std = variance**0.5

    return mean, std
|
430 |
-
|
431 |
-
|
432 |
-
def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
    """Write the model and optimizer state dicts to ``filename``.

    The saved object is a dict with the keys "state_dict" and "optimizer".
    """
    print("=> Saving checkpoint")
    torch.save(
        {"state_dict": model.state_dict(), "optimizer": optimizer.state_dict()},
        filename,
    )
|
439 |
-
|
440 |
-
|
441 |
-
def load_checkpoint(checkpoint_file, model, optimizer, lr):
    """Restore model and optimizer state from ``checkpoint_file``.

    The checkpoint is mapped onto ``config.DEVICE``. After loading, the
    learning rate of every optimizer parameter group is set to ``lr``.
    """
    print("=> Loading checkpoint")
    state = torch.load(checkpoint_file, map_location=config.DEVICE)
    model.load_state_dict(state["state_dict"])
    optimizer.load_state_dict(state["optimizer"])

    # Loading the optimizer state also restores the checkpoint's learning
    # rate, so overwrite it with the requested one.
    for group in optimizer.param_groups:
        group["lr"] = lr
|
451 |
-
|
452 |
-
|
453 |
-
def get_loaders(train_csv_path, test_csv_path):
    """Build the train, test and train-eval data loaders.

    The train-eval loader reads the training CSV but uses the test
    transforms and no shuffling. All other settings come from ``config``.

    Returns:
        tuple: (train_loader, test_loader, train_eval_loader)
    """
    from dataset import YOLODataset

    IMAGE_SIZE = config.IMAGE_SIZE

    def make_dataset(csv_path, transform):
        # Grid sizes for the three prediction scales.
        return YOLODataset(
            csv_path,
            transform=transform,
            S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
            img_dir=config.IMG_DIR,
            label_dir=config.LABEL_DIR,
            anchors=config.ANCHORS,
        )

    def make_loader(dataset, shuffle):
        return DataLoader(
            dataset=dataset,
            batch_size=config.BATCH_SIZE,
            num_workers=config.NUM_WORKERS,
            pin_memory=config.PIN_MEMORY,
            shuffle=shuffle,
            drop_last=False,
        )

    train_dataset = make_dataset(train_csv_path, config.train_transforms)
    test_dataset = make_dataset(test_csv_path, config.test_transforms)
    train_loader = make_loader(train_dataset, shuffle=True)
    test_loader = make_loader(test_dataset, shuffle=False)

    train_eval_dataset = make_dataset(train_csv_path, config.test_transforms)
    train_eval_loader = make_loader(train_eval_dataset, shuffle=False)

    return train_loader, test_loader, train_eval_loader
|
508 |
-
|
509 |
-
|
510 |
-
def plot_couple_examples(model, loader, thresh, iou_thresh, anchors):
    """Plot NMS-filtered predictions for part of the first batch of a loader.

    Takes one batch from ``loader``, runs the model on it in eval mode
    (switching back to train mode afterwards) and plots the first
    ``batch_size // 4`` images with their boxes.

    Parameters:
        model: network returning one output per prediction scale
        loader: iterable yielding (images, targets) batches
        thresh (float): confidence threshold passed to NMS
        iou_thresh (float): IoU threshold passed to NMS
        anchors: per-scale anchors, indexed by scale and passed to
            cells_to_bboxes
    """
    model.eval()
    x, y = next(iter(loader))
    # Use the configured device instead of a hard-coded "cuda" so this also
    # works on CPU-only hosts, matching the other helpers in this file.
    x = x.to(config.DEVICE)
    with torch.no_grad():
        out = model(x)
        bboxes = [[] for _ in range(x.shape[0])]
        for i in range(3):
            batch_size, A, S, _, _ = out[i].shape
            anchor = anchors[i]
            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
            for idx, box in enumerate(boxes_scale_i):
                bboxes[idx] += box

        model.train()

    for i in range(batch_size // 4):
        nms_boxes = non_max_suppression(
            bboxes[i],
            iou_threshold=iou_thresh,
            threshold=thresh,
            box_format="midpoint",
        )
        # plot_image expects channels-last data on the CPU.
        plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
|
534 |
-
|
535 |
-
|
536 |
-
def seed_everything(seed=42):
    """Seed Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Also sets the PYTHONHASHSEED environment variable to ``seed``.
    """
    os.environ["PYTHONHASHSEED"] = str(seed)

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
|
545 |
-
|
546 |
-
|
547 |
-
def clip_coords(boxes, img_shape):
    """Clamp xyxy boxes in place to the image shape (height, width).

    Columns 0 and 2 (x1, x2) are limited to the width, columns 1 and 3
    (y1, y2) to the height. Nothing is returned.
    """
    img_h, img_w = img_shape[0], img_shape[1]
    # Upper bound per column, in x1, y1, x2, y2 order.
    for column, upper in enumerate((img_w, img_h, img_w, img_h)):
        boxes[:, column].clamp_(0, upper)
|
553 |
-
|
554 |
-
|
555 |
-
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized [x, y, w, h] boxes to pixel [x1, y1, x2, y2].

    (x1, y1) is the top-left and (x2, y2) the bottom-right corner. ``w`` and
    ``h`` scale the normalized values; ``padw``/``padh`` are added as
    offsets. The input is not modified; a tensor or NumPy copy is returned.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)

    half_w = x[..., 2] / 2
    half_h = x[..., 3] / 2
    y[..., 0] = w * (x[..., 0] - half_w) + padw  # top left x
    y[..., 1] = h * (x[..., 1] - half_h) + padh  # top left y
    y[..., 2] = w * (x[..., 0] + half_w) + padw  # bottom right x
    y[..., 3] = h * (x[..., 1] + half_h) + padh  # bottom right y
    return y
|
563 |
-
|
564 |
-
|
565 |
-
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
    """Convert normalized (x, y) points to pixel coordinates.

    Column 0 is scaled by ``w`` and offset by ``padw``; column 1 is scaled
    by ``h`` and offset by ``padh``. The input is not modified; a tensor or
    NumPy copy is returned.
    """
    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)

    y[..., 0] = w * x[..., 0] + padw  # pixel x
    y[..., 1] = h * x[..., 1] + padh  # pixel y
    return y
|
571 |
-
|
572 |
-
|
573 |
-
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
    """Convert pixel [x1, y1, x2, y2] boxes to normalized [x, y, w, h].

    (x, y) is the box centre. With ``clip=True`` the input boxes are first
    clipped IN PLACE to (h - eps, w - eps). The result is a new tensor or
    NumPy array.
    """
    if clip:
        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip

    if isinstance(x, torch.Tensor):
        y = x.clone()
    else:
        y = np.copy(x)

    left, top = x[..., 0], x[..., 1]
    right, bottom = x[..., 2], x[..., 3]
    y[..., 0] = ((left + right) / 2) / w  # x center
    y[..., 1] = ((top + bottom) / 2) / h  # y center
    y[..., 2] = (right - left) / w  # width
    y[..., 3] = (bottom - top) / h  # height
    return y
|
583 |
-
|
584 |
-
|
585 |
-
def clip_boxes(boxes, shape):
    """Clip xyxy boxes in place to an image shape given as (height, width).

    Tensors are clamped column by column; anything else is treated as a
    NumPy-style array and clipped in two grouped assignments. Nothing is
    returned.
    """
    max_y, max_x = shape[0], shape[1]

    if not isinstance(boxes, torch.Tensor):
        # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, max_x)  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, max_y)  # y1, y2
        return

    # torch.Tensor (faster individually), in x1, y1, x2, y2 order
    for column, upper in enumerate((max_x, max_y, max_x, max_y)):
        boxes[..., column].clamp_(0, upper)
|
595 |
-
|
596 |
-
|
597 |
-
def save_result(image, boxes, index):
    """Draw predicted bounding boxes on the image and save it as a PNG.

    The figure is written to ``output/img{index}.png`` (the directory is
    created if missing) and then closed, so repeated calls do not
    accumulate open matplotlib figures.

    Parameters:
        image: image data convertible with ``np.array`` to an array whose
            shape unpacks as (height, width, channels)
        boxes: iterable of boxes, each
            [class pred, confidence, x midpoint, y midpoint, width, height]
            with coordinates relative to the image size
        index: value inserted into the output file name
    """
    cmap = plt.get_cmap("tab20b")
    class_labels = config.PASCAL_CLASSES

    # One colour per class, spread evenly over the colormap.
    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
    im = np.array(image)
    height, width, _ = im.shape

    # Create figure and axes
    fig, ax = plt.subplots(1)
    # Display the image
    ax.imshow(im)

    for box in boxes:
        assert (
            len(box) == 6
        ), "box should contain class pred, confidence, x, y, width, height"
        class_pred = box[0]
        # box[0] is x midpoint, box[2] is width
        # box[1] is y midpoint, box[3] is height
        box = box[2:]
        upper_left_x = box[0] - box[2] / 2
        upper_left_y = box[1] - box[3] / 2
        rect = patches.Rectangle(
            (upper_left_x * width, upper_left_y * height),
            box[2] * width,
            box[3] * height,
            linewidth=2,
            edgecolor=colors[int(class_pred)],
            facecolor="none",
        )
        # Add the patch to the Axes
        ax.add_patch(rect)
        # Draw on this figure's axes explicitly rather than relying on
        # pyplot's implicit "current axes".
        ax.text(
            upper_left_x * width,
            upper_left_y * height,
            s=class_labels[int(class_pred)],
            color="white",
            verticalalignment="top",
            bbox={"color": colors[int(class_pred)], "pad": 0},
        )
    ax.grid(False)
    ax.set_axis_off()

    # savefig fails if the target directory does not exist.
    os.makedirs("output", exist_ok=True)
    fig.savefig(f"output/img{index}.png")
    # Release the figure; pyplot otherwise keeps every figure alive.
    plt.close(fig)
|
645 |
-
|
646 |
-
|
647 |
-
def generate_result(model, data, thresh, iou_thresh, anchors):
    """Run the model on a batch and save one annotated image per sample.

    Puts the model in eval mode, collects the boxes of all three prediction
    scales, applies NMS in midpoint format and hands each image with its
    boxes to ``save_result`` using the sample's batch position as index.
    """
    model.eval()
    x = data
    # x = x.to("cuda")
    with torch.no_grad():
        out = model(x)
        bboxes = [[] for _ in range(x.shape[0])]
        for scale_idx in range(3):
            batch_size, A, S, _, _ = out[scale_idx].shape
            scale_boxes = cells_to_bboxes(
                out[scale_idx], anchors[scale_idx], S=S, is_preds=True
            )
            for sample_idx, sample_boxes in enumerate(scale_boxes):
                bboxes[sample_idx] += sample_boxes

    for sample_idx in range(batch_size):
        kept = non_max_suppression(
            bboxes[sample_idx],
            iou_threshold=iou_thresh,
            threshold=thresh,
            box_format="midpoint",
        )
        # save_result expects channels-last data on the CPU.
        save_result(x[sample_idx].permute(1, 2, 0).detach().cpu(), kept, sample_idx)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|