ravi.naik committed
Commit 3129974
1 Parent(s): adde10a

Directory name bug fix
utils/gradcam.py CHANGED
@@ -1,67 +1,36 @@
 import numpy as np
-from pytorch_grad_cam import GradCAM
-from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from pytorch_grad_cam import EigenCAM
 from pytorch_grad_cam.utils.image import show_cam_on_image
 
 import matplotlib.pyplot as plt
 
 
-def generate_gradcam(model, target_layers, images, labels, rgb_imgs):
+def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
     results = []
-    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
 
-    for image, label, np_image in zip(images, labels, rgb_imgs):
-        targets = [ClassifierOutputTarget(label.item())]
+    targets = None
+    cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
 
-        # You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
-        grayscale_cam = cam(
-            input_tensor=image.unsqueeze(0), targets=targets, aug_smooth=True
-        )
-
-        # In this example grayscale_cam has only one image in the batch:
+    for image in images:
+        input_tensor = image.unsqueeze(0)
+        grayscale_cam = cam(input_tensor, targets=targets)
         grayscale_cam = grayscale_cam[0, :]
-        visualization = show_cam_on_image(
-            np_image / np_image.max(), grayscale_cam, use_rgb=True
+
+        img = input_tensor.squeeze(0).to("cpu")
+        rgb_img = np.transpose(img, (1, 2, 0))
+        rgb_img = rgb_img.numpy()
+
+        cam_image = show_cam_on_image(
+            rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
         )
-        results.append(visualization)
+        results.append(cam_image)
     return results
 
 
-def visualize_gradcam(misimgs, mistgts, mispreds, classes):
-    fig, axes = plt.subplots(len(misimgs) // 2, 2)
-    fig.tight_layout()
-    for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
-        ax.imshow(img)
-        ax.set_title(f"{classes[tgt]} | {classes[pred]}")
-        ax.grid(False)
-        ax.set_axis_off()
-    plt.show()
-
-def plot_gradcam(model, data, classes, target_layers, number_of_samples, inv_normalize=None, targets=None, transparency = 0.60, figsize=(10,10), rows=2, cols=5):
-
+def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
     fig = plt.figure(figsize=figsize)
-
-    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
-    for i in range(number_of_samples):
-        plt.subplot(rows, cols, i + 1)
-        input_tensor = data[i][0]
-
-        # Get the activations of the layer for the images
-        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
-        grayscale_cam = grayscale_cam[0, :]
-
-        # Get back the original image
-        img = input_tensor.squeeze(0).to('cpu')
-        if inv_normalize is not None:
-            img = inv_normalize(img)
-        rgb_img = np.transpose(img, (1, 2, 0))
-        rgb_img = rgb_img.numpy()
-
-        # Mix the activations on the original image
-        visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency)
-
-        # Display the images on the plot
-        plt.imshow(visualization)
-        plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
-        plt.xticks([])
-        plt.yticks([])
+    for i in range(len(images)):
+        plt.subplot(rows, cols, i + 1)
+        plt.imshow(images[i])
+        plt.xticks([])
+        plt.yticks([])
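For reference, a minimal sketch of how the refactored helpers can be driven end to end. The ResNet-18 backbone, layer choice, and random inputs are illustrative assumptions, not part of this commit; the only real requirement is that each image tensor is CHW with values in [0, 1], since `show_cam_on_image` expects normalized floats.

```python
import torch
import torchvision

from utils.gradcam import generate_gradcam, visualize_gradcam

# Hypothetical model and inputs; any CNN plus a list of its conv layers works.
model = torchvision.models.resnet18(weights=None).eval()
target_layers = [model.layer4[-1]]
images = [torch.rand(3, 224, 224) for _ in range(10)]  # CHW floats in [0, 1]

cam_images = generate_gradcam(model, target_layers, images, use_cuda=False)
visualize_gradcam(cam_images, figsize=(10, 10), rows=2, cols=5)
```

Note that the new signature drops the per-sample labels: EigenCAM works without class targets (`targets = None`), which is what lets the refactor remove `ClassifierOutputTarget` entirely.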
utils/utils.py CHANGED
@@ -592,3 +592,77 @@ def clip_boxes(boxes, shape):
     else:  # np.array (faster grouped)
         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+
+
+def save_result(image, boxes, index):
+    """Plots predicted bounding boxes on the image and saves the figure"""
+    cmap = plt.get_cmap("tab20b")
+    class_labels = config.PASCAL_CLASSES
+
+    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
+    im = np.array(image)
+    height, width, _ = im.shape
+
+    # Create figure and axes
+    fig, ax = plt.subplots(1)
+    # Display the image
+    ax.imshow(im)
+
+    # box[0] is x midpoint, box[2] is width
+    # box[1] is y midpoint, box[3] is height
+
+    # Create a Rectangle patch
+    for box in boxes:
+        assert (
+            len(box) == 6
+        ), "box should contain class pred, confidence, x, y, width, height"
+        class_pred = box[0]
+        box = box[2:]
+        upper_left_x = box[0] - box[2] / 2
+        upper_left_y = box[1] - box[3] / 2
+        rect = patches.Rectangle(
+            (upper_left_x * width, upper_left_y * height),
+            box[2] * width,
+            box[3] * height,
+            linewidth=2,
+            edgecolor=colors[int(class_pred)],
+            facecolor="none",
+        )
+        # Add the patch to the Axes
+        ax.add_patch(rect)
+        plt.text(
+            upper_left_x * width,
+            upper_left_y * height,
+            s=class_labels[int(class_pred)],
+            color="white",
+            verticalalignment="top",
+            bbox={"color": colors[int(class_pred)], "pad": 0},
+        )
+    ax.grid(False)
+    ax.set_axis_off()
+
+    plt.savefig(f"output/img{index}.png")
+
+
+def generate_result(model, data, thresh, iou_thresh, anchors):
+    model.eval()
+    x = data
+    # x = x.to("cuda")
+    with torch.no_grad():
+        out = model(x)
+        bboxes = [[] for _ in range(x.shape[0])]
+        for i in range(3):
+            batch_size, A, S, _, _ = out[i].shape
+            anchor = anchors[i]
+            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
+            for idx, (box) in enumerate(boxes_scale_i):
+                bboxes[idx] += box
+
+    for i in range(batch_size):
+        nms_boxes = non_max_suppression(
+            bboxes[i],
+            iou_threshold=iou_thresh,
+            threshold=thresh,
+            box_format="midpoint",
+        )
+        save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
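A hedged sketch of how the new `generate_result` helper might be invoked; the model, loader, and anchor scaling are assumptions patterned on the rest of the repo, and `save_result` writes into `output/`, so that directory must exist first.

```python
import os
import torch

import config  # repo-level config module, assumed on the path

os.makedirs("output", exist_ok=True)  # save_result writes output/img{i}.png

# Hypothetical batch from an existing test loader; anchors are scaled to grid
# units per scale, mirroring how the repo's test() helper builds them.
x, _ = next(iter(test_loader))
S = [13, 26, 52]
scaled_anchors = torch.tensor(config.ANCHORS) * (
    torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)
generate_result(model, x, thresh=0.6, iou_thresh=0.5, anchors=scaled_anchors)
```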
utils/utils/common.py DELETED
@@ -1,185 +0,0 @@
-import numpy as np
-import random
-import matplotlib.pyplot as plt
-
-import torch
-import torchvision
-from torchinfo import summary
-from torch_lr_finder import LRFinder
-
-
-def find_lr(model, optimizer, criterion, device, trainloader, numiter, startlr, endlr):
-    lr_finder = LRFinder(
-        model=model, optimizer=optimizer, criterion=criterion, device=device
-    )
-
-    lr_finder.range_test(
-        train_loader=trainloader,
-        start_lr=startlr,
-        end_lr=endlr,
-        num_iter=numiter,
-        step_mode="exp",
-    )
-
-    lr_finder.plot()
-
-    lr_finder.reset()
-
-
-def one_cycle_lr(optimizer, maxlr, steps, epochs):
-    scheduler = torch.optim.lr_scheduler.OneCycleLR(
-        optimizer=optimizer,
-        max_lr=maxlr,
-        steps_per_epoch=steps,
-        epochs=epochs,
-        pct_start=5 / epochs,
-        div_factor=100,
-        three_phase=False,
-        final_div_factor=100,
-        anneal_strategy="linear",
-    )
-    return scheduler
-
-
-def show_random_images_for_each_class(train_data, num_images_per_class=16):
-    for c, cls in enumerate(train_data.classes):
-        rand_targets = random.sample(
-            [n for n, x in enumerate(train_data.targets) if x == c],
-            k=num_images_per_class,
-        )
-        show_img_grid(np.transpose(train_data.data[rand_targets], axes=(0, 3, 1, 2)))
-        plt.title(cls)
-
-
-def show_img_grid(data):
-    try:
-        grid_img = torchvision.utils.make_grid(data.cpu().detach())
-    except:
-        data = torch.from_numpy(data)
-        grid_img = torchvision.utils.make_grid(data)
-
-    plt.figure(figsize=(10, 10))
-    plt.imshow(grid_img.permute(1, 2, 0))
-
-
-def show_random_images(data_loader):
-    data, target = next(iter(data_loader))
-    show_img_grid(data)
-
-
-def show_model_summary(model, batch_size):
-    summary(
-        model=model,
-        input_size=(batch_size, 3, 32, 32),
-        col_names=["input_size", "output_size", "num_params", "kernel_size"],
-        verbose=1,
-    )
-
-
-def lossacc_plots(results):
-    plt.plot(results["epoch"], results["trainloss"])
-    plt.plot(results["epoch"], results["testloss"])
-    plt.legend(["Train Loss", "Validation Loss"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Loss")
-    plt.title("Loss vs Epochs")
-    plt.show()
-
-    plt.plot(results["epoch"], results["trainacc"])
-    plt.plot(results["epoch"], results["testacc"])
-    plt.legend(["Train Acc", "Validation Acc"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Accuracy")
-    plt.title("Accuracy vs Epochs")
-    plt.show()
-
-
-def lr_plots(results, length):
-    plt.plot(range(length), results["lr"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Learning Rate")
-    plt.title("Learning Rate vs Epochs")
-    plt.show()
-
-
-def get_misclassified(model, testloader, device, mis_count=10):
-    misimgs, mistgts, mispreds = [], [], []
-    with torch.no_grad():
-        for data, target in testloader:
-            data, target = data.to(device), target.to(device)
-            output = model(data)
-            pred = output.argmax(dim=1, keepdim=True)
-            misclassified = torch.argwhere(pred.squeeze() != target).squeeze()
-            for idx in misclassified:
-                if len(misimgs) >= mis_count:
-                    break
-                misimgs.append(data[idx])
-                mistgts.append(target[idx])
-                mispreds.append(pred[idx].squeeze())
-    return misimgs, mistgts, mispreds
-
-
-# def plot_misclassified(misimgs, mistgts, mispreds, classes):
-#     fig, axes = plt.subplots(len(misimgs) // 2, 2)
-#     fig.tight_layout()
-#     for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
-#         ax.imshow((img / img.max()).permute(1, 2, 0).cpu())
-#         ax.set_title(f"{classes[tgt]} | {classes[pred]}")
-#         ax.grid(False)
-#         ax.set_axis_off()
-#     plt.show()
-
-def get_misclassified_data(model, device, test_loader, count):
-    """
-    Function to run the model on test set and return misclassified images
-    :param model: Network Architecture
-    :param device: CPU/GPU
-    :param test_loader: DataLoader for test set
-    """
-    # Prepare the model for evaluation i.e. drop the dropout layer
-    model.eval()
-
-    # List to store misclassified Images
-    misclassified_data = []
-
-    # Reset the gradients
-    with torch.no_grad():
-        # Extract images, labels in a batch
-        for data, target in test_loader:
-
-            # Migrate the data to the device
-            data, target = data.to(device), target.to(device)
-
-            # Extract single image, label from the batch
-            for image, label in zip(data, target):
-
-                # Add batch dimension to the image
-                image = image.unsqueeze(0)
-
-                # Get the model prediction on the image
-                output = model(image)
-
-                # Convert the output from one-hot encoding to a value
-                pred = output.argmax(dim=1, keepdim=True)
-
-                # If prediction is incorrect, append the data
-                if pred != label:
-                    misclassified_data.append((image, label, pred))
-                if len(misclassified_data) >= count:
-                    break
-
-    return misclassified_data[:count]
-
-def plot_misclassified(data, classes, size=(10, 10), rows=2, cols=5, inv_normalize=None):
-    fig = plt.figure(figsize=size)
-    number_of_samples = len(data)
-    for i in range(number_of_samples):
-        plt.subplot(rows, cols, i + 1)
-        img = data[i][0].squeeze().to('cpu')
-        if inv_normalize is not None:
-            img = inv_normalize(img)
-        plt.imshow(np.transpose(img, (1, 2, 0)))
-        plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
-        plt.xticks([])
-        plt.yticks([])
-
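As a reference for how the two LR helpers in this (relocated) module fit together, a hedged sketch; the model, loader, device string, and the concrete LR range are placeholders, not values from the repo.

```python
import torch.nn as nn
import torch.optim as optim

# Placeholder model and trainloader; find_lr/one_cycle_lr come from the
# relocated common.py module.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Sweep the LR exponentially over 200 iterations and read the best max LR
# off the plotted curve, then build a one-cycle schedule around that peak.
find_lr(model, optimizer, criterion, "cuda", trainloader,
        numiter=200, startlr=1e-4, endlr=1)
scheduler = one_cycle_lr(optimizer, maxlr=1e-2, steps=len(trainloader), epochs=20)
```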
utils/utils/data.py DELETED
@@ -1,294 +0,0 @@
-"""
-Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
-"""
-
-import numpy as np
-import os
-import pandas as pd
-import torch
-import random
-from PIL import Image, ImageFile
-
-import lightning as L
-from torch.utils.data import Dataset, DataLoader
-import config as config
-
-from utils.utils import xywhn2xyxy, xyxy2xywhn
-
-from utils.utils import (
-    cells_to_bboxes,
-    iou_width_height as iou,
-    non_max_suppression as nms,
-    plot_image,
-)
-
-
-ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-
-class YOLODataset(Dataset):
-    def __init__(
-        self,
-        csv_file,
-        img_dir,
-        label_dir,
-        anchors,
-        image_size=416,
-        S=[13, 26, 52],
-        C=20,
-        transform=None,
-    ):
-        self.annotations = pd.read_csv(csv_file)
-        self.img_dir = img_dir
-        self.label_dir = label_dir
-        self.image_size = image_size
-        self.mosaic_border = [image_size // 2, image_size // 2]
-        self.transform = transform
-        self.S = S
-        self.anchors = torch.tensor(
-            anchors[0] + anchors[1] + anchors[2]
-        )  # for all 3 scales
-        self.num_anchors = self.anchors.shape[0]
-        self.num_anchors_per_scale = self.num_anchors // 3
-        self.C = C
-        self.ignore_iou_thresh = 0.5
-
-    def __len__(self):
-        return len(self.annotations)
-
-    def load_mosaic(self, index):
-        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
-        labels4 = []
-        s = self.image_size
-        yc, xc = (
-            int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
-        )  # mosaic center x, y
-        indices = [index] + random.choices(
-            range(len(self)), k=3
-        )  # 3 additional image indices
-        random.shuffle(indices)
-        for i, index in enumerate(indices):
-            # Load image
-            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
-            bboxes = np.roll(
-                np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
-            ).tolist()
-            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
-            img = np.array(Image.open(img_path).convert("RGB"))
-
-            h, w = img.shape[0], img.shape[1]
-            labels = np.array(bboxes)
-
-            # place img in img4
-            if i == 0:  # top left
-                img4 = np.full(
-                    (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
-                )  # base image with 4 tiles
-                x1a, y1a, x2a, y2a = (
-                    max(xc - w, 0),
-                    max(yc - h, 0),
-                    xc,
-                    yc,
-                )  # xmin, ymin, xmax, ymax (large image)
-                x1b, y1b, x2b, y2b = (
-                    w - (x2a - x1a),
-                    h - (y2a - y1a),
-                    w,
-                    h,
-                )  # xmin, ymin, xmax, ymax (small image)
-            elif i == 1:  # top right
-                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
-                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
-            elif i == 2:  # bottom left
-                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
-                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
-            elif i == 3:  # bottom right
-                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
-                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
-
-            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
-            padw = x1a - x1b
-            padh = y1a - y1b
-
-            # Labels
-            if labels.size:
-                labels[:, :-1] = xywhn2xyxy(
-                    labels[:, :-1], w, h, padw, padh
-                )  # normalized xywh to pixel xyxy format
-            labels4.append(labels)
-
-        # Concat/clip labels
-        labels4 = np.concatenate(labels4, 0)
-        for x in (labels4[:, :-1],):
-            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
-        # img4, labels4 = replicate(img4, labels4)  # replicate
-        labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
-        labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
-        labels4 = labels4[labels4[:, 2] > 0]
-        labels4 = labels4[labels4[:, 3] > 0]
-        return img4, labels4
-
-    def __getitem__(self, index):
-        if random.random() >= config.P_MOSAIC:
-            image, bboxes = self.load_mosaic(index)
-        else:
-            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
-            bboxes = np.roll(
-                np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
-            ).tolist()
-            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
-            image = np.array(Image.open(img_path).convert("RGB"))
-
-        if self.transform:
-            augmentations = self.transform(image=image, bboxes=bboxes)
-            image = augmentations["image"]
-            bboxes = augmentations["bboxes"]
-
-        # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
-        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
-        for box in bboxes:
-            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
-            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
-            x, y, width, height, class_label = box
-            has_anchor = [False] * 3  # each scale should have one anchor
-            for anchor_idx in anchor_indices:
-                scale_idx = anchor_idx // self.num_anchors_per_scale
-                anchor_on_scale = anchor_idx % self.num_anchors_per_scale
-                S = self.S[scale_idx]
-                i, j = int(S * y), int(S * x)  # which cell
-                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
-                if not anchor_taken and not has_anchor[scale_idx]:
-                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
-                    x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
-                    width_cell, height_cell = (
-                        width * S,
-                        height * S,
-                    )  # can be greater than 1 since it's relative to cell
-                    box_coordinates = torch.tensor(
-                        [x_cell, y_cell, width_cell, height_cell]
-                    )
-                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
-                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
-                    has_anchor[scale_idx] = True
-
-                elif (
-                    not anchor_taken
-                    and iou_anchors[anchor_idx] > self.ignore_iou_thresh
-                ):
-                    targets[scale_idx][
-                        anchor_on_scale, i, j, 0
-                    ] = -1  # ignore prediction
-
-        return image, tuple(targets)
-
-
-def test():
-    anchors = config.ANCHORS
-
-    transform = config.test_transforms
-
-    dataset = YOLODataset(
-        "COCO/train.csv",
-        "COCO/images/images/",
-        "COCO/labels/labels_new/",
-        S=[13, 26, 52],
-        anchors=anchors,
-        transform=transform,
-    )
-    S = [13, 26, 52]
-    scaled_anchors = torch.tensor(anchors) / (
-        1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
-    )
-    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
-    for x, y in loader:
-        boxes = []
-
-        for i in range(y[0].shape[1]):
-            anchor = scaled_anchors[i]
-            print(anchor.shape)
-            print(y[i].shape)
-            boxes += cells_to_bboxes(
-                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
-            )[0]
-        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
-        print(boxes)
-        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
-
-
-class PascalDataModule(L.LightningDataModule):
-    def __init__(
-        self,
-        train_csv_path=None,
-        test_csv_path=None,
-        batch_size=512,
-        shuffle=True,
-        num_workers=4,
-    ) -> None:
-        super().__init__()
-        self.train_csv_path = train_csv_path
-        self.test_csv_path = test_csv_path
-        self.batch_size = batch_size
-        self.shuffle = shuffle
-        self.num_workers = num_workers
-        self.IMAGE_SIZE = config.IMAGE_SIZE
-
-    def prepare_data(self) -> None:
-        pass
-
-    def setup(self, stage=None):
-        self.train_dataset = YOLODataset(
-            self.train_csv_path,
-            transform=config.train_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-        self.val_dataset = YOLODataset(
-            self.test_csv_path,
-            transform=config.test_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-        self.test_dataset = YOLODataset(
-            self.test_csv_path,
-            transform=config.test_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-    def train_dataloader(self):
-        return DataLoader(
-            dataset=self.train_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=True,
-            drop_last=False,
-        )
-
-    def val_dataloader(self):
-        return DataLoader(
-            dataset=self.val_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=False,
-            drop_last=False,
-        )
-
-    def test_dataloader(self):
-        return DataLoader(
-            dataset=self.test_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=False,
-            drop_last=False,
-        )
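For orientation, a hedged sketch of how `PascalDataModule` was meant to be consumed; the CSV paths are placeholders, and note that the dataloaders read batch size and worker count from `config`, not from the constructor arguments.

```python
# Hypothetical wiring; config.IMG_DIR, LABEL_DIR, ANCHORS, etc. must be set.
datamodule = PascalDataModule(
    train_csv_path="PASCAL_VOC/train.csv",
    test_csv_path="PASCAL_VOC/test.csv",
)
datamodule.setup()

train_loader = datamodule.train_dataloader()
images, targets = next(iter(train_loader))
# targets is a tuple of three tensors, one per scale:
# (B, 3, 13, 13, 6), (B, 3, 26, 26, 6), (B, 3, 52, 52, 6) for a 416px image
```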
utils/utils/gradcam.py DELETED
@@ -1,36 +0,0 @@
-import numpy as np
-from pytorch_grad_cam import EigenCAM
-from pytorch_grad_cam.utils.image import show_cam_on_image
-
-import matplotlib.pyplot as plt
-
-
-def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
-    results = []
-
-    targets = None
-    cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
-
-    for image in images:
-        input_tensor = image.unsqueeze(0)
-        grayscale_cam = cam(input_tensor, targets=targets)
-        grayscale_cam = grayscale_cam[0, :]
-
-        img = input_tensor.squeeze(0).to("cpu")
-        rgb_img = np.transpose(img, (1, 2, 0))
-        rgb_img = rgb_img.numpy()
-
-        cam_image = show_cam_on_image(
-            rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
-        )
-        results.append(cam_image)
-    return results
-
-
-def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
-    fig = plt.figure(figsize=figsize)
-    for i in range(len(images)):
-        plt.subplot(rows, cols, i + 1)
-        plt.imshow(images[i])
-        plt.xticks([])
-        plt.yticks([])
utils/utils/loss.py DELETED
@@ -1,90 +0,0 @@
-"""
-Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
-the difference from what I can tell is I use CrossEntropy for the classes
-instead of BinaryCrossEntropy.
-"""
-import random
-import torch
-import torch.nn as nn
-
-from utils.utils import intersection_over_union
-
-
-class YoloLoss(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.mse = nn.MSELoss()
-        self.bce = nn.BCEWithLogitsLoss()
-        self.entropy = nn.CrossEntropyLoss()
-        self.sigmoid = nn.Sigmoid()
-
-        # Constants signifying how much to pay for each respective part of the loss
-        self.lambda_class = 1
-        self.lambda_noobj = 10
-        self.lambda_obj = 1
-        self.lambda_box = 10
-
-    def forward(self, predictions, target, anchors):
-        # Check where obj and noobj (we ignore if target == -1)
-        obj = target[..., 0] == 1  # in paper this is Iobj_i
-        noobj = target[..., 0] == 0  # in paper this is Inoobj_i
-
-        # ======================= #
-        #   FOR NO OBJECT LOSS    #
-        # ======================= #
-
-        no_object_loss = self.bce(
-            (predictions[..., 0:1][noobj]),
-            (target[..., 0:1][noobj]),
-        )
-
-        # ==================== #
-        #   FOR OBJECT LOSS    #
-        # ==================== #
-        anchors = anchors.reshape(1, 3, 1, 1, 2)
-
-        box_preds = torch.cat(
-            [
-                self.sigmoid(predictions[..., 1:3]),
-                torch.exp(predictions[..., 3:5]) * anchors,
-            ],
-            dim=-1,
-        )
-        ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
-        # ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj])
-        object_loss = self.mse(
-            self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj]
-        )
-
-        # ======================== #
-        #   FOR BOX COORDINATES    #
-        # ======================== #
-
-        predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3])  # x,y coordinates
-        target[..., 3:5] = torch.log(
-            (1e-16 + target[..., 3:5] / anchors)
-        )  # width, height coordinates
-        box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
-
-        # ================== #
-        #   FOR CLASS LOSS   #
-        # ================== #
-
-        class_loss = self.entropy(
-            (predictions[..., 5:][obj]),
-            (target[..., 5][obj].long()),
-        )
-
-        # print("__________________________________")
-        # print(self.lambda_box * box_loss)
-        # print(self.lambda_obj * object_loss)
-        # print(self.lambda_noobj * no_object_loss)
-        # print(self.lambda_class * class_loss)
-        # print("\n")
-
-        return (
-            self.lambda_box * box_loss
-            + self.lambda_obj * object_loss
-            + self.lambda_noobj * no_object_loss
-            + self.lambda_class * class_loss
-        )
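A hedged sketch of evaluating `YoloLoss` at a single scale; the shapes follow the 416-pixel, 20-class PASCAL setup and the labeled cell is an arbitrary example. Note that `forward` expects anchors already scaled to grid units, and it mutates `predictions` and `target` in place.

```python
import torch

loss_fn = YoloLoss()

# One scale: (N, anchors, S, S, 5 + num_classes) predictions against
# (N, anchors, S, S, [objectness, x, y, w, h, class]) targets.
predictions = torch.randn(2, 3, 13, 13, 25)
target = torch.zeros(2, 3, 13, 13, 6)
target[0, 0, 6, 6] = torch.tensor([1.0, 0.5, 0.5, 2.0, 2.0, 7.0])  # one labeled box

scaled_anchors = torch.rand(3, 2) * 13  # placeholder anchors, in cell units
loss = loss_fn(predictions, target, scaled_anchors)
```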
utils/utils/utils.py DELETED
@@ -1,668 +0,0 @@
-import config
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import numpy as np
-import os
-import random
-import torch
-
-from collections import Counter
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-
-
-def iou_width_height(boxes1, boxes2):
-    """
-    Parameters:
-        boxes1 (tensor): width and height of the first bounding boxes
-        boxes2 (tensor): width and height of the second bounding boxes
-    Returns:
-        tensor: Intersection over union of the corresponding boxes
-    """
-    intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
-        boxes1[..., 1], boxes2[..., 1]
-    )
-    union = (
-        boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
-    )
-    return intersection / union
-
-
-def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
-    """
-    Video explanation of this function:
-    https://youtu.be/XXYG5ZWtjj0
-
-    This function calculates intersection over union (iou) given pred boxes
-    and target boxes.
-
-    Parameters:
-        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
-        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
-        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
-
-    Returns:
-        tensor: Intersection over union for all examples
-    """
-
-    if box_format == "midpoint":
-        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
-        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
-        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
-        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
-        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
-        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
-        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
-        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
-
-    if box_format == "corners":
-        box1_x1 = boxes_preds[..., 0:1]
-        box1_y1 = boxes_preds[..., 1:2]
-        box1_x2 = boxes_preds[..., 2:3]
-        box1_y2 = boxes_preds[..., 3:4]
-        box2_x1 = boxes_labels[..., 0:1]
-        box2_y1 = boxes_labels[..., 1:2]
-        box2_x2 = boxes_labels[..., 2:3]
-        box2_y2 = boxes_labels[..., 3:4]
-
-    x1 = torch.max(box1_x1, box2_x1)
-    y1 = torch.max(box1_y1, box2_y1)
-    x2 = torch.min(box1_x2, box2_x2)
-    y2 = torch.min(box1_y2, box2_y2)
-
-    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
-    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
-    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
-
-    return intersection / (box1_area + box2_area - intersection + 1e-6)
-
-
-def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
-    """
-    Video explanation of this function:
-    https://youtu.be/YDkjWEN8jNA
-
-    Does Non Max Suppression given bboxes
-
-    Parameters:
-        bboxes (list): list of lists containing all bboxes with each bboxes
-        specified as [class_pred, prob_score, x1, y1, x2, y2]
-        iou_threshold (float): threshold where predicted bboxes is correct
-        threshold (float): threshold to remove predicted bboxes (independent of IoU)
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-
-    Returns:
-        list: bboxes after performing NMS given a specific IoU threshold
-    """
-
-    assert type(bboxes) == list
-
-    bboxes = [box for box in bboxes if box[1] > threshold]
-    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
-    bboxes_after_nms = []
-
-    while bboxes:
-        chosen_box = bboxes.pop(0)
-
-        bboxes = [
-            box
-            for box in bboxes
-            if box[0] != chosen_box[0]
-            or intersection_over_union(
-                torch.tensor(chosen_box[2:]),
-                torch.tensor(box[2:]),
-                box_format=box_format,
-            )
-            < iou_threshold
-        ]
-
-        bboxes_after_nms.append(chosen_box)
-
-    return bboxes_after_nms
-
-
-def mean_average_precision(
-    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
-):
-    """
-    Video explanation of this function:
-    https://youtu.be/FppOzcDvaDI
-
-    This function calculates mean average precision (mAP)
-
-    Parameters:
-        pred_boxes (list): list of lists containing all bboxes with each bboxes
-        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
-        true_boxes (list): Similar as pred_boxes except all the correct ones
-        iou_threshold (float): threshold where predicted bboxes is correct
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-        num_classes (int): number of classes
-
-    Returns:
-        float: mAP value across all classes given a specific IoU threshold
-    """
-
-    # list storing all AP for respective classes
-    average_precisions = []
-
-    # used for numerical stability later on
-    epsilon = 1e-6
-
-    for c in range(num_classes):
-        detections = []
-        ground_truths = []
-
-        # Go through all predictions and targets,
-        # and only add the ones that belong to the
-        # current class c
-        for detection in pred_boxes:
-            if detection[1] == c:
-                detections.append(detection)
-
-        for true_box in true_boxes:
-            if true_box[1] == c:
-                ground_truths.append(true_box)
-
-        # find the amount of bboxes for each training example
-        # Counter here finds how many ground truth bboxes we get
-        # for each training example, so let's say img 0 has 3,
-        # img 1 has 5 then we will obtain a dictionary with:
-        # amount_bboxes = {0:3, 1:5}
-        amount_bboxes = Counter([gt[0] for gt in ground_truths])
-
-        # We then go through each key, val in this dictionary
-        # and convert to the following (w.r.t same example):
-        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
-        for key, val in amount_bboxes.items():
-            amount_bboxes[key] = torch.zeros(val)
-
-        # sort by box probabilities which is index 2
-        detections.sort(key=lambda x: x[2], reverse=True)
-        TP = torch.zeros((len(detections)))
-        FP = torch.zeros((len(detections)))
-        total_true_bboxes = len(ground_truths)
-
-        # If none exists for this class then we can safely skip
-        if total_true_bboxes == 0:
-            continue
-
-        for detection_idx, detection in enumerate(detections):
-            # Only take out the ground_truths that have the same
-            # training idx as detection
-            ground_truth_img = [
-                bbox for bbox in ground_truths if bbox[0] == detection[0]
-            ]
-
-            num_gts = len(ground_truth_img)
-            best_iou = 0
-
-            for idx, gt in enumerate(ground_truth_img):
-                iou = intersection_over_union(
-                    torch.tensor(detection[3:]),
-                    torch.tensor(gt[3:]),
-                    box_format=box_format,
-                )
-
-                if iou > best_iou:
-                    best_iou = iou
-                    best_gt_idx = idx
-
-            if best_iou > iou_threshold:
-                # only detect ground truth detection once
-                if amount_bboxes[detection[0]][best_gt_idx] == 0:
-                    # true positive and add this bounding box to seen
-                    TP[detection_idx] = 1
-                    amount_bboxes[detection[0]][best_gt_idx] = 1
-                else:
-                    FP[detection_idx] = 1
-
-            # if IOU is lower then the detection is a false positive
-            else:
-                FP[detection_idx] = 1
-
-        TP_cumsum = torch.cumsum(TP, dim=0)
-        FP_cumsum = torch.cumsum(FP, dim=0)
-        recalls = TP_cumsum / (total_true_bboxes + epsilon)
-        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
-        precisions = torch.cat((torch.tensor([1]), precisions))
-        recalls = torch.cat((torch.tensor([0]), recalls))
-        # torch.trapz for numerical integration
-        average_precisions.append(torch.trapz(precisions, recalls))
-
-    return sum(average_precisions) / len(average_precisions)
-
-
-def plot_image(image, boxes):
-    """Plots predicted bounding boxes on the image"""
-    cmap = plt.get_cmap("tab20b")
-    class_labels = (
-        config.COCO_LABELS if config.DATASET == "COCO" else config.PASCAL_CLASSES
-    )
-    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
-    im = np.array(image)
-    height, width, _ = im.shape
-
-    # Create figure and axes
-    fig, ax = plt.subplots(1)
-    # Display the image
-    ax.imshow(im)
-
-    # box[0] is x midpoint, box[2] is width
-    # box[1] is y midpoint, box[3] is height
-
-    # Create a Rectangle patch
-    for box in boxes:
-        assert (
-            len(box) == 6
-        ), "box should contain class pred, confidence, x, y, width, height"
-        class_pred = box[0]
-        box = box[2:]
-        upper_left_x = box[0] - box[2] / 2
-        upper_left_y = box[1] - box[3] / 2
-        rect = patches.Rectangle(
-            (upper_left_x * width, upper_left_y * height),
-            box[2] * width,
-            box[3] * height,
-            linewidth=2,
-            edgecolor=colors[int(class_pred)],
-            facecolor="none",
-        )
-        # Add the patch to the Axes
-        ax.add_patch(rect)
-        plt.text(
-            upper_left_x * width,
-            upper_left_y * height,
-            s=class_labels[int(class_pred)],
-            color="white",
-            verticalalignment="top",
-            bbox={"color": colors[int(class_pred)], "pad": 0},
-        )
-
-    plt.show()
-
-
-def get_evaluation_bboxes(
-    loader,
-    model,
-    iou_threshold,
-    anchors,
-    threshold,
-    box_format="midpoint",
-    device="cuda",
-):
-    # make sure model is in eval before get bboxes
-    model.eval()
-    train_idx = 0
-    all_pred_boxes = []
-    all_true_boxes = []
-    for batch_idx, (x, labels) in enumerate(tqdm(loader)):
-        x = x.to(device)
-
-        with torch.no_grad():
-            predictions = model(x)
-
-        batch_size = x.shape[0]
-        bboxes = [[] for _ in range(batch_size)]
-        for i in range(3):
-            S = predictions[i].shape[2]
-            anchor = torch.tensor([*anchors[i]]).to(device) * S
-            boxes_scale_i = cells_to_bboxes(predictions[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-        # we just want one bbox for each label, not one for each scale
-        true_bboxes = cells_to_bboxes(labels[2], anchor, S=S, is_preds=False)
-
-        for idx in range(batch_size):
-            nms_boxes = non_max_suppression(
-                bboxes[idx],
-                iou_threshold=iou_threshold,
-                threshold=threshold,
-                box_format=box_format,
-            )
-
-            for nms_box in nms_boxes:
-                all_pred_boxes.append([train_idx] + nms_box)
-
-            for box in true_bboxes[idx]:
-                if box[1] > threshold:
-                    all_true_boxes.append([train_idx] + box)
-
-            train_idx += 1
-
-    model.train()
-    return all_pred_boxes, all_true_boxes
-
-
-def cells_to_bboxes(predictions, anchors, S, is_preds=True):
-    """
-    Scales the predictions coming from the model to
-    be relative to the entire image such that they for example later
-    can be plotted or.
-    INPUT:
-    predictions: tensor of size (N, 3, S, S, num_classes+5)
-    anchors: the anchors used for the predictions
-    S: the number of cells the image is divided in on the width (and height)
-    is_preds: whether the input is predictions or the true bounding boxes
-    OUTPUT:
-    converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
-                      object score, bounding box coordinates
-    """
-    BATCH_SIZE = predictions.shape[0]
-    num_anchors = len(anchors)
-    box_predictions = predictions[..., 1:5]
-    if is_preds:
-        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
-        box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
-        box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
-        scores = torch.sigmoid(predictions[..., 0:1])
-        best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
-    else:
-        scores = predictions[..., 0:1]
-        best_class = predictions[..., 5:6]
-
-    cell_indices = (
-        torch.arange(S)
-        .repeat(predictions.shape[0], 3, S, 1)
-        .unsqueeze(-1)
-        .to(predictions.device)
-    )
-    x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
-    y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
-    w_h = 1 / S * box_predictions[..., 2:4]
-    converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(
-        BATCH_SIZE, num_anchors * S * S, 6
-    )
-    return converted_bboxes.tolist()
-
-
-def check_class_accuracy(model, loader, threshold):
-    model.eval()
-    tot_class_preds, correct_class = 0, 0
-    tot_noobj, correct_noobj = 0, 0
-    tot_obj, correct_obj = 0, 0
-
-    for idx, (x, y) in enumerate(tqdm(loader)):
-        x = x.to(config.DEVICE)
-        with torch.no_grad():
-            out = model(x)
-
-        for i in range(3):
-            y[i] = y[i].to(config.DEVICE)
-            obj = y[i][..., 0] == 1  # in paper this is Iobj_i
-            noobj = y[i][..., 0] == 0  # in paper this is Iobj_i
-
-            correct_class += torch.sum(
-                torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
-            )
-            tot_class_preds += torch.sum(obj)
-
-            obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
-            correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
-            tot_obj += torch.sum(obj)
-            correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
-            tot_noobj += torch.sum(noobj)
-
-    class_acc = (correct_class / (tot_class_preds + 1e-16)) * 100
-    noobj_acc = (correct_noobj / (tot_noobj + 1e-16)) * 100
-    obj_acc = (correct_obj / (tot_obj + 1e-16)) * 100
-
-    print(f"Class accuracy is: {class_acc:2f}%")
-    print(f"No obj accuracy is: {noobj_acc:2f}%")
-    print(f"Obj accuracy is: {obj_acc:2f}%")
-    model.train()
-    return class_acc, noobj_acc, obj_acc
-
-
-def get_mean_std(loader):
-    # var[X] = E[X**2] - E[X]**2
-    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
-
-    for data, _ in tqdm(loader):
-        channels_sum += torch.mean(data, dim=[0, 2, 3])
-        channels_sqrd_sum += torch.mean(data**2, dim=[0, 2, 3])
-        num_batches += 1
-
-    mean = channels_sum / num_batches
-    std = (channels_sqrd_sum / num_batches - mean**2) ** 0.5
-
-    return mean, std
-
-
-def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
-    print("=> Saving checkpoint")
-    checkpoint = {
-        "state_dict": model.state_dict(),
-        "optimizer": optimizer.state_dict(),
-    }
-    torch.save(checkpoint, filename)
-
-
-def load_checkpoint(checkpoint_file, model, optimizer, lr):
-    print("=> Loading checkpoint")
-    checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
-    model.load_state_dict(checkpoint["state_dict"])
-    optimizer.load_state_dict(checkpoint["optimizer"])
-
-    # If we don't do this then it will just have learning rate of old checkpoint
-    # and it will lead to many hours of debugging \:
-    for param_group in optimizer.param_groups:
-        param_group["lr"] = lr
-
-
-def get_loaders(train_csv_path, test_csv_path):
-    from dataset import YOLODataset
-
-    IMAGE_SIZE = config.IMAGE_SIZE
-    train_dataset = YOLODataset(
-        train_csv_path,
-        transform=config.train_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    test_dataset = YOLODataset(
-        test_csv_path,
-        transform=config.test_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    train_loader = DataLoader(
-        dataset=train_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=True,
-        drop_last=False,
-    )
-    test_loader = DataLoader(
-        dataset=test_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=False,
-        drop_last=False,
-    )
-
-    train_eval_dataset = YOLODataset(
-        train_csv_path,
-        transform=config.test_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    train_eval_loader = DataLoader(
-        dataset=train_eval_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=False,
-        drop_last=False,
-    )
-
-    return train_loader, test_loader, train_eval_loader
-
-
-def plot_couple_examples(model, loader, thresh, iou_thresh, anchors):
-    model.eval()
-    x, y = next(iter(loader))
-    x = x.to("cuda")
-    with torch.no_grad():
-        out = model(x)
-        bboxes = [[] for _ in range(x.shape[0])]
-        for i in range(3):
-            batch_size, A, S, _, _ = out[i].shape
-            anchor = anchors[i]
-            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-    model.train()
-
-    for i in range(batch_size // 4):
-        nms_boxes = non_max_suppression(
-            bboxes[i],
-            iou_threshold=iou_thresh,
-            threshold=thresh,
-            box_format="midpoint",
-        )
-        plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
-
-
-def seed_everything(seed=42):
-    os.environ["PYTHONHASHSEED"] = str(seed)
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-
-def clip_coords(boxes, img_shape):
-    # Clip bounding xyxy bounding boxes to image shape (height, width)
-    boxes[:, 0].clamp_(0, img_shape[1])  # x1
-    boxes[:, 1].clamp_(0, img_shape[0])  # y1
-    boxes[:, 2].clamp_(0, img_shape[1])  # x2
-    boxes[:, 3].clamp_(0, img_shape[0])  # y2
-
-
-def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
-    # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
-    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
-    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
-    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
-    return y
-
-
-def xyn2xy(x, w=640, h=640, padw=0, padh=0):
-    # Convert normalized segments into pixel segments, shape (n,2)
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = w * x[..., 0] + padw  # top left x
-    y[..., 1] = h * x[..., 1] + padh  # top left y
-    return y
-
-
-def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
-    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
-    if clip:
-        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
-    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
-    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
-    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
-    return y
-
-
-def clip_boxes(boxes, shape):
-    # Clip boxes (xyxy) to image shape (height, width)
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[..., 0].clamp_(0, shape[1])  # x1
-        boxes[..., 1].clamp_(0, shape[0])  # y1
-        boxes[..., 2].clamp_(0, shape[1])  # x2
-        boxes[..., 3].clamp_(0, shape[0])  # y2
-    else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
-
-
-def save_result(image, boxes, index):
-    """Plots predicted bounding boxes on the image"""
-    cmap = plt.get_cmap("tab20b")
-    class_labels = config.PASCAL_CLASSES
-
-    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
-    im = np.array(image)
-    height, width, _ = im.shape
-
-    # Create figure and axes
-    fig, ax = plt.subplots(1)
-    # Display the image
-    ax.imshow(im)
-
-    # box[0] is x midpoint, box[2] is width
-    # box[1] is y midpoint, box[3] is height
-
-    # Create a Rectangle patch
-    for box in boxes:
-        assert (
-            len(box) == 6
-        ), "box should contain class pred, confidence, x, y, width, height"
-        class_pred = box[0]
-        box = box[2:]
-        upper_left_x = box[0] - box[2] / 2
-        upper_left_y = box[1] - box[3] / 2
-        rect = patches.Rectangle(
-            (upper_left_x * width, upper_left_y * height),
-            box[2] * width,
-            box[3] * height,
-            linewidth=2,
-            edgecolor=colors[int(class_pred)],
-            facecolor="none",
-        )
-        # Add the patch to the Axes
-        ax.add_patch(rect)
-        plt.text(
-            upper_left_x * width,
-            upper_left_y * height,
-            s=class_labels[int(class_pred)],
-            color="white",
-            verticalalignment="top",
-            bbox={"color": colors[int(class_pred)], "pad": 0},
-        )
-    ax.grid(False)
-    ax.set_axis_off()
-
-    plt.savefig(f"output/img{index}.png")
-
-
-def generate_result(model, data, thresh, iou_thresh, anchors):
-    model.eval()
-    x = data
-    # x = x.to("cuda")
-    with torch.no_grad():
-        out = model(x)
-        bboxes = [[] for _ in range(x.shape[0])]
-        for i in range(3):
-            batch_size, A, S, _, _ = out[i].shape
-            anchor = anchors[i]
-            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-    for i in range(batch_size):
-        nms_boxes = non_max_suppression(
-            bboxes[i],
-            iou_threshold=iou_thresh,
-            threshold=thresh,
-            box_format="midpoint",
-        )
-        save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
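Finally, since the box utilities are the heart of this file, a small self-contained sketch of the NMS contract (the numbers are made up for illustration):

```python
# Each box is [class_pred, confidence, x, y, w, h] in midpoint format.
boxes = [
    [0, 0.9, 0.50, 0.5, 0.4, 0.4],
    [0, 0.6, 0.52, 0.5, 0.4, 0.4],  # heavy overlap with the 0.9 box
    [1, 0.8, 0.20, 0.2, 0.1, 0.1],  # different class, kept regardless of IoU
]
kept = non_max_suppression(boxes, iou_threshold=0.5, threshold=0.4,
                           box_format="midpoint")
# kept == the 0.9 class-0 box and the 0.8 class-1 box; the 0.6 duplicate is
# suppressed because its IoU with the chosen class-0 box exceeds 0.5.
```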