ravi.naik committed
Commit 3129974
1 Parent(s): adde10a

Directory name bug fix
utils/gradcam.py CHANGED
@@ -1,67 +1,36 @@
 import numpy as np
-from pytorch_grad_cam import GradCAM
-from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+from pytorch_grad_cam import EigenCAM
 from pytorch_grad_cam.utils.image import show_cam_on_image
 
 import matplotlib.pyplot as plt
 
 
-def generate_gradcam(model, target_layers, images, labels, rgb_imgs):
+def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
     results = []
-    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
 
-    for image, label, np_image in zip(images, labels, rgb_imgs):
-        targets = [ClassifierOutputTarget(label.item())]
+    targets = None
+    cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
 
-        # You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
-        grayscale_cam = cam(
-            input_tensor=image.unsqueeze(0), targets=targets, aug_smooth=True
-        )
-
-        # In this example grayscale_cam has only one image in the batch:
+    for image in images:
+        input_tensor = image.unsqueeze(0)
+        grayscale_cam = cam(input_tensor, targets=targets)
         grayscale_cam = grayscale_cam[0, :]
-        visualization = show_cam_on_image(
-            np_image / np_image.max(), grayscale_cam, use_rgb=True
+
+        img = input_tensor.squeeze(0).to("cpu")
+        rgb_img = np.transpose(img, (1, 2, 0))
+        rgb_img = rgb_img.numpy()
+
+        cam_image = show_cam_on_image(
+            rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
         )
-        results.append(visualization)
+        results.append(cam_image)
     return results
 
 
-def visualize_gradcam(misimgs, mistgts, mispreds, classes):
-    fig, axes = plt.subplots(len(misimgs) // 2, 2)
-    fig.tight_layout()
-    for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
-        ax.imshow(img)
-        ax.set_title(f"{classes[tgt]} | {classes[pred]}")
-        ax.grid(False)
-        ax.set_axis_off()
-    plt.show()
-
-def plot_gradcam(model, data, classes, target_layers, number_of_samples, inv_normalize=None, targets=None, transparency = 0.60, figsize=(10,10), rows=2, cols=5):
-
+def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
     fig = plt.figure(figsize=figsize)
-
-    cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
-    for i in range(number_of_samples):
-        plt.subplot(rows, cols, i + 1)
-        input_tensor = data[i][0]
-
-        # Get the activations of the layer for the images
-        grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
-        grayscale_cam = grayscale_cam[0, :]
-
-        # Get back the original image
-        img = input_tensor.squeeze(0).to('cpu')
-        if inv_normalize is not None:
-            img = inv_normalize(img)
-        rgb_img = np.transpose(img, (1, 2, 0))
-        rgb_img = rgb_img.numpy()
-
-        # Mix the activations on the original image
-        visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency)
-
-        # Display the images on the plot
-        plt.imshow(visualization)
-        plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
-        plt.xticks([])
-        plt.yticks([])
+    for i in range(len(images)):
+        plt.subplot(rows, cols, i + 1)
+        plt.imshow(images[i])
+        plt.xticks([])
+        plt.yticks([])
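For reference, a minimal sketch of how the refactored helpers can be driven end to end. The ResNet-18 backbone, layer choice, and random inputs are illustrative assumptions, not part of this commit; the only real requirement is that each image tensor is CHW with values in [0, 1], since `show_cam_on_image` expects normalized floats.

```python
import torch
import torchvision

from utils.gradcam import generate_gradcam, visualize_gradcam

# Hypothetical model and inputs; any CNN plus a list of its conv layers works.
model = torchvision.models.resnet18(weights=None).eval()
target_layers = [model.layer4[-1]]
images = [torch.rand(3, 224, 224) for _ in range(10)]  # CHW floats in [0, 1]

cam_images = generate_gradcam(model, target_layers, images, use_cuda=False)
visualize_gradcam(cam_images, figsize=(10, 10), rows=2, cols=5)
```

Note that the new signature drops the per-sample labels: EigenCAM works without class targets (`targets = None`), which is what lets the refactor remove `ClassifierOutputTarget` entirely.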
utils/utils.py CHANGED
@@ -592,3 +592,77 @@ def clip_boxes(boxes, shape):
     else:  # np.array (faster grouped)
         boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
         boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
+
+
+def save_result(image, boxes, index):
+    """Plots predicted bounding boxes on the image and saves the figure"""
+    cmap = plt.get_cmap("tab20b")
+    class_labels = config.PASCAL_CLASSES
+
+    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
+    im = np.array(image)
+    height, width, _ = im.shape
+
+    # Create figure and axes
+    fig, ax = plt.subplots(1)
+    # Display the image
+    ax.imshow(im)
+
+    # box[0] is x midpoint, box[2] is width
+    # box[1] is y midpoint, box[3] is height
+
+    # Create a Rectangle patch
+    for box in boxes:
+        assert (
+            len(box) == 6
+        ), "box should contain class pred, confidence, x, y, width, height"
+        class_pred = box[0]
+        box = box[2:]
+        upper_left_x = box[0] - box[2] / 2
+        upper_left_y = box[1] - box[3] / 2
+        rect = patches.Rectangle(
+            (upper_left_x * width, upper_left_y * height),
+            box[2] * width,
+            box[3] * height,
+            linewidth=2,
+            edgecolor=colors[int(class_pred)],
+            facecolor="none",
+        )
+        # Add the patch to the Axes
+        ax.add_patch(rect)
+        plt.text(
+            upper_left_x * width,
+            upper_left_y * height,
+            s=class_labels[int(class_pred)],
+            color="white",
+            verticalalignment="top",
+            bbox={"color": colors[int(class_pred)], "pad": 0},
+        )
+    ax.grid(False)
+    ax.set_axis_off()
+
+    plt.savefig(f"output/img{index}.png")
+
+
+def generate_result(model, data, thresh, iou_thresh, anchors):
+    model.eval()
+    x = data
+    # x = x.to("cuda")
+    with torch.no_grad():
+        out = model(x)
+        bboxes = [[] for _ in range(x.shape[0])]
+        for i in range(3):
+            batch_size, A, S, _, _ = out[i].shape
+            anchor = anchors[i]
+            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
+            for idx, (box) in enumerate(boxes_scale_i):
+                bboxes[idx] += box
+
+    for i in range(batch_size):
+        nms_boxes = non_max_suppression(
+            bboxes[i],
+            iou_threshold=iou_thresh,
+            threshold=thresh,
+            box_format="midpoint",
+        )
+        save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
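A hedged sketch of how the new `generate_result` helper might be invoked; the model, loader, and anchor scaling are assumptions patterned on the rest of the repo, and `save_result` writes into `output/`, so that directory must exist first.

```python
import os
import torch

import config  # repo-level config module, assumed on the path

os.makedirs("output", exist_ok=True)  # save_result writes output/img{i}.png

# Hypothetical batch from an existing test loader; anchors are scaled to grid
# units per scale, mirroring how the repo's test() helper builds them.
x, _ = next(iter(test_loader))
S = [13, 26, 52]
scaled_anchors = torch.tensor(config.ANCHORS) * (
    torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
)
generate_result(model, x, thresh=0.6, iou_thresh=0.5, anchors=scaled_anchors)
```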
utils/utils/common.py DELETED
@@ -1,185 +0,0 @@
-import numpy as np
-import random
-import matplotlib.pyplot as plt
-
-import torch
-import torchvision
-from torchinfo import summary
-from torch_lr_finder import LRFinder
-
-
-def find_lr(model, optimizer, criterion, device, trainloader, numiter, startlr, endlr):
-    lr_finder = LRFinder(
-        model=model, optimizer=optimizer, criterion=criterion, device=device
-    )
-
-    lr_finder.range_test(
-        train_loader=trainloader,
-        start_lr=startlr,
-        end_lr=endlr,
-        num_iter=numiter,
-        step_mode="exp",
-    )
-
-    lr_finder.plot()
-
-    lr_finder.reset()
-
-
-def one_cycle_lr(optimizer, maxlr, steps, epochs):
-    scheduler = torch.optim.lr_scheduler.OneCycleLR(
-        optimizer=optimizer,
-        max_lr=maxlr,
-        steps_per_epoch=steps,
-        epochs=epochs,
-        pct_start=5 / epochs,
-        div_factor=100,
-        three_phase=False,
-        final_div_factor=100,
-        anneal_strategy="linear",
-    )
-    return scheduler
-
-
-def show_random_images_for_each_class(train_data, num_images_per_class=16):
-    for c, cls in enumerate(train_data.classes):
-        rand_targets = random.sample(
-            [n for n, x in enumerate(train_data.targets) if x == c],
-            k=num_images_per_class,
-        )
-        show_img_grid(np.transpose(train_data.data[rand_targets], axes=(0, 3, 1, 2)))
-        plt.title(cls)
-
-
-def show_img_grid(data):
-    try:
-        grid_img = torchvision.utils.make_grid(data.cpu().detach())
-    except:
-        data = torch.from_numpy(data)
-        grid_img = torchvision.utils.make_grid(data)
-
-    plt.figure(figsize=(10, 10))
-    plt.imshow(grid_img.permute(1, 2, 0))
-
-
-def show_random_images(data_loader):
-    data, target = next(iter(data_loader))
-    show_img_grid(data)
-
-
-def show_model_summary(model, batch_size):
-    summary(
-        model=model,
-        input_size=(batch_size, 3, 32, 32),
-        col_names=["input_size", "output_size", "num_params", "kernel_size"],
-        verbose=1,
-    )
-
-
-def lossacc_plots(results):
-    plt.plot(results["epoch"], results["trainloss"])
-    plt.plot(results["epoch"], results["testloss"])
-    plt.legend(["Train Loss", "Validation Loss"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Loss")
-    plt.title("Loss vs Epochs")
-    plt.show()
-
-    plt.plot(results["epoch"], results["trainacc"])
-    plt.plot(results["epoch"], results["testacc"])
-    plt.legend(["Train Acc", "Validation Acc"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Accuracy")
-    plt.title("Accuracy vs Epochs")
-    plt.show()
-
-
-def lr_plots(results, length):
-    plt.plot(range(length), results["lr"])
-    plt.xlabel("Epochs")
-    plt.ylabel("Learning Rate")
-    plt.title("Learning Rate vs Epochs")
-    plt.show()
-
-
-def get_misclassified(model, testloader, device, mis_count=10):
-    misimgs, mistgts, mispreds = [], [], []
-    with torch.no_grad():
-        for data, target in testloader:
-            data, target = data.to(device), target.to(device)
-            output = model(data)
-            pred = output.argmax(dim=1, keepdim=True)
-            misclassified = torch.argwhere(pred.squeeze() != target).squeeze()
-            for idx in misclassified:
-                if len(misimgs) >= mis_count:
-                    break
-                misimgs.append(data[idx])
-                mistgts.append(target[idx])
-                mispreds.append(pred[idx].squeeze())
-    return misimgs, mistgts, mispreds
-
-
-# def plot_misclassified(misimgs, mistgts, mispreds, classes):
-#     fig, axes = plt.subplots(len(misimgs) // 2, 2)
-#     fig.tight_layout()
-#     for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
-#         ax.imshow((img / img.max()).permute(1, 2, 0).cpu())
-#         ax.set_title(f"{classes[tgt]} | {classes[pred]}")
-#         ax.grid(False)
-#         ax.set_axis_off()
-#     plt.show()
-
-def get_misclassified_data(model, device, test_loader, count):
-    """
-    Function to run the model on test set and return misclassified images
-    :param model: Network Architecture
-    :param device: CPU/GPU
-    :param test_loader: DataLoader for test set
-    """
-    # Prepare the model for evaluation i.e. drop the dropout layer
-    model.eval()
-
-    # List to store misclassified Images
-    misclassified_data = []
-
-    # Reset the gradients
-    with torch.no_grad():
-        # Extract images, labels in a batch
-        for data, target in test_loader:
-
-            # Migrate the data to the device
-            data, target = data.to(device), target.to(device)
-
-            # Extract single image, label from the batch
-            for image, label in zip(data, target):
-
-                # Add batch dimension to the image
-                image = image.unsqueeze(0)
-
-                # Get the model prediction on the image
-                output = model(image)
-
-                # Convert the output from one-hot encoding to a value
-                pred = output.argmax(dim=1, keepdim=True)
-
-                # If prediction is incorrect, append the data
-                if pred != label:
-                    misclassified_data.append((image, label, pred))
-                if len(misclassified_data) >= count:
-                    break
-
-    return misclassified_data[:count]
-
-def plot_misclassified(data, classes, size=(10, 10), rows=2, cols=5, inv_normalize=None):
-    fig = plt.figure(figsize=size)
-    number_of_samples = len(data)
-    for i in range(number_of_samples):
-        plt.subplot(rows, cols, i + 1)
-        img = data[i][0].squeeze().to('cpu')
-        if inv_normalize is not None:
-            img = inv_normalize(img)
-        plt.imshow(np.transpose(img, (1, 2, 0)))
-        plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
-        plt.xticks([])
-        plt.yticks([])
-
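As a reference for how the two LR helpers in this (relocated) module fit together, a hedged sketch; the model, loader, device string, and the concrete LR range are placeholders, not values from the repo.

```python
import torch.nn as nn
import torch.optim as optim

# Placeholder model and trainloader; find_lr/one_cycle_lr come from the
# relocated common.py module.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Sweep the LR exponentially over 200 iterations and read the best max LR
# off the plotted curve, then build a one-cycle schedule around that peak.
find_lr(model, optimizer, criterion, "cuda", trainloader,
        numiter=200, startlr=1e-4, endlr=1)
scheduler = one_cycle_lr(optimizer, maxlr=1e-2, steps=len(trainloader), epochs=20)
```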
utils/utils/data.py DELETED
@@ -1,294 +0,0 @@
-"""
-Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
-"""
-
-import numpy as np
-import os
-import pandas as pd
-import torch
-import random
-from PIL import Image, ImageFile
-
-import lightning as L
-from torch.utils.data import Dataset, DataLoader
-import config as config
-
-from utils.utils import xywhn2xyxy, xyxy2xywhn
-
-from utils.utils import (
-    cells_to_bboxes,
-    iou_width_height as iou,
-    non_max_suppression as nms,
-    plot_image,
-)
-
-
-ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-
-class YOLODataset(Dataset):
-    def __init__(
-        self,
-        csv_file,
-        img_dir,
-        label_dir,
-        anchors,
-        image_size=416,
-        S=[13, 26, 52],
-        C=20,
-        transform=None,
-    ):
-        self.annotations = pd.read_csv(csv_file)
-        self.img_dir = img_dir
-        self.label_dir = label_dir
-        self.image_size = image_size
-        self.mosaic_border = [image_size // 2, image_size // 2]
-        self.transform = transform
-        self.S = S
-        self.anchors = torch.tensor(
-            anchors[0] + anchors[1] + anchors[2]
-        )  # for all 3 scales
-        self.num_anchors = self.anchors.shape[0]
-        self.num_anchors_per_scale = self.num_anchors // 3
-        self.C = C
-        self.ignore_iou_thresh = 0.5
-
-    def __len__(self):
-        return len(self.annotations)
-
-    def load_mosaic(self, index):
-        # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
-        labels4 = []
-        s = self.image_size
-        yc, xc = (
-            int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
-        )  # mosaic center x, y
-        indices = [index] + random.choices(
-            range(len(self)), k=3
-        )  # 3 additional image indices
-        random.shuffle(indices)
-        for i, index in enumerate(indices):
-            # Load image
-            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
-            bboxes = np.roll(
-                np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
-            ).tolist()
-            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
-            img = np.array(Image.open(img_path).convert("RGB"))
-
-            h, w = img.shape[0], img.shape[1]
-            labels = np.array(bboxes)
-
-            # place img in img4
-            if i == 0:  # top left
-                img4 = np.full(
-                    (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
-                )  # base image with 4 tiles
-                x1a, y1a, x2a, y2a = (
-                    max(xc - w, 0),
-                    max(yc - h, 0),
-                    xc,
-                    yc,
-                )  # xmin, ymin, xmax, ymax (large image)
-                x1b, y1b, x2b, y2b = (
-                    w - (x2a - x1a),
-                    h - (y2a - y1a),
-                    w,
-                    h,
-                )  # xmin, ymin, xmax, ymax (small image)
-            elif i == 1:  # top right
-                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
-                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
-            elif i == 2:  # bottom left
-                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
-                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
-            elif i == 3:  # bottom right
-                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
-                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
-
-            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
-            padw = x1a - x1b
-            padh = y1a - y1b
-
-            # Labels
-            if labels.size:
-                labels[:, :-1] = xywhn2xyxy(
-                    labels[:, :-1], w, h, padw, padh
-                )  # normalized xywh to pixel xyxy format
-            labels4.append(labels)
-
-        # Concat/clip labels
-        labels4 = np.concatenate(labels4, 0)
-        for x in (labels4[:, :-1],):
-            np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
-        # img4, labels4 = replicate(img4, labels4)  # replicate
-        labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
-        labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
-        labels4 = labels4[labels4[:, 2] > 0]
-        labels4 = labels4[labels4[:, 3] > 0]
-        return img4, labels4
-
-    def __getitem__(self, index):
-        if random.random() >= config.P_MOSAIC:
-            image, bboxes = self.load_mosaic(index)
-        else:
-            label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
-            bboxes = np.roll(
-                np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
-            ).tolist()
-            img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
-            image = np.array(Image.open(img_path).convert("RGB"))
-
-        if self.transform:
-            augmentations = self.transform(image=image, bboxes=bboxes)
-            image = augmentations["image"]
-            bboxes = augmentations["bboxes"]
-
-        # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
-        targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
-        for box in bboxes:
-            iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
-            anchor_indices = iou_anchors.argsort(descending=True, dim=0)
-            x, y, width, height, class_label = box
-            has_anchor = [False] * 3  # each scale should have one anchor
-            for anchor_idx in anchor_indices:
-                scale_idx = anchor_idx // self.num_anchors_per_scale
-                anchor_on_scale = anchor_idx % self.num_anchors_per_scale
-                S = self.S[scale_idx]
-                i, j = int(S * y), int(S * x)  # which cell
-                anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
-                if not anchor_taken and not has_anchor[scale_idx]:
-                    targets[scale_idx][anchor_on_scale, i, j, 0] = 1
-                    x_cell, y_cell = S * x - j, S * y - i  # both between [0,1]
-                    width_cell, height_cell = (
-                        width * S,
-                        height * S,
-                    )  # can be greater than 1 since it's relative to cell
-                    box_coordinates = torch.tensor(
-                        [x_cell, y_cell, width_cell, height_cell]
-                    )
-                    targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
-                    targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
-                    has_anchor[scale_idx] = True
-
-                elif (
-                    not anchor_taken
-                    and iou_anchors[anchor_idx] > self.ignore_iou_thresh
-                ):
-                    targets[scale_idx][
-                        anchor_on_scale, i, j, 0
-                    ] = -1  # ignore prediction
-
-        return image, tuple(targets)
-
-
-def test():
-    anchors = config.ANCHORS
-
-    transform = config.test_transforms
-
-    dataset = YOLODataset(
-        "COCO/train.csv",
-        "COCO/images/images/",
-        "COCO/labels/labels_new/",
-        S=[13, 26, 52],
-        anchors=anchors,
-        transform=transform,
-    )
-    S = [13, 26, 52]
-    scaled_anchors = torch.tensor(anchors) / (
-        1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
-    )
-    loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
-    for x, y in loader:
-        boxes = []
-
-        for i in range(y[0].shape[1]):
-            anchor = scaled_anchors[i]
-            print(anchor.shape)
-            print(y[i].shape)
-            boxes += cells_to_bboxes(
-                y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
-            )[0]
-        boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
-        print(boxes)
-        plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
-
-
-class PascalDataModule(L.LightningDataModule):
-    def __init__(
-        self,
-        train_csv_path=None,
-        test_csv_path=None,
-        batch_size=512,
-        shuffle=True,
-        num_workers=4,
-    ) -> None:
-        super().__init__()
-        self.train_csv_path = train_csv_path
-        self.test_csv_path = test_csv_path
-        self.batch_size = batch_size
-        self.shuffle = shuffle
-        self.num_workers = num_workers
-        self.IMAGE_SIZE = config.IMAGE_SIZE
-
-    def prepare_data(self) -> None:
-        pass
-
-    def setup(self, stage=None):
-        self.train_dataset = YOLODataset(
-            self.train_csv_path,
-            transform=config.train_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-        self.val_dataset = YOLODataset(
-            self.test_csv_path,
-            transform=config.test_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-        self.test_dataset = YOLODataset(
-            self.test_csv_path,
-            transform=config.test_transforms,
-            S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
-            img_dir=config.IMG_DIR,
-            label_dir=config.LABEL_DIR,
-            anchors=config.ANCHORS,
-        )
-
-    def train_dataloader(self):
-        return DataLoader(
-            dataset=self.train_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=True,
-            drop_last=False,
-        )
-
-    def val_dataloader(self):
-        return DataLoader(
-            dataset=self.val_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=False,
-            drop_last=False,
-        )
-
-    def test_dataloader(self):
-        return DataLoader(
-            dataset=self.test_dataset,
-            batch_size=config.BATCH_SIZE,
-            num_workers=config.NUM_WORKERS,
-            pin_memory=config.PIN_MEMORY,
-            shuffle=False,
-            drop_last=False,
-        )
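For orientation, a hedged sketch of how `PascalDataModule` was meant to be consumed; the CSV paths are placeholders, and note that the dataloaders read batch size and worker count from `config`, not from the constructor arguments.

```python
# Hypothetical wiring; config.IMG_DIR, LABEL_DIR, ANCHORS, etc. must be set.
datamodule = PascalDataModule(
    train_csv_path="PASCAL_VOC/train.csv",
    test_csv_path="PASCAL_VOC/test.csv",
)
datamodule.setup()

train_loader = datamodule.train_dataloader()
images, targets = next(iter(train_loader))
# targets is a tuple of three tensors, one per scale:
# (B, 3, 13, 13, 6), (B, 3, 26, 26, 6), (B, 3, 52, 52, 6) for a 416px image
```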
utils/utils/gradcam.py DELETED
@@ -1,36 +0,0 @@
-import numpy as np
-from pytorch_grad_cam import EigenCAM
-from pytorch_grad_cam.utils.image import show_cam_on_image
-
-import matplotlib.pyplot as plt
-
-
-def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
-    results = []
-
-    targets = None
-    cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
-
-    for image in images:
-        input_tensor = image.unsqueeze(0)
-        grayscale_cam = cam(input_tensor, targets=targets)
-        grayscale_cam = grayscale_cam[0, :]
-
-        img = input_tensor.squeeze(0).to("cpu")
-        rgb_img = np.transpose(img, (1, 2, 0))
-        rgb_img = rgb_img.numpy()
-
-        cam_image = show_cam_on_image(
-            rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
-        )
-        results.append(cam_image)
-    return results
-
-
-def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
-    fig = plt.figure(figsize=figsize)
-    for i in range(len(images)):
-        plt.subplot(rows, cols, i + 1)
-        plt.imshow(images[i])
-        plt.xticks([])
-        plt.yticks([])
utils/utils/loss.py DELETED
@@ -1,90 +0,0 @@
-"""
-Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
-the difference from what I can tell is I use CrossEntropy for the classes
-instead of BinaryCrossEntropy.
-"""
-import random
-import torch
-import torch.nn as nn
-
-from utils.utils import intersection_over_union
-
-
-class YoloLoss(nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.mse = nn.MSELoss()
-        self.bce = nn.BCEWithLogitsLoss()
-        self.entropy = nn.CrossEntropyLoss()
-        self.sigmoid = nn.Sigmoid()
-
-        # Constants signifying how much to pay for each respective part of the loss
-        self.lambda_class = 1
-        self.lambda_noobj = 10
-        self.lambda_obj = 1
-        self.lambda_box = 10
-
-    def forward(self, predictions, target, anchors):
-        # Check where obj and noobj (we ignore if target == -1)
-        obj = target[..., 0] == 1  # in paper this is Iobj_i
-        noobj = target[..., 0] == 0  # in paper this is Inoobj_i
-
-        # ======================= #
-        #   FOR NO OBJECT LOSS    #
-        # ======================= #
-
-        no_object_loss = self.bce(
-            (predictions[..., 0:1][noobj]),
-            (target[..., 0:1][noobj]),
-        )
-
-        # ==================== #
-        #   FOR OBJECT LOSS    #
-        # ==================== #
-        anchors = anchors.reshape(1, 3, 1, 1, 2)
-
-        box_preds = torch.cat(
-            [
-                self.sigmoid(predictions[..., 1:3]),
-                torch.exp(predictions[..., 3:5]) * anchors,
-            ],
-            dim=-1,
-        )
-        ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
-        # ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj])
-        object_loss = self.mse(
-            self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj]
-        )
-
-        # ======================== #
-        #   FOR BOX COORDINATES    #
-        # ======================== #
-
-        predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3])  # x,y coordinates
-        target[..., 3:5] = torch.log(
-            (1e-16 + target[..., 3:5] / anchors)
-        )  # width, height coordinates
-        box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
-
-        # ================== #
-        #   FOR CLASS LOSS   #
-        # ================== #
-
-        class_loss = self.entropy(
-            (predictions[..., 5:][obj]),
-            (target[..., 5][obj].long()),
-        )
-
-        # print("__________________________________")
-        # print(self.lambda_box * box_loss)
-        # print(self.lambda_obj * object_loss)
-        # print(self.lambda_noobj * no_object_loss)
-        # print(self.lambda_class * class_loss)
-        # print("\n")
-
-        return (
-            self.lambda_box * box_loss
-            + self.lambda_obj * object_loss
-            + self.lambda_noobj * no_object_loss
-            + self.lambda_class * class_loss
-        )
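A hedged sketch of evaluating `YoloLoss` at a single scale; the shapes follow the 416-pixel, 20-class PASCAL setup and the labeled cell is an arbitrary example. Note that `forward` expects anchors already scaled to grid units, and it mutates `predictions` and `target` in place.

```python
import torch

loss_fn = YoloLoss()

# One scale: (N, anchors, S, S, 5 + num_classes) predictions against
# (N, anchors, S, S, [objectness, x, y, w, h, class]) targets.
predictions = torch.randn(2, 3, 13, 13, 25)
target = torch.zeros(2, 3, 13, 13, 6)
target[0, 0, 6, 6] = torch.tensor([1.0, 0.5, 0.5, 2.0, 2.0, 7.0])  # one labeled box

scaled_anchors = torch.rand(3, 2) * 13  # placeholder anchors, in cell units
loss = loss_fn(predictions, target, scaled_anchors)
```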
utils/utils/utils.py DELETED
@@ -1,668 +0,0 @@
-import config
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import numpy as np
-import os
-import random
-import torch
-
-from collections import Counter
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-
-
-def iou_width_height(boxes1, boxes2):
-    """
-    Parameters:
-        boxes1 (tensor): width and height of the first bounding boxes
-        boxes2 (tensor): width and height of the second bounding boxes
-    Returns:
-        tensor: Intersection over union of the corresponding boxes
-    """
-    intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
-        boxes1[..., 1], boxes2[..., 1]
-    )
-    union = (
-        boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
-    )
-    return intersection / union
-
-
-def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
-    """
-    Video explanation of this function:
-    https://youtu.be/XXYG5ZWtjj0
-
-    This function calculates intersection over union (iou) given pred boxes
-    and target boxes.
-
-    Parameters:
-        boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
-        boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
-        box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
-
-    Returns:
-        tensor: Intersection over union for all examples
-    """
-
-    if box_format == "midpoint":
-        box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
-        box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
-        box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
-        box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
-        box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
-        box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
-        box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
-        box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
-
-    if box_format == "corners":
-        box1_x1 = boxes_preds[..., 0:1]
-        box1_y1 = boxes_preds[..., 1:2]
-        box1_x2 = boxes_preds[..., 2:3]
-        box1_y2 = boxes_preds[..., 3:4]
-        box2_x1 = boxes_labels[..., 0:1]
-        box2_y1 = boxes_labels[..., 1:2]
-        box2_x2 = boxes_labels[..., 2:3]
-        box2_y2 = boxes_labels[..., 3:4]
-
-    x1 = torch.max(box1_x1, box2_x1)
-    y1 = torch.max(box1_y1, box2_y1)
-    x2 = torch.min(box1_x2, box2_x2)
-    y2 = torch.min(box1_y2, box2_y2)
-
-    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
-    box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
-    box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
-
-    return intersection / (box1_area + box2_area - intersection + 1e-6)
-
-
-def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
-    """
-    Video explanation of this function:
-    https://youtu.be/YDkjWEN8jNA
-
-    Does Non Max Suppression given bboxes
-
-    Parameters:
-        bboxes (list): list of lists containing all bboxes with each bboxes
-        specified as [class_pred, prob_score, x1, y1, x2, y2]
-        iou_threshold (float): threshold where predicted bboxes is correct
-        threshold (float): threshold to remove predicted bboxes (independent of IoU)
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-
-    Returns:
-        list: bboxes after performing NMS given a specific IoU threshold
-    """
-
-    assert type(bboxes) == list
-
-    bboxes = [box for box in bboxes if box[1] > threshold]
-    bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
-    bboxes_after_nms = []
-
-    while bboxes:
-        chosen_box = bboxes.pop(0)
-
-        bboxes = [
-            box
-            for box in bboxes
-            if box[0] != chosen_box[0]
-            or intersection_over_union(
-                torch.tensor(chosen_box[2:]),
-                torch.tensor(box[2:]),
-                box_format=box_format,
-            )
-            < iou_threshold
-        ]
-
-        bboxes_after_nms.append(chosen_box)
-
-    return bboxes_after_nms
-
-
-def mean_average_precision(
-    pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
-):
-    """
-    Video explanation of this function:
-    https://youtu.be/FppOzcDvaDI
-
-    This function calculates mean average precision (mAP)
-
-    Parameters:
-        pred_boxes (list): list of lists containing all bboxes with each bboxes
-        specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
-        true_boxes (list): Similar as pred_boxes except all the correct ones
-        iou_threshold (float): threshold where predicted bboxes is correct
-        box_format (str): "midpoint" or "corners" used to specify bboxes
-        num_classes (int): number of classes
-
-    Returns:
-        float: mAP value across all classes given a specific IoU threshold
-    """
-
-    # list storing all AP for respective classes
-    average_precisions = []
-
-    # used for numerical stability later on
-    epsilon = 1e-6
-
-    for c in range(num_classes):
-        detections = []
-        ground_truths = []
-
-        # Go through all predictions and targets,
-        # and only add the ones that belong to the
-        # current class c
-        for detection in pred_boxes:
-            if detection[1] == c:
-                detections.append(detection)
-
-        for true_box in true_boxes:
-            if true_box[1] == c:
-                ground_truths.append(true_box)
-
-        # find the amount of bboxes for each training example
-        # Counter here finds how many ground truth bboxes we get
-        # for each training example, so let's say img 0 has 3,
-        # img 1 has 5 then we will obtain a dictionary with:
-        # amount_bboxes = {0:3, 1:5}
-        amount_bboxes = Counter([gt[0] for gt in ground_truths])
-
-        # We then go through each key, val in this dictionary
-        # and convert to the following (w.r.t same example):
-        # ammount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
-        for key, val in amount_bboxes.items():
-            amount_bboxes[key] = torch.zeros(val)
-
-        # sort by box probabilities which is index 2
-        detections.sort(key=lambda x: x[2], reverse=True)
-        TP = torch.zeros((len(detections)))
-        FP = torch.zeros((len(detections)))
-        total_true_bboxes = len(ground_truths)
-
-        # If none exists for this class then we can safely skip
-        if total_true_bboxes == 0:
-            continue
-
-        for detection_idx, detection in enumerate(detections):
-            # Only take out the ground_truths that have the same
-            # training idx as detection
-            ground_truth_img = [
-                bbox for bbox in ground_truths if bbox[0] == detection[0]
-            ]
-
-            num_gts = len(ground_truth_img)
-            best_iou = 0
-
-            for idx, gt in enumerate(ground_truth_img):
-                iou = intersection_over_union(
-                    torch.tensor(detection[3:]),
-                    torch.tensor(gt[3:]),
-                    box_format=box_format,
-                )
-
-                if iou > best_iou:
-                    best_iou = iou
-                    best_gt_idx = idx
-
-            if best_iou > iou_threshold:
-                # only detect ground truth detection once
-                if amount_bboxes[detection[0]][best_gt_idx] == 0:
-                    # true positive and add this bounding box to seen
-                    TP[detection_idx] = 1
-                    amount_bboxes[detection[0]][best_gt_idx] = 1
-                else:
-                    FP[detection_idx] = 1
-
-            # if IOU is lower then the detection is a false positive
-            else:
-                FP[detection_idx] = 1
-
-        TP_cumsum = torch.cumsum(TP, dim=0)
-        FP_cumsum = torch.cumsum(FP, dim=0)
-        recalls = TP_cumsum / (total_true_bboxes + epsilon)
-        precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
-        precisions = torch.cat((torch.tensor([1]), precisions))
-        recalls = torch.cat((torch.tensor([0]), recalls))
-        # torch.trapz for numerical integration
-        average_precisions.append(torch.trapz(precisions, recalls))
-
-    return sum(average_precisions) / len(average_precisions)
-
-
-def plot_image(image, boxes):
-    """Plots predicted bounding boxes on the image"""
-    cmap = plt.get_cmap("tab20b")
-    class_labels = (
-        config.COCO_LABELS if config.DATASET == "COCO" else config.PASCAL_CLASSES
-    )
-    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
-    im = np.array(image)
-    height, width, _ = im.shape
-
-    # Create figure and axes
-    fig, ax = plt.subplots(1)
-    # Display the image
-    ax.imshow(im)
-
-    # box[0] is x midpoint, box[2] is width
-    # box[1] is y midpoint, box[3] is height
-
-    # Create a Rectangle patch
-    for box in boxes:
-        assert (
-            len(box) == 6
-        ), "box should contain class pred, confidence, x, y, width, height"
-        class_pred = box[0]
-        box = box[2:]
-        upper_left_x = box[0] - box[2] / 2
-        upper_left_y = box[1] - box[3] / 2
-        rect = patches.Rectangle(
-            (upper_left_x * width, upper_left_y * height),
-            box[2] * width,
-            box[3] * height,
-            linewidth=2,
-            edgecolor=colors[int(class_pred)],
-            facecolor="none",
-        )
-        # Add the patch to the Axes
-        ax.add_patch(rect)
-        plt.text(
-            upper_left_x * width,
-            upper_left_y * height,
-            s=class_labels[int(class_pred)],
-            color="white",
-            verticalalignment="top",
-            bbox={"color": colors[int(class_pred)], "pad": 0},
-        )
-
-    plt.show()
-
-
-def get_evaluation_bboxes(
-    loader,
-    model,
-    iou_threshold,
-    anchors,
-    threshold,
-    box_format="midpoint",
-    device="cuda",
-):
-    # make sure model is in eval before get bboxes
-    model.eval()
-    train_idx = 0
-    all_pred_boxes = []
-    all_true_boxes = []
-    for batch_idx, (x, labels) in enumerate(tqdm(loader)):
-        x = x.to(device)
-
-        with torch.no_grad():
-            predictions = model(x)
-
-        batch_size = x.shape[0]
-        bboxes = [[] for _ in range(batch_size)]
-        for i in range(3):
-            S = predictions[i].shape[2]
-            anchor = torch.tensor([*anchors[i]]).to(device) * S
-            boxes_scale_i = cells_to_bboxes(predictions[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-        # we just want one bbox for each label, not one for each scale
-        true_bboxes = cells_to_bboxes(labels[2], anchor, S=S, is_preds=False)
-
-        for idx in range(batch_size):
-            nms_boxes = non_max_suppression(
-                bboxes[idx],
-                iou_threshold=iou_threshold,
-                threshold=threshold,
-                box_format=box_format,
-            )
-
-            for nms_box in nms_boxes:
-                all_pred_boxes.append([train_idx] + nms_box)
-
-            for box in true_bboxes[idx]:
-                if box[1] > threshold:
-                    all_true_boxes.append([train_idx] + box)
-
-            train_idx += 1
-
-    model.train()
-    return all_pred_boxes, all_true_boxes
-
-
-def cells_to_bboxes(predictions, anchors, S, is_preds=True):
-    """
-    Scales the predictions coming from the model to
-    be relative to the entire image such that they for example later
-    can be plotted or.
-    INPUT:
-    predictions: tensor of size (N, 3, S, S, num_classes+5)
-    anchors: the anchors used for the predictions
-    S: the number of cells the image is divided in on the width (and height)
-    is_preds: whether the input is predictions or the true bounding boxes
-    OUTPUT:
-    converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
-                      object score, bounding box coordinates
-    """
-    BATCH_SIZE = predictions.shape[0]
-    num_anchors = len(anchors)
-    box_predictions = predictions[..., 1:5]
-    if is_preds:
-        anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
-        box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
-        box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
-        scores = torch.sigmoid(predictions[..., 0:1])
-        best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
-    else:
-        scores = predictions[..., 0:1]
-        best_class = predictions[..., 5:6]
-
-    cell_indices = (
-        torch.arange(S)
-        .repeat(predictions.shape[0], 3, S, 1)
-        .unsqueeze(-1)
-        .to(predictions.device)
-    )
-    x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
-    y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
-    w_h = 1 / S * box_predictions[..., 2:4]
-    converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(
-        BATCH_SIZE, num_anchors * S * S, 6
-    )
-    return converted_bboxes.tolist()
-
-
-def check_class_accuracy(model, loader, threshold):
-    model.eval()
-    tot_class_preds, correct_class = 0, 0
-    tot_noobj, correct_noobj = 0, 0
-    tot_obj, correct_obj = 0, 0
-
-    for idx, (x, y) in enumerate(tqdm(loader)):
-        x = x.to(config.DEVICE)
-        with torch.no_grad():
-            out = model(x)
-
-        for i in range(3):
-            y[i] = y[i].to(config.DEVICE)
-            obj = y[i][..., 0] == 1  # in paper this is Iobj_i
-            noobj = y[i][..., 0] == 0  # in paper this is Iobj_i
-
-            correct_class += torch.sum(
-                torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
-            )
-            tot_class_preds += torch.sum(obj)
-
-            obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
-            correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
-            tot_obj += torch.sum(obj)
-            correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
-            tot_noobj += torch.sum(noobj)
-
-    class_acc = (correct_class / (tot_class_preds + 1e-16)) * 100
-    noobj_acc = (correct_noobj / (tot_noobj + 1e-16)) * 100
-    obj_acc = (correct_obj / (tot_obj + 1e-16)) * 100
-
-    print(f"Class accuracy is: {class_acc:2f}%")
-    print(f"No obj accuracy is: {noobj_acc:2f}%")
-    print(f"Obj accuracy is: {obj_acc:2f}%")
-    model.train()
-    return class_acc, noobj_acc, obj_acc
-
-
-def get_mean_std(loader):
-    # var[X] = E[X**2] - E[X]**2
-    channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
-
-    for data, _ in tqdm(loader):
-        channels_sum += torch.mean(data, dim=[0, 2, 3])
-        channels_sqrd_sum += torch.mean(data**2, dim=[0, 2, 3])
-        num_batches += 1
-
-    mean = channels_sum / num_batches
-    std = (channels_sqrd_sum / num_batches - mean**2) ** 0.5
-
-    return mean, std
-
-
-def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
-    print("=> Saving checkpoint")
-    checkpoint = {
-        "state_dict": model.state_dict(),
-        "optimizer": optimizer.state_dict(),
-    }
-    torch.save(checkpoint, filename)
-
-
-def load_checkpoint(checkpoint_file, model, optimizer, lr):
-    print("=> Loading checkpoint")
-    checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
-    model.load_state_dict(checkpoint["state_dict"])
-    optimizer.load_state_dict(checkpoint["optimizer"])
-
-    # If we don't do this then it will just have learning rate of old checkpoint
-    # and it will lead to many hours of debugging \:
-    for param_group in optimizer.param_groups:
-        param_group["lr"] = lr
-
-
-def get_loaders(train_csv_path, test_csv_path):
-    from dataset import YOLODataset
-
-    IMAGE_SIZE = config.IMAGE_SIZE
-    train_dataset = YOLODataset(
-        train_csv_path,
-        transform=config.train_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    test_dataset = YOLODataset(
-        test_csv_path,
-        transform=config.test_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    train_loader = DataLoader(
-        dataset=train_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=True,
-        drop_last=False,
-    )
-    test_loader = DataLoader(
-        dataset=test_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=False,
-        drop_last=False,
-    )
-
-    train_eval_dataset = YOLODataset(
-        train_csv_path,
-        transform=config.test_transforms,
-        S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
-        img_dir=config.IMG_DIR,
-        label_dir=config.LABEL_DIR,
-        anchors=config.ANCHORS,
-    )
-    train_eval_loader = DataLoader(
-        dataset=train_eval_dataset,
-        batch_size=config.BATCH_SIZE,
-        num_workers=config.NUM_WORKERS,
-        pin_memory=config.PIN_MEMORY,
-        shuffle=False,
-        drop_last=False,
-    )
-
-    return train_loader, test_loader, train_eval_loader
-
-
-def plot_couple_examples(model, loader, thresh, iou_thresh, anchors):
-    model.eval()
-    x, y = next(iter(loader))
-    x = x.to("cuda")
-    with torch.no_grad():
-        out = model(x)
-        bboxes = [[] for _ in range(x.shape[0])]
-        for i in range(3):
-            batch_size, A, S, _, _ = out[i].shape
-            anchor = anchors[i]
-            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-    model.train()
-
-    for i in range(batch_size // 4):
-        nms_boxes = non_max_suppression(
-            bboxes[i],
-            iou_threshold=iou_thresh,
-            threshold=thresh,
-            box_format="midpoint",
-        )
-        plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
-
-
-def seed_everything(seed=42):
-    os.environ["PYTHONHASHSEED"] = str(seed)
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    torch.cuda.manual_seed(seed)
-    torch.cuda.manual_seed_all(seed)
-    torch.backends.cudnn.deterministic = True
-    torch.backends.cudnn.benchmark = False
-
-
-def clip_coords(boxes, img_shape):
-    # Clip bounding xyxy bounding boxes to image shape (height, width)
-    boxes[:, 0].clamp_(0, img_shape[1])  # x1
-    boxes[:, 1].clamp_(0, img_shape[0])  # y1
-    boxes[:, 2].clamp_(0, img_shape[1])  # x2
-    boxes[:, 3].clamp_(0, img_shape[0])  # y2
-
-
-def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
-    # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw  # top left x
-    y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh  # top left y
-    y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw  # bottom right x
-    y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh  # bottom right y
-    return y
-
-
-def xyn2xy(x, w=640, h=640, padw=0, padh=0):
-    # Convert normalized segments into pixel segments, shape (n,2)
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = w * x[..., 0] + padw  # top left x
-    y[..., 1] = h * x[..., 1] + padh  # top left y
-    return y
-
-
-def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
-    # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
-    if clip:
-        clip_boxes(x, (h - eps, w - eps))  # warning: inplace clip
-    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
-    y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w  # x center
-    y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h  # y center
-    y[..., 2] = (x[..., 2] - x[..., 0]) / w  # width
-    y[..., 3] = (x[..., 3] - x[..., 1]) / h  # height
-    return y
-
-
-def clip_boxes(boxes, shape):
-    # Clip boxes (xyxy) to image shape (height, width)
-    if isinstance(boxes, torch.Tensor):  # faster individually
-        boxes[..., 0].clamp_(0, shape[1])  # x1
-        boxes[..., 1].clamp_(0, shape[0])  # y1
-        boxes[..., 2].clamp_(0, shape[1])  # x2
-        boxes[..., 3].clamp_(0, shape[0])  # y2
-    else:  # np.array (faster grouped)
-        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
-        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2
-
-
-def save_result(image, boxes, index):
-    """Plots predicted bounding boxes on the image"""
-    cmap = plt.get_cmap("tab20b")
-    class_labels = config.PASCAL_CLASSES
-
-    colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
-    im = np.array(image)
-    height, width, _ = im.shape
-
-    # Create figure and axes
-    fig, ax = plt.subplots(1)
-    # Display the image
-    ax.imshow(im)
-
-    # box[0] is x midpoint, box[2] is width
-    # box[1] is y midpoint, box[3] is height
-
-    # Create a Rectangle patch
-    for box in boxes:
-        assert (
-            len(box) == 6
-        ), "box should contain class pred, confidence, x, y, width, height"
-        class_pred = box[0]
-        box = box[2:]
-        upper_left_x = box[0] - box[2] / 2
-        upper_left_y = box[1] - box[3] / 2
-        rect = patches.Rectangle(
-            (upper_left_x * width, upper_left_y * height),
-            box[2] * width,
-            box[3] * height,
-            linewidth=2,
-            edgecolor=colors[int(class_pred)],
-            facecolor="none",
-        )
-        # Add the patch to the Axes
-        ax.add_patch(rect)
-        plt.text(
-            upper_left_x * width,
-            upper_left_y * height,
-            s=class_labels[int(class_pred)],
-            color="white",
-            verticalalignment="top",
-            bbox={"color": colors[int(class_pred)], "pad": 0},
-        )
-    ax.grid(False)
-    ax.set_axis_off()
-
-    plt.savefig(f"output/img{index}.png")
-
-
-def generate_result(model, data, thresh, iou_thresh, anchors):
-    model.eval()
-    x = data
-    # x = x.to("cuda")
-    with torch.no_grad():
-        out = model(x)
-        bboxes = [[] for _ in range(x.shape[0])]
-        for i in range(3):
-            batch_size, A, S, _, _ = out[i].shape
-            anchor = anchors[i]
-            boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
-            for idx, (box) in enumerate(boxes_scale_i):
-                bboxes[idx] += box
-
-    for i in range(batch_size):
-        nms_boxes = non_max_suppression(
-            bboxes[i],
-            iou_threshold=iou_thresh,
-            threshold=thresh,
-            box_format="midpoint",
-        )
-        save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
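Finally, since the box utilities are the heart of this file, a small self-contained sketch of the NMS contract (the numbers are made up for illustration):

```python
# Each box is [class_pred, confidence, x, y, w, h] in midpoint format.
boxes = [
    [0, 0.9, 0.50, 0.5, 0.4, 0.4],
    [0, 0.6, 0.52, 0.5, 0.4, 0.4],  # heavy overlap with the 0.9 box
    [1, 0.8, 0.20, 0.2, 0.1, 0.1],  # different class, kept regardless of IoU
]
kept = non_max_suppression(boxes, iou_threshold=0.5, threshold=0.4,
                           box_format="midpoint")
# kept == the 0.9 class-0 box and the 0.8 class-1 box; the 0.6 duplicate is
# suppressed because its IoU with the chosen class-0 box exceeds 0.5.
```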