ravi.naik committed on
Commit d0ef04f
1 Parent(s): 0be79cc

initial code push

.gitignore ADDED
@@ -0,0 +1,162 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ model.ckpt
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ share/python-wheels/
26
+ *.egg-info/
27
+ .installed.cfg
28
+ *.egg
29
+ MANIFEST
30
+
31
+ # PyInstaller
32
+ # Usually these files are written by a python script from a template
33
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+ cover/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ .pybuilder/
78
+ target/
79
+
80
+ # Jupyter Notebook
81
+ .ipynb_checkpoints
82
+
83
+ # IPython
84
+ profile_default/
85
+ ipython_config.py
86
+
87
+ # pyenv
88
+ # For a library or package, you might want to ignore these files since the code is
89
+ # intended to run in multiple environments; otherwise, check them in:
90
+ # .python-version
91
+
92
+ # pipenv
93
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
94
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
95
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
96
+ # install all needed dependencies.
97
+ #Pipfile.lock
98
+
99
+ # poetry
100
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
101
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
102
+ # commonly ignored for libraries.
103
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
104
+ #poetry.lock
105
+
106
+ # pdm
107
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
108
+ #pdm.lock
109
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
110
+ # in version control.
111
+ # https://pdm.fming.dev/#use-with-ide
112
+ .pdm.toml
113
+
114
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
115
+ __pypackages__/
116
+
117
+ # Celery stuff
118
+ celerybeat-schedule
119
+ celerybeat.pid
120
+
121
+ # SageMath parsed files
122
+ *.sage.py
123
+
124
+ # Environments
125
+ .env
126
+ .venv
127
+ env/
128
+ venv/
129
+ ENV/
130
+ env.bak/
131
+ venv.bak/
132
+
133
+ # Spyder project settings
134
+ .spyderproject
135
+ .spyproject
136
+
137
+ # Rope project settings
138
+ .ropeproject
139
+
140
+ # mkdocs documentation
141
+ /site
142
+
143
+ # mypy
144
+ .mypy_cache/
145
+ .dmypy.json
146
+ dmypy.json
147
+
148
+ # Pyre type checker
149
+ .pyre/
150
+
151
+ # pytype static type analyzer
152
+ .pytype/
153
+
154
+ # Cython debug symbols
155
+ cython_debug/
156
+
157
+ # PyCharm
158
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
159
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
160
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
161
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
162
+ #.idea/
app.py ADDED
@@ -0,0 +1,224 @@
1
+ import gradio as gr
2
+ import numpy as np
3
+ from PIL import Image
4
+ import torch
5
+
6
+ import os
7
+ import shutil
8
+ import config
9
+ from models.yolo import YOLOv3
10
+ from utils.data import PascalDataModule
11
+ from utils.loss import YoloLoss
12
+ from utils.gradcam import generate_gradcam
13
+ from utils.utils import generate_result
14
+ from markdown import model_stats, data_stats
15
+
16
+ datamodule = PascalDataModule(
17
+ train_csv_path=f"{config.DATASET}/train.csv",
18
+ test_csv_path=f"{config.DATASET}/test.csv",
19
+ batch_size=config.BATCH_SIZE,
20
+ shuffle=config.SHUFFLE,
21
+ num_workers=os.cpu_count() - 1,
22
+ )
23
+ datamodule.setup()
24
+
25
+
26
+ class FilterModel(torch.nn.Module):
27
+ def __init__(self):
28
+ super().__init__()
29
+ self.yolo = YOLOv3.load_from_checkpoint(
30
+ "model.ckpt",
31
+ in_channels=3,
32
+ num_classes=config.NUM_CLASSES,
33
+ epochs=config.NUM_EPOCHS,
34
+ loss_fn=YoloLoss,
35
+ datamodule=datamodule,
36
+ learning_rate=config.LEARNING_RATE,
37
+ maxlr=config.LEARNING_RATE,
38
+ scheduler_steps=len(datamodule.train_dataloader()),
39
+ device_count=config.NUM_WORKERS,
40
+ )
41
+ self.yolo = self.yolo.to("cpu")
42
+
43
+ def forward(self, x):
44
+ x = self.yolo(x)
45
+ return x[-1]
46
+
47
+
48
+ model = FilterModel()
49
+
50
+ prediction_image = None
51
+
52
+
53
+ def upload_file(files):
54
+ file_paths = [file.name for file in files]
55
+ return file_paths
56
+
57
+
58
+ def read_image(path):
59
+ img = Image.open(path)
60
+ img.load()
61
+ data = np.asarray(img, dtype="uint8")
62
+ return data
63
+
64
+
65
+ def sample_images():
66
+ all_imgs = os.listdir(config.IMG_DIR)
67
+ rand_inds = np.random.randint(0, len(all_imgs), 10).tolist()  # randint excludes the upper bound, so indices stay in range
68
+ images = [f"{config.IMG_DIR}/{all_imgs[ind]}" for ind in rand_inds]
69
+ return images
70
+
71
+
72
+ def get_gradcam_images(gradcam_layer, images, gradcam_opacity):
73
+ gradcam_images = []
74
+ target_layers = [model.yolo.layers[int(gradcam_layer)]]
75
+ gradcam_images = generate_gradcam(
76
+ model=model,
77
+ target_layers=target_layers,
78
+ images=images,
79
+ use_cuda=False,
80
+ transparency=gradcam_opacity,
81
+ )
82
+ return gradcam_images
83
+
84
+
85
+ def show_hide_gradcam(status):
86
+ if not status:
87
+ return [gr.update(visible=False) for _ in range(2)]  # one update per output: gradcam_layer, gradcam_opacity
88
+ return [gr.update(visible=True) for _ in range(2)]
89
+
90
+
91
+ def set_prediction_image(evt: gr.SelectData, gallery):
92
+ global prediction_image
93
+ if isinstance(gallery[evt.index], dict):
94
+ prediction_image = gallery[evt.index]["name"]
95
+ else:
96
+ prediction_image = gallery[evt.index][0]["name"]
97
+
98
+
99
+ def predict(is_gradcam, gradcam_layer, gradcam_opacity):
100
+ gradcam_images = None
101
+ img = read_image(prediction_image)
102
+ image_transformed = config.test_transforms(image=img, bboxes=[])["image"]
103
+ if is_gradcam:
104
+ images = [image_transformed]
105
+ gradcam_images = get_gradcam_images(gradcam_layer, images, gradcam_opacity)
106
+
107
+ data = image_transformed.unsqueeze(0)
108
+
109
+ if not os.path.exists("output"):
110
+ os.mkdir("output")
111
+ else:
112
+ shutil.rmtree("output")
113
+ os.mkdir("output")
114
+ generate_result(
115
+ model=model.yolo,
116
+ data=data,
117
+ thresh=0.6,
118
+ iou_thresh=0.5,
119
+ anchors=model.yolo.scaled_anchors,
120
+ )
121
+ result_images = os.listdir("output")
122
+ result_images = [
123
+ f"output/{file}" for file in result_images if file.endswith(".png")
124
+ ]
125
+ return {
126
+ output: gr.update(value=result_images[0]),
127
+ gradcam_output: gr.update(value=gradcam_images[0] if gradcam_images else None),
128
+ }
129
+
130
+
131
+ with gr.Blocks() as app:
132
+ gr.Markdown("## ERA Session13 - PASCAL-VOC Object Detection with YoloV3")
133
+ with gr.Row():
134
+ with gr.Column():
135
+ with gr.Box():
136
+ is_gradcam = gr.Checkbox(
137
+ label="GradCAM Images",
138
+ info="Display GradCAM images?",
139
+ )
140
+ gradcam_layer = gr.Dropdown(
141
+ choices=list(range(len(model.yolo.layers))),
142
+ label="Select the layer",
143
+ info="Please select the layer for which the GradCAM is required",
144
+ interactive=True,
145
+ visible=False,
146
+ )
147
+ gradcam_opacity = gr.Slider(
148
+ minimum=0,
149
+ maximum=1,
150
+ value=0.6,
151
+ label="Opacity",
152
+ info="Opacity of GradCAM output",
153
+ interactive=True,
154
+ visible=False,
155
+ )
156
+
157
+ is_gradcam.input(
158
+ show_hide_gradcam,
159
+ inputs=[is_gradcam],
160
+ outputs=[gradcam_layer, gradcam_opacity],
161
+ )
162
+ with gr.Box():
163
+ # file_output = gr.File(file_types=["image"])
164
+ with gr.Group():
165
+ upload_gallery = gr.Gallery(
166
+ value=None,
167
+ label="Uploaded images",
168
+ show_label=False,
169
+ elem_id="gallery_upload",
170
+ columns=5,
171
+ rows=2,
172
+ height="auto",
173
+ object_fit="contain",
174
+ )
175
+ upload_button = gr.UploadButton(
176
+ "Click to Upload images",
177
+ file_types=["image"],
178
+ file_count="multiple",
179
+ )
180
+ upload_button.upload(upload_file, upload_button, upload_gallery)
181
+
182
+ with gr.Group():
183
+ sample_gallery = gr.Gallery(
184
+ value=sample_images,
185
+ label="Sample images",
186
+ show_label=True,
187
+ elem_id="gallery_sample",
188
+ columns=5,
189
+ rows=2,
190
+ height="auto",
191
+ object_fit="contain",
192
+ )
193
+
194
+ upload_gallery.select(set_prediction_image, inputs=[upload_gallery])
195
+ sample_gallery.select(set_prediction_image, inputs=[sample_gallery])
196
+
197
+ run_btn = gr.Button()
198
+ with gr.Column():
199
+ with gr.Box():
200
+ output = gr.Image(value=None, label="Model Result")
201
+ with gr.Box():
202
+ gradcam_output = gr.Image(value=None, label="GradCAM Image")
203
+
204
+ run_btn.click(
205
+ predict,
206
+ inputs=[
207
+ is_gradcam,
208
+ gradcam_layer,
209
+ gradcam_opacity,
210
+ ],
211
+ outputs=[output, gradcam_output],
212
+ )
213
+
214
+ with gr.Row():
215
+ with gr.Box():
216
+ with gr.Row():
217
+ with gr.Column():
218
+ with gr.Box():
219
+ gr.Markdown(model_stats)
220
+ with gr.Column():
221
+ with gr.Box():
222
+ gr.Markdown(data_stats)
223
+
224
+ app.launch()
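
Launching the UI is `python app.py`; besides the Python dependencies it expects `model.ckpt` and the `PASCAL_VOC` folder referenced in `config.py` to sit next to the code. For reference, a minimal sketch of the preprocessing `predict()` applies before inference (the random array is only a stand-in for a photo read by `read_image()`):

```python
import numpy as np
import config

# Stand-in for an uploaded photo (HWC, uint8), as returned by read_image()
img = np.random.randint(0, 255, (375, 500, 3), dtype=np.uint8)

# Same transform chain predict() uses: letterbox to 416x416, normalize, convert to a tensor
x = config.test_transforms(image=img, bboxes=[])["image"]
batch = x.unsqueeze(0)  # (1, 3, 416, 416), the input handed to generate_result()
print(batch.shape)
```
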
config.py ADDED
@@ -0,0 +1,200 @@
1
+ import albumentations as A
2
+ import cv2
3
+ import torch
4
+
5
+ from albumentations.pytorch import ToTensorV2
6
+ from utils.utils import seed_everything
7
+
8
+ DATASET = "PASCAL_VOC"
9
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
10
+ # seed_everything() # If you want deterministic behavior
11
+ DEVICE_COUNT = torch.cuda.device_count()
12
+ NUM_WORKERS = 0
13
+ BATCH_SIZE = 128
14
+ SHUFFLE = True
15
+ IMAGE_SIZE = 416
16
+ NUM_CLASSES = 20
17
+ LEARNING_RATE = 1e-3
18
+ WEIGHT_DECAY = 1e-4
19
+ NUM_EPOCHS = 40
20
+ CONF_THRESHOLD = 0.05
21
+ MAP_IOU_THRESH = 0.5
22
+ NMS_IOU_THRESH = 0.45
23
+ S = [IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8]
24
+ PIN_MEMORY = True
25
+ LOAD_MODEL = False
26
+ SAVE_MODEL = True
27
+ CHECKPOINT_FILE = "checkpoint.pth.tar"
28
+ IMG_DIR = DATASET + "/images/"
29
+ LABEL_DIR = DATASET + "/labels/"
30
+ P_MOSAIC = 0.5
31
+
32
+ ANCHORS = [
33
+ [(0.28, 0.22), (0.38, 0.48), (0.9, 0.78)],
34
+ [(0.07, 0.15), (0.15, 0.11), (0.14, 0.29)],
35
+ [(0.02, 0.03), (0.04, 0.07), (0.08, 0.06)],
36
+ ] # Note these have been rescaled to be between [0, 1]
37
+
38
+ means = [0.485, 0.456, 0.406]
39
+
40
+ scale = 1.1
41
+ train_transforms = A.Compose(
42
+ [
43
+ A.LongestMaxSize(max_size=int(IMAGE_SIZE * scale)),
44
+ A.PadIfNeeded(
45
+ min_height=int(IMAGE_SIZE * scale),
46
+ min_width=int(IMAGE_SIZE * scale),
47
+ border_mode=cv2.BORDER_CONSTANT,
48
+ ),
49
+ A.Rotate(limit=10, interpolation=1, border_mode=4),
50
+ A.RandomCrop(width=IMAGE_SIZE, height=IMAGE_SIZE),
51
+ A.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.6, p=0.4),
52
+ A.OneOf(
53
+ [
54
+ A.ShiftScaleRotate(
55
+ rotate_limit=20, p=0.5, border_mode=cv2.BORDER_CONSTANT
56
+ ),
57
+ # A.Affine(shear=15, p=0.5, mode="constant"),
58
+ ],
59
+ p=1.0,
60
+ ),
61
+ A.HorizontalFlip(p=0.5),
62
+ A.Blur(p=0.1),
63
+ A.CLAHE(p=0.1),
64
+ A.Posterize(p=0.1),
65
+ A.ToGray(p=0.1),
66
+ A.ChannelShuffle(p=0.05),
67
+ A.Normalize(
68
+ mean=[0, 0, 0],
69
+ std=[1, 1, 1],
70
+ max_pixel_value=255,
71
+ ),
72
+ ToTensorV2(),
73
+ ],
74
+ bbox_params=A.BboxParams(
75
+ format="yolo",
76
+ min_visibility=0.4,
77
+ label_fields=[],
78
+ ),
79
+ )
80
+ test_transforms = A.Compose(
81
+ [
82
+ A.LongestMaxSize(max_size=IMAGE_SIZE),
83
+ A.PadIfNeeded(
84
+ min_height=IMAGE_SIZE, min_width=IMAGE_SIZE, border_mode=cv2.BORDER_CONSTANT
85
+ ),
86
+ A.Normalize(
87
+ mean=[0, 0, 0],
88
+ std=[1, 1, 1],
89
+ max_pixel_value=255,
90
+ ),
91
+ ToTensorV2(),
92
+ ],
93
+ bbox_params=A.BboxParams(format="yolo", min_visibility=0.4, label_fields=[]),
94
+ )
95
+
96
+ PASCAL_CLASSES = [
97
+ "aeroplane",
98
+ "bicycle",
99
+ "bird",
100
+ "boat",
101
+ "bottle",
102
+ "bus",
103
+ "car",
104
+ "cat",
105
+ "chair",
106
+ "cow",
107
+ "diningtable",
108
+ "dog",
109
+ "horse",
110
+ "motorbike",
111
+ "person",
112
+ "pottedplant",
113
+ "sheep",
114
+ "sofa",
115
+ "train",
116
+ "tvmonitor",
117
+ ]
118
+
119
+ COCO_LABELS = [
120
+ "person",
121
+ "bicycle",
122
+ "car",
123
+ "motorcycle",
124
+ "airplane",
125
+ "bus",
126
+ "train",
127
+ "truck",
128
+ "boat",
129
+ "traffic light",
130
+ "fire hydrant",
131
+ "stop sign",
132
+ "parking meter",
133
+ "bench",
134
+ "bird",
135
+ "cat",
136
+ "dog",
137
+ "horse",
138
+ "sheep",
139
+ "cow",
140
+ "elephant",
141
+ "bear",
142
+ "zebra",
143
+ "giraffe",
144
+ "backpack",
145
+ "umbrella",
146
+ "handbag",
147
+ "tie",
148
+ "suitcase",
149
+ "frisbee",
150
+ "skis",
151
+ "snowboard",
152
+ "sports ball",
153
+ "kite",
154
+ "baseball bat",
155
+ "baseball glove",
156
+ "skateboard",
157
+ "surfboard",
158
+ "tennis racket",
159
+ "bottle",
160
+ "wine glass",
161
+ "cup",
162
+ "fork",
163
+ "knife",
164
+ "spoon",
165
+ "bowl",
166
+ "banana",
167
+ "apple",
168
+ "sandwich",
169
+ "orange",
170
+ "broccoli",
171
+ "carrot",
172
+ "hot dog",
173
+ "pizza",
174
+ "donut",
175
+ "cake",
176
+ "chair",
177
+ "couch",
178
+ "potted plant",
179
+ "bed",
180
+ "dining table",
181
+ "toilet",
182
+ "tv",
183
+ "laptop",
184
+ "mouse",
185
+ "remote",
186
+ "keyboard",
187
+ "cell phone",
188
+ "microwave",
189
+ "oven",
190
+ "toaster",
191
+ "sink",
192
+ "refrigerator",
193
+ "book",
194
+ "clock",
195
+ "vase",
196
+ "scissors",
197
+ "teddy bear",
198
+ "hair drier",
199
+ "toothbrush",
200
+ ]
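
A small sketch of how the normalized `ANCHORS` above end up in grid-cell units; this mirrors the `scaled_anchors` computation in `models/yolo.py` (with `S = [13, 26, 52]` for `IMAGE_SIZE = 416`):

```python
import torch
import config

scaled = torch.tensor(config.ANCHORS) * torch.tensor(config.S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
print(scaled.shape)  # (3 scales, 3 anchors per scale, 2)
print(scaled[0, 0])  # tensor([3.6400, 2.8600]): the (0.28, 0.22) anchor measured in 13x13 grid cells
```
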
markdown.py ADDED
@@ -0,0 +1,48 @@
1
+ model_stats = """
2
+ ### YoloV3 Model Implementation & Training Details
3
+
4
+ Github Link: https://github.com/RaviNaik/ERA-SESSION13/tree/main
5
+
6
+ #### Achieved:
7
+ 1. **Training Loss: 3.680**
8
+ 2. **Validation Loss: 4.940**
9
+ 3. **Class accuracy: 81.601883%**
10
+ 4. **No obj accuracy: 97.991463%**
11
+ 5. **Obj accuracy: 75.976616%**
12
+ 6. **mAP: 0.4366795**
13
+
14
+ Model Link:
15
+
16
+ """
17
+
18
+ data_stats = """
19
+
20
+ ### PASCAL VOC Dataset Details
21
+ Dataset Link: https://www.kaggle.com/datasets/aladdinpersson/pascal-voc-dataset-used-in-yolov3-video?resource=download
22
+ ```python
23
+ Number of images: 43.2k
24
+ Dataset size: ~5GB
25
+ Classes Supported: [
26
+ "aeroplane",
27
+ "bicycle",
28
+ "bird",
29
+ "boat",
30
+ "bottle",
31
+ "bus",
32
+ "car",
33
+ "cat",
34
+ "chair",
35
+ "cow",
36
+ "diningtable",
37
+ "dog",
38
+ "horse",
39
+ "motorbike",
40
+ "person",
41
+ "pottedplant",
42
+ "sheep",
43
+ "sofa",
44
+ "train",
45
+ "tvmonitor",
46
+ ]
47
+ ```
48
+ """
models/yolo.py ADDED
@@ -0,0 +1,295 @@
1
+ """
2
+ Implementation of YOLOv3 architecture
3
+ """
4
+
5
+ from typing import Any, Dict
6
+ from lightning.pytorch.utilities.types import STEP_OUTPUT
7
+ import torch
8
+ import torch.nn as nn
9
+ import lightning as L
10
+
11
+ import config as config_
12
+ from utils.common import one_cycle_lr
13
+ from utils.data import PascalDataModule
14
+ from utils.loss import YoloLoss
15
+ from utils.utils import (
16
+ mean_average_precision,
17
+ cells_to_bboxes,
18
+ get_evaluation_bboxes,
19
+ save_checkpoint,
20
+ load_checkpoint,
21
+ check_class_accuracy,
22
+ get_loaders,
23
+ plot_couple_examples,
24
+ )
25
+
26
+
27
+ """
28
+ Information about architecture config:
29
+ Tuple is structured by (filters, kernel_size, stride)
30
+ Every conv is a same convolution.
31
+ List is structured by "B" indicating a residual block followed by the number of repeats
32
+ "S" is for scale prediction block and computing the yolo loss
33
+ "U" is for upsampling the feature map and concatenating with a previous layer
34
+ """
35
+ config = [
36
+ (32, 3, 1),
37
+ (64, 3, 2),
38
+ ["B", 1],
39
+ (128, 3, 2),
40
+ ["B", 2],
41
+ (256, 3, 2),
42
+ ["B", 8],
43
+ (512, 3, 2),
44
+ ["B", 8],
45
+ (1024, 3, 2),
46
+ ["B", 4], # To this point is Darknet-53
47
+ (512, 1, 1),
48
+ (1024, 3, 1),
49
+ "S",
50
+ (256, 1, 1),
51
+ "U",
52
+ (256, 1, 1),
53
+ (512, 3, 1),
54
+ "S",
55
+ (128, 1, 1),
56
+ "U",
57
+ (128, 1, 1),
58
+ (256, 3, 1),
59
+ "S",
60
+ ]
61
+
62
+
63
+ class CNNBlock(L.LightningModule):
64
+ def __init__(self, in_channels, out_channels, bn_act=True, **kwargs):
65
+ super().__init__()
66
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=not bn_act, **kwargs)
67
+ self.bn = nn.BatchNorm2d(out_channels)
68
+ self.leaky = nn.LeakyReLU(0.1)
69
+ self.use_bn_act = bn_act
70
+
71
+ def forward(self, x):
72
+ if self.use_bn_act:
73
+ return self.leaky(self.bn(self.conv(x)))
74
+ else:
75
+ return self.conv(x)
76
+
77
+
78
+ class ResidualBlock(L.LightningModule):
79
+ def __init__(self, channels, use_residual=True, num_repeats=1):
80
+ super().__init__()
81
+ self.layers = nn.ModuleList()
82
+ for repeat in range(num_repeats):
83
+ self.layers += [
84
+ nn.Sequential(
85
+ CNNBlock(channels, channels // 2, kernel_size=1),
86
+ CNNBlock(channels // 2, channels, kernel_size=3, padding=1),
87
+ )
88
+ ]
89
+
90
+ self.use_residual = use_residual
91
+ self.num_repeats = num_repeats
92
+
93
+ def forward(self, x):
94
+ for layer in self.layers:
95
+ if self.use_residual:
96
+ x = x + layer(x)
97
+ else:
98
+ x = layer(x)
99
+
100
+ return x
101
+
102
+
103
+ class ScalePrediction(L.LightningModule):
104
+ def __init__(self, in_channels, num_classes):
105
+ super().__init__()
106
+ self.pred = nn.Sequential(
107
+ CNNBlock(in_channels, 2 * in_channels, kernel_size=3, padding=1),
108
+ CNNBlock(
109
+ 2 * in_channels, (num_classes + 5) * 3, bn_act=False, kernel_size=1
110
+ ),
111
+ )
112
+ self.num_classes = num_classes
113
+
114
+ def forward(self, x):
115
+ return (
116
+ self.pred(x)
117
+ .reshape(x.shape[0], 3, self.num_classes + 5, x.shape[2], x.shape[3])
118
+ .permute(0, 1, 3, 4, 2)
119
+ )
120
+
121
+
122
+ class YOLOv3(L.LightningModule):
123
+ def __init__(
124
+ self,
125
+ in_channels=3,
126
+ num_classes=80,
127
+ epochs=40,
128
+ loss_fn=YoloLoss,
129
+ datamodule=PascalDataModule(),
130
+ learning_rate=None,
131
+ maxlr=None,
132
+ scheduler_steps=None,
133
+ device_count=2,
134
+ ):
135
+ super().__init__()
136
+ self.num_classes = num_classes
137
+ self.in_channels = in_channels
138
+ self.epochs = epochs
139
+ self.loss_fn = loss_fn()
140
+ self.layers = self._create_conv_layers()
141
+ self.scaled_anchors = torch.tensor(config_.ANCHORS) * torch.tensor(
142
+ config_.S
143
+ ).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2).to(self.device)
144
+ self.datamodule = datamodule
145
+ self.learning_rate = learning_rate
146
+ self.maxlr = maxlr
147
+ self.scheduler_steps = scheduler_steps
148
+ self.device_count = device_count
149
+
150
+ def forward(self, x):
151
+ outputs = [] # for each scale
152
+ route_connections = []
153
+ for layer in self.layers:
154
+ if isinstance(layer, ScalePrediction):
155
+ outputs.append(layer(x))
156
+ continue
157
+
158
+ x = layer(x)
159
+
160
+ if isinstance(layer, ResidualBlock) and layer.num_repeats == 8:
161
+ route_connections.append(x)
162
+
163
+ elif isinstance(layer, nn.Upsample):
164
+ x = torch.cat([x, route_connections[-1]], dim=1)
165
+ route_connections.pop()
166
+
167
+ return outputs
168
+
169
+ def _create_conv_layers(self):
170
+ layers = nn.ModuleList()
171
+ in_channels = self.in_channels
172
+
173
+ for module in config:
174
+ if isinstance(module, tuple):
175
+ out_channels, kernel_size, stride = module
176
+ layers.append(
177
+ CNNBlock(
178
+ in_channels,
179
+ out_channels,
180
+ kernel_size=kernel_size,
181
+ stride=stride,
182
+ padding=1 if kernel_size == 3 else 0,
183
+ )
184
+ )
185
+ in_channels = out_channels
186
+
187
+ elif isinstance(module, list):
188
+ num_repeats = module[1]
189
+ layers.append(
190
+ ResidualBlock(
191
+ in_channels,
192
+ num_repeats=num_repeats,
193
+ )
194
+ )
195
+
196
+ elif isinstance(module, str):
197
+ if module == "S":
198
+ layers += [
199
+ ResidualBlock(in_channels, use_residual=False, num_repeats=1),
200
+ CNNBlock(in_channels, in_channels // 2, kernel_size=1),
201
+ ScalePrediction(in_channels // 2, num_classes=self.num_classes),
202
+ ]
203
+ in_channels = in_channels // 2
204
+
205
+ elif module == "U":
206
+ layers.append(
207
+ nn.Upsample(scale_factor=2),
208
+ )
209
+ in_channels = in_channels * 3  # upsampled map (C channels) is concatenated with a 2C-channel route connection, giving 3C
210
+
211
+ return layers
212
+
213
+ def configure_optimizers(self) -> Dict:
214
+ # effective_lr = self.learning_rate * self.device_count
215
+ optimizer = torch.optim.Adam(
216
+ self.parameters(), lr=self.learning_rate, weight_decay=config_.WEIGHT_DECAY
217
+ )
218
+ scheduler = one_cycle_lr(
219
+ optimizer=optimizer,
220
+ maxlr=self.maxlr,
221
+ steps=self.scheduler_steps,
222
+ epochs=self.epochs,
223
+ )
224
+ return {
225
+ "optimizer": optimizer,
226
+ "lr_scheduler": {"scheduler": scheduler, "interval": "step"},
227
+ }
228
+
229
+ def _common_step(self, batch, batch_idx):
230
+ self.scaled_anchors = self.scaled_anchors.to(self.device)
231
+ x, y = batch
232
+ y0, y1, y2 = y[0], y[1], y[2]
233
+ out = self(x)
234
+ loss = (
235
+ self.loss_fn(out[0], y0, self.scaled_anchors[0])
236
+ + self.loss_fn(out[1], y1, self.scaled_anchors[1])
237
+ + self.loss_fn(out[2], y2, self.scaled_anchors[2])
238
+ )
239
+ return loss
240
+
241
+ def training_step(self, batch, batch_idx):
242
+ loss = self._common_step(batch, batch_idx)
243
+ self.log(name="train_loss", value=loss, on_step=True, on_epoch=True, prog_bar=True)
244
+ return loss
245
+
246
+ def validation_step(self, batch, batch_idx):
247
+ loss = self._common_step(batch, batch_idx)
248
+ self.log(name="val_loss", value=loss, on_step=True, on_epoch=True, prog_bar=True)
249
+ return loss
250
+
251
+ def test_step(self, batch, batch_idx):
252
+ class_acc, noobj_acc, obj_acc = check_class_accuracy(
253
+ model=self,
254
+ loader=self.datamodule.test_dataloader(),
255
+ threshold=config_.CONF_THRESHOLD,
256
+ )
257
+
258
+ self.log_dict(
259
+ {
260
+ "class_acc": class_acc,
261
+ "noobj_acc": noobj_acc,
262
+ "obj_acc": obj_acc,
263
+ },
264
+ prog_bar=True,
265
+ )
266
+
267
+
268
+ if __name__ == "__main__":
269
+ num_classes = 20
270
+ IMAGE_SIZE = 416
271
+ model = YOLOv3(num_classes=num_classes)
272
+ x = torch.randn((2, 3, IMAGE_SIZE, IMAGE_SIZE))
273
+ out = model(x)
274
+ assert model(x)[0].shape == (
275
+ 2,
276
+ 3,
277
+ IMAGE_SIZE // 32,
278
+ IMAGE_SIZE // 32,
279
+ num_classes + 5,
280
+ )
281
+ assert model(x)[1].shape == (
282
+ 2,
283
+ 3,
284
+ IMAGE_SIZE // 16,
285
+ IMAGE_SIZE // 16,
286
+ num_classes + 5,
287
+ )
288
+ assert model(x)[2].shape == (
289
+ 2,
290
+ 3,
291
+ IMAGE_SIZE // 8,
292
+ IMAGE_SIZE // 8,
293
+ num_classes + 5,
294
+ )
295
+ print("Success!")
utils/common.py ADDED
@@ -0,0 +1,185 @@
1
+ import numpy as np
2
+ import random
3
+ import matplotlib.pyplot as plt
4
+
5
+ import torch
6
+ import torchvision
7
+ from torchinfo import summary
8
+ from torch_lr_finder import LRFinder
9
+
10
+
11
+ def find_lr(model, optimizer, criterion, device, trainloader, numiter, startlr, endlr):
12
+ lr_finder = LRFinder(
13
+ model=model, optimizer=optimizer, criterion=criterion, device=device
14
+ )
15
+
16
+ lr_finder.range_test(
17
+ train_loader=trainloader,
18
+ start_lr=startlr,
19
+ end_lr=endlr,
20
+ num_iter=numiter,
21
+ step_mode="exp",
22
+ )
23
+
24
+ lr_finder.plot()
25
+
26
+ lr_finder.reset()
27
+
28
+
29
+ def one_cycle_lr(optimizer, maxlr, steps, epochs):
30
+ scheduler = torch.optim.lr_scheduler.OneCycleLR(
31
+ optimizer=optimizer,
32
+ max_lr=maxlr,
33
+ steps_per_epoch=steps,
34
+ epochs=epochs,
35
+ pct_start=5 / epochs,
36
+ div_factor=100,
37
+ three_phase=False,
38
+ final_div_factor=100,
39
+ anneal_strategy="linear",
40
+ )
41
+ return scheduler
42
+
43
+
44
+ def show_random_images_for_each_class(train_data, num_images_per_class=16):
45
+ for c, cls in enumerate(train_data.classes):
46
+ rand_targets = random.sample(
47
+ [n for n, x in enumerate(train_data.targets) if x == c],
48
+ k=num_images_per_class,
49
+ )
50
+ show_img_grid(np.transpose(train_data.data[rand_targets], axes=(0, 3, 1, 2)))
51
+ plt.title(cls)
52
+
53
+
54
+ def show_img_grid(data):
55
+ try:
56
+ grid_img = torchvision.utils.make_grid(data.cpu().detach())
57
+ except AttributeError:  # data is a NumPy array without .cpu()/.detach()
58
+ data = torch.from_numpy(data)
59
+ grid_img = torchvision.utils.make_grid(data)
60
+
61
+ plt.figure(figsize=(10, 10))
62
+ plt.imshow(grid_img.permute(1, 2, 0))
63
+
64
+
65
+ def show_random_images(data_loader):
66
+ data, target = next(iter(data_loader))
67
+ show_img_grid(data)
68
+
69
+
70
+ def show_model_summary(model, batch_size):
71
+ summary(
72
+ model=model,
73
+ input_size=(batch_size, 3, 32, 32),
74
+ col_names=["input_size", "output_size", "num_params", "kernel_size"],
75
+ verbose=1,
76
+ )
77
+
78
+
79
+ def lossacc_plots(results):
80
+ plt.plot(results["epoch"], results["trainloss"])
81
+ plt.plot(results["epoch"], results["testloss"])
82
+ plt.legend(["Train Loss", "Validation Loss"])
83
+ plt.xlabel("Epochs")
84
+ plt.ylabel("Loss")
85
+ plt.title("Loss vs Epochs")
86
+ plt.show()
87
+
88
+ plt.plot(results["epoch"], results["trainacc"])
89
+ plt.plot(results["epoch"], results["testacc"])
90
+ plt.legend(["Train Acc", "Validation Acc"])
91
+ plt.xlabel("Epochs")
92
+ plt.ylabel("Accuracy")
93
+ plt.title("Accuracy vs Epochs")
94
+ plt.show()
95
+
96
+
97
+ def lr_plots(results, length):
98
+ plt.plot(range(length), results["lr"])
99
+ plt.xlabel("Epochs")
100
+ plt.ylabel("Learning Rate")
101
+ plt.title("Learning Rate vs Epochs")
102
+ plt.show()
103
+
104
+
105
+ def get_misclassified(model, testloader, device, mis_count=10):
106
+ misimgs, mistgts, mispreds = [], [], []
107
+ with torch.no_grad():
108
+ for data, target in testloader:
109
+ data, target = data.to(device), target.to(device)
110
+ output = model(data)
111
+ pred = output.argmax(dim=1, keepdim=True)
112
+ misclassified = torch.argwhere(pred.squeeze() != target).squeeze()
113
+ for idx in misclassified:
114
+ if len(misimgs) >= mis_count:
115
+ break
116
+ misimgs.append(data[idx])
117
+ mistgts.append(target[idx])
118
+ mispreds.append(pred[idx].squeeze())
119
+ return misimgs, mistgts, mispreds
120
+
121
+
122
+ # def plot_misclassified(misimgs, mistgts, mispreds, classes):
123
+ # fig, axes = plt.subplots(len(misimgs) // 2, 2)
124
+ # fig.tight_layout()
125
+ # for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
126
+ # ax.imshow((img / img.max()).permute(1, 2, 0).cpu())
127
+ # ax.set_title(f"{classes[tgt]} | {classes[pred]}")
128
+ # ax.grid(False)
129
+ # ax.set_axis_off()
130
+ # plt.show()
131
+
132
+ def get_misclassified_data(model, device, test_loader, count):
133
+ """
134
+ Function to run the model on test set and return misclassified images
135
+ :param model: Network Architecture
136
+ :param device: CPU/GPU
137
+ :param test_loader: DataLoader for test set
138
+ """
139
+ # Prepare the model for evaluation i.e. drop the dropout layer
140
+ model.eval()
141
+
142
+ # List to store misclassified Images
143
+ misclassified_data = []
144
+
145
+ # Reset the gradients
146
+ with torch.no_grad():
147
+ # Extract images, labels in a batch
148
+ for data, target in test_loader:
149
+
150
+ # Migrate the data to the device
151
+ data, target = data.to(device), target.to(device)
152
+
153
+ # Extract single image, label from the batch
154
+ for image, label in zip(data, target):
155
+
156
+ # Add batch dimension to the image
157
+ image = image.unsqueeze(0)
158
+
159
+ # Get the model prediction on the image
160
+ output = model(image)
161
+
162
+ # Convert the output from one-hot encoding to a value
163
+ pred = output.argmax(dim=1, keepdim=True)
164
+
165
+ # If prediction is incorrect, append the data
166
+ if pred != label:
167
+ misclassified_data.append((image, label, pred))
168
+ if len(misclassified_data) >= count:
169
+ break
170
+
171
+ return misclassified_data[:count]
172
+
173
+ def plot_misclassified(data, classes, size=(10, 10), rows=2, cols=5, inv_normalize=None):
174
+ fig = plt.figure(figsize=size)
175
+ number_of_samples = len(data)
176
+ for i in range(number_of_samples):
177
+ plt.subplot(rows, cols, i + 1)
178
+ img = data[i][0].squeeze().to('cpu')
179
+ if inv_normalize is not None:
180
+ img = inv_normalize(img)
181
+ plt.imshow(np.transpose(img, (1, 2, 0)))
182
+ plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
183
+ plt.xticks([])
184
+ plt.yticks([])
185
+
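
A small usage sketch of `one_cycle_lr` as it is wired into `YOLOv3.configure_optimizers` (the scheduler is stepped once per batch, i.e. `interval="step"`); the single dummy parameter only exists to build an optimizer, and importing `utils.common` requires the `torchinfo` and `torch-lr-finder` packages:

```python
import torch
from utils.common import one_cycle_lr

params = [torch.nn.Parameter(torch.zeros(1))]  # stands in for model.parameters()
optimizer = torch.optim.Adam(params, lr=1e-3)
scheduler = one_cycle_lr(optimizer=optimizer, maxlr=1e-3, steps=100, epochs=40)  # steps = batches per epoch

for _ in range(5):
    optimizer.step()
    scheduler.step()
print(scheduler.get_last_lr())  # still warming up from maxlr / div_factor towards maxlr
```
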
utils/data.py ADDED
@@ -0,0 +1,294 @@
1
+ """
2
+ Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
3
+ """
4
+
5
+ import numpy as np
6
+ import os
7
+ import pandas as pd
8
+ import torch
9
+ import random
10
+ from PIL import Image, ImageFile
11
+
12
+ import lightning as L
13
+ from torch.utils.data import Dataset, DataLoader
14
+ import config as config
15
+
16
+ from utils.utils import xywhn2xyxy, xyxy2xywhn
17
+
18
+ from utils.utils import (
19
+ cells_to_bboxes,
20
+ iou_width_height as iou,
21
+ non_max_suppression as nms,
22
+ plot_image,
23
+ )
24
+
25
+
26
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
27
+
28
+
29
+ class YOLODataset(Dataset):
30
+ def __init__(
31
+ self,
32
+ csv_file,
33
+ img_dir,
34
+ label_dir,
35
+ anchors,
36
+ image_size=416,
37
+ S=[13, 26, 52],
38
+ C=20,
39
+ transform=None,
40
+ ):
41
+ self.annotations = pd.read_csv(csv_file)
42
+ self.img_dir = img_dir
43
+ self.label_dir = label_dir
44
+ self.image_size = image_size
45
+ self.mosaic_border = [image_size // 2, image_size // 2]
46
+ self.transform = transform
47
+ self.S = S
48
+ self.anchors = torch.tensor(
49
+ anchors[0] + anchors[1] + anchors[2]
50
+ ) # for all 3 scales
51
+ self.num_anchors = self.anchors.shape[0]
52
+ self.num_anchors_per_scale = self.num_anchors // 3
53
+ self.C = C
54
+ self.ignore_iou_thresh = 0.5
55
+
56
+ def __len__(self):
57
+ return len(self.annotations)
58
+
59
+ def load_mosaic(self, index):
60
+ # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
61
+ labels4 = []
62
+ s = self.image_size
63
+ yc, xc = (
64
+ int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
65
+ ) # mosaic center x, y
66
+ indices = [index] + random.choices(
67
+ range(len(self)), k=3
68
+ ) # 3 additional image indices
69
+ random.shuffle(indices)
70
+ for i, index in enumerate(indices):
71
+ # Load image
72
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
73
+ bboxes = np.roll(
74
+ np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
75
+ ).tolist()
76
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
77
+ img = np.array(Image.open(img_path).convert("RGB"))
78
+
79
+ h, w = img.shape[0], img.shape[1]
80
+ labels = np.array(bboxes)
81
+
82
+ # place img in img4
83
+ if i == 0: # top left
84
+ img4 = np.full(
85
+ (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
86
+ ) # base image with 4 tiles
87
+ x1a, y1a, x2a, y2a = (
88
+ max(xc - w, 0),
89
+ max(yc - h, 0),
90
+ xc,
91
+ yc,
92
+ ) # xmin, ymin, xmax, ymax (large image)
93
+ x1b, y1b, x2b, y2b = (
94
+ w - (x2a - x1a),
95
+ h - (y2a - y1a),
96
+ w,
97
+ h,
98
+ ) # xmin, ymin, xmax, ymax (small image)
99
+ elif i == 1: # top right
100
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
101
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
102
+ elif i == 2: # bottom left
103
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
104
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
105
+ elif i == 3: # bottom right
106
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
107
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
108
+
109
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
110
+ padw = x1a - x1b
111
+ padh = y1a - y1b
112
+
113
+ # Labels
114
+ if labels.size:
115
+ labels[:, :-1] = xywhn2xyxy(
116
+ labels[:, :-1], w, h, padw, padh
117
+ ) # normalized xywh to pixel xyxy format
118
+ labels4.append(labels)
119
+
120
+ # Concat/clip labels
121
+ labels4 = np.concatenate(labels4, 0)
122
+ for x in (labels4[:, :-1],):
123
+ np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
124
+ # img4, labels4 = replicate(img4, labels4) # replicate
125
+ labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
126
+ labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
127
+ labels4 = labels4[labels4[:, 2] > 0]
128
+ labels4 = labels4[labels4[:, 3] > 0]
129
+ return img4, labels4
130
+
131
+ def __getitem__(self, index):
132
+ if random.random() >= config.P_MOSAIC:
133
+ image, bboxes = self.load_mosaic(index)
134
+ else:
135
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
136
+ bboxes = np.roll(
137
+ np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
138
+ ).tolist()
139
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
140
+ image = np.array(Image.open(img_path).convert("RGB"))
141
+
142
+ if self.transform:
143
+ augmentations = self.transform(image=image, bboxes=bboxes)
144
+ image = augmentations["image"]
145
+ bboxes = augmentations["bboxes"]
146
+
147
+ # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
148
+ targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
149
+ for box in bboxes:
150
+ iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
151
+ anchor_indices = iou_anchors.argsort(descending=True, dim=0)
152
+ x, y, width, height, class_label = box
153
+ has_anchor = [False] * 3 # each scale should have one anchor
154
+ for anchor_idx in anchor_indices:
155
+ scale_idx = anchor_idx // self.num_anchors_per_scale
156
+ anchor_on_scale = anchor_idx % self.num_anchors_per_scale
157
+ S = self.S[scale_idx]
158
+ i, j = int(S * y), int(S * x) # which cell
159
+ anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
160
+ if not anchor_taken and not has_anchor[scale_idx]:
161
+ targets[scale_idx][anchor_on_scale, i, j, 0] = 1
162
+ x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
163
+ width_cell, height_cell = (
164
+ width * S,
165
+ height * S,
166
+ ) # can be greater than 1 since it's relative to cell
167
+ box_coordinates = torch.tensor(
168
+ [x_cell, y_cell, width_cell, height_cell]
169
+ )
170
+ targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
171
+ targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
172
+ has_anchor[scale_idx] = True
173
+
174
+ elif (
175
+ not anchor_taken
176
+ and iou_anchors[anchor_idx] > self.ignore_iou_thresh
177
+ ):
178
+ targets[scale_idx][
179
+ anchor_on_scale, i, j, 0
180
+ ] = -1 # ignore prediction
181
+
182
+ return image, tuple(targets)
183
+
184
+
185
+ def test():
186
+ anchors = config.ANCHORS
187
+
188
+ transform = config.test_transforms
189
+
190
+ dataset = YOLODataset(
191
+ "COCO/train.csv",
192
+ "COCO/images/images/",
193
+ "COCO/labels/labels_new/",
194
+ S=[13, 26, 52],
195
+ anchors=anchors,
196
+ transform=transform,
197
+ )
198
+ S = [13, 26, 52]
199
+ scaled_anchors = torch.tensor(anchors) / (
200
+ 1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
201
+ )
202
+ loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
203
+ for x, y in loader:
204
+ boxes = []
205
+
206
+ for i in range(y[0].shape[1]):
207
+ anchor = scaled_anchors[i]
208
+ print(anchor.shape)
209
+ print(y[i].shape)
210
+ boxes += cells_to_bboxes(
211
+ y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
212
+ )[0]
213
+ boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
214
+ print(boxes)
215
+ plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
216
+
217
+
218
+ class PascalDataModule(L.LightningDataModule):
219
+ def __init__(
220
+ self,
221
+ train_csv_path=None,
222
+ test_csv_path=None,
223
+ batch_size=512,
224
+ shuffle=True,
225
+ num_workers=4,
226
+ ) -> None:
227
+ super().__init__()
228
+ self.train_csv_path = train_csv_path
229
+ self.test_csv_path = test_csv_path
230
+ self.batch_size = batch_size
231
+ self.shuffle = shuffle
232
+ self.num_workers = num_workers
233
+ self.IMAGE_SIZE = config.IMAGE_SIZE
234
+
235
+ def prepare_data(self) -> None:
236
+ pass
237
+
238
+ def setup(self, stage=None):
239
+ self.train_dataset = YOLODataset(
240
+ self.train_csv_path,
241
+ transform=config.train_transforms,
242
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
243
+ img_dir=config.IMG_DIR,
244
+ label_dir=config.LABEL_DIR,
245
+ anchors=config.ANCHORS,
246
+ )
247
+
248
+ self.val_dataset = YOLODataset(
249
+ self.test_csv_path,
250
+ transform=config.test_transforms,
251
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
252
+ img_dir=config.IMG_DIR,
253
+ label_dir=config.LABEL_DIR,
254
+ anchors=config.ANCHORS,
255
+ )
256
+
257
+ self.test_dataset = YOLODataset(
258
+ self.test_csv_path,
259
+ transform=config.test_transforms,
260
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
261
+ img_dir=config.IMG_DIR,
262
+ label_dir=config.LABEL_DIR,
263
+ anchors=config.ANCHORS,
264
+ )
265
+
266
+ def train_dataloader(self):
267
+ return DataLoader(
268
+ dataset=self.train_dataset,
269
+ batch_size=config.BATCH_SIZE,
270
+ num_workers=config.NUM_WORKERS,
271
+ pin_memory=config.PIN_MEMORY,
272
+ shuffle=True,
273
+ drop_last=False,
274
+ )
275
+
276
+ def val_dataloader(self):
277
+ return DataLoader(
278
+ dataset=self.val_dataset,
279
+ batch_size=config.BATCH_SIZE,
280
+ num_workers=config.NUM_WORKERS,
281
+ pin_memory=config.PIN_MEMORY,
282
+ shuffle=False,
283
+ drop_last=False,
284
+ )
285
+
286
+ def test_dataloader(self):
287
+ return DataLoader(
288
+ dataset=self.test_dataset,
289
+ batch_size=config.BATCH_SIZE,
290
+ num_workers=config.NUM_WORKERS,
291
+ pin_memory=config.PIN_MEMORY,
292
+ shuffle=False,
293
+ drop_last=False,
294
+ )
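
A minimal sketch of one batch from the datamodule, assuming the `PASCAL_VOC` folder from the dataset link in `markdown.py` is unpacked next to the code. Each sample yields one target tensor per scale with the layout `[objectness, x_cell, y_cell, w_cell, h_cell, class]`:

```python
import config
from utils.data import PascalDataModule

dm = PascalDataModule(
    train_csv_path=f"{config.DATASET}/train.csv",
    test_csv_path=f"{config.DATASET}/test.csv",
)
dm.setup()

x, (t13, t26, t52) = next(iter(dm.train_dataloader()))
print(x.shape)                          # (BATCH_SIZE, 3, 416, 416)
print(t13.shape, t26.shape, t52.shape)  # (BATCH_SIZE, 3, 13, 13, 6) ... (BATCH_SIZE, 3, 52, 52, 6)
```
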
utils/gradcam.py ADDED
@@ -0,0 +1,67 @@
1
+ import numpy as np
2
+ from pytorch_grad_cam import GradCAM
3
+ from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
4
+ from pytorch_grad_cam.utils.image import show_cam_on_image
5
+
6
+ import matplotlib.pyplot as plt
7
+
8
+
9
+ def generate_gradcam(model, target_layers, images, labels, rgb_imgs):
10
+ results = []
11
+ cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
12
+
13
+ for image, label, np_image in zip(images, labels, rgb_imgs):
14
+ targets = [ClassifierOutputTarget(label.item())]
15
+
16
+ # You can also pass aug_smooth=True and eigen_smooth=True, to apply smoothing.
17
+ grayscale_cam = cam(
18
+ input_tensor=image.unsqueeze(0), targets=targets, aug_smooth=True
19
+ )
20
+
21
+ # In this example grayscale_cam has only one image in the batch:
22
+ grayscale_cam = grayscale_cam[0, :]
23
+ visualization = show_cam_on_image(
24
+ np_image / np_image.max(), grayscale_cam, use_rgb=True
25
+ )
26
+ results.append(visualization)
27
+ return results
28
+
29
+
30
+ def visualize_gradcam(misimgs, mistgts, mispreds, classes):
31
+ fig, axes = plt.subplots(len(misimgs) // 2, 2)
32
+ fig.tight_layout()
33
+ for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
34
+ ax.imshow(img)
35
+ ax.set_title(f"{classes[tgt]} | {classes[pred]}")
36
+ ax.grid(False)
37
+ ax.set_axis_off()
38
+ plt.show()
39
+
40
+ def plot_gradcam(model, data, classes, target_layers, number_of_samples, inv_normalize=None, targets=None, transparency = 0.60, figsize=(10,10), rows=2, cols=5):
41
+
42
+ fig = plt.figure(figsize=figsize)
43
+
44
+ cam = GradCAM(model=model, target_layers=target_layers, use_cuda=True)
45
+ for i in range(number_of_samples):
46
+ plt.subplot(rows, cols, i + 1)
47
+ input_tensor = data[i][0]
48
+
49
+ # Get the activations of the layer for the images
50
+ grayscale_cam = cam(input_tensor=input_tensor, targets=targets)
51
+ grayscale_cam = grayscale_cam[0, :]
52
+
53
+ # Get back the original image
54
+ img = input_tensor.squeeze(0).to('cpu')
55
+ if inv_normalize is not None:
56
+ img = inv_normalize(img)
57
+ rgb_img = np.transpose(img, (1, 2, 0))
58
+ rgb_img = rgb_img.numpy()
59
+
60
+ # Mix the activations on the original image
61
+ visualization = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency)
62
+
63
+ # Display the images on the plot
64
+ plt.imshow(visualization)
65
+ plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
66
+ plt.xticks([])
67
+ plt.yticks([])
utils/loss.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Implementation of Yolo Loss Function similar to the one in Yolov3 paper,
3
+ the main difference, as far as I can tell, is that CrossEntropy is used for the classes
4
+ instead of BinaryCrossEntropy.
5
+ """
6
+ import random
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ from utils.utils import intersection_over_union
11
+
12
+
13
+ class YoloLoss(nn.Module):
14
+ def __init__(self):
15
+ super().__init__()
16
+ self.mse = nn.MSELoss()
17
+ self.bce = nn.BCEWithLogitsLoss()
18
+ self.entropy = nn.CrossEntropyLoss()
19
+ self.sigmoid = nn.Sigmoid()
20
+
21
+ # Constants signifying how much to pay for each respective part of the loss
22
+ self.lambda_class = 1
23
+ self.lambda_noobj = 10
24
+ self.lambda_obj = 1
25
+ self.lambda_box = 10
26
+
27
+ def forward(self, predictions, target, anchors):
28
+ # Check where obj and noobj (we ignore if target == -1)
29
+ obj = target[..., 0] == 1 # in paper this is Iobj_i
30
+ noobj = target[..., 0] == 0 # in paper this is Inoobj_i
31
+
32
+ # ======================= #
33
+ # FOR NO OBJECT LOSS #
34
+ # ======================= #
35
+
36
+ no_object_loss = self.bce(
37
+ (predictions[..., 0:1][noobj]),
38
+ (target[..., 0:1][noobj]),
39
+ )
40
+
41
+ # ==================== #
42
+ # FOR OBJECT LOSS #
43
+ # ==================== #
44
+ anchors = anchors.reshape(1, 3, 1, 1, 2)
45
+
46
+ box_preds = torch.cat(
47
+ [
48
+ self.sigmoid(predictions[..., 1:3]),
49
+ torch.exp(predictions[..., 3:5]) * anchors,
50
+ ],
51
+ dim=-1,
52
+ )
53
+ ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
54
+ # ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj])
55
+ object_loss = self.mse(
56
+ self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj]
57
+ )
58
+
59
+ # ======================== #
60
+ # FOR BOX COORDINATES #
61
+ # ======================== #
62
+
63
+ predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3]) # x,y coordinates
64
+ target[..., 3:5] = torch.log(
65
+ (1e-16 + target[..., 3:5] / anchors)
66
+ ) # width, height coordinates
67
+ box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
68
+
69
+ # ================== #
70
+ # FOR CLASS LOSS #
71
+ # ================== #
72
+
73
+ class_loss = self.entropy(
74
+ (predictions[..., 5:][obj]),
75
+ (target[..., 5][obj].long()),
76
+ )
77
+
78
+ # print("__________________________________")
79
+ # print(self.lambda_box * box_loss)
80
+ # print(self.lambda_obj * object_loss)
81
+ # print(self.lambda_noobj * no_object_loss)
82
+ # print(self.lambda_class * class_loss)
83
+ # print("\n")
84
+
85
+ return (
86
+ self.lambda_box * box_loss
87
+ + self.lambda_obj * object_loss
88
+ + self.lambda_noobj * no_object_loss
89
+ + self.lambda_class * class_loss
90
+ )
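
A small sketch of the tensor shapes `YoloLoss` expects for a single scale (here the 13x13 grid). The anchors are arbitrary stand-ins in grid-cell units, and the one hand-placed box just exercises the object, box and class terms:

```python
import torch
from utils.loss import YoloLoss

criterion = YoloLoss()

preds = torch.randn(2, 3, 13, 13, 25)  # (batch, anchors, S, S, 5 + num_classes)
target = torch.zeros(2, 3, 13, 13, 6)  # (batch, anchors, S, S, [obj, x, y, w, h, class])
target[0, 0, 5, 5] = torch.tensor([1.0, 0.5, 0.5, 2.0, 3.0, 7.0])  # one positive cell, class index 7
anchors = torch.rand(3, 2) * 13        # 3 anchors for this scale, already in grid-cell units

loss = criterion(preds, target, anchors)
print(loss.item())
```
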
utils/utils.py ADDED
@@ -0,0 +1,594 @@
1
+ import config
2
+ import matplotlib.pyplot as plt
3
+ import matplotlib.patches as patches
4
+ import numpy as np
5
+ import os
6
+ import random
7
+ import torch
8
+
9
+ from collections import Counter
10
+ from torch.utils.data import DataLoader
11
+ from tqdm import tqdm
12
+
13
+
14
+ def iou_width_height(boxes1, boxes2):
15
+ """
16
+ Parameters:
17
+ boxes1 (tensor): width and height of the first bounding boxes
18
+ boxes2 (tensor): width and height of the second bounding boxes
19
+ Returns:
20
+ tensor: Intersection over union of the corresponding boxes
21
+ """
22
+ intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
23
+ boxes1[..., 1], boxes2[..., 1]
24
+ )
25
+ union = (
26
+ boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
27
+ )
28
+ return intersection / union
29
+
30
+
31
+ def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
32
+ """
33
+ Video explanation of this function:
34
+ https://youtu.be/XXYG5ZWtjj0
35
+
36
+ This function calculates intersection over union (iou) given pred boxes
37
+ and target boxes.
38
+
39
+ Parameters:
40
+ boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
41
+ boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
42
+ box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
43
+
44
+ Returns:
45
+ tensor: Intersection over union for all examples
46
+ """
47
+
48
+ if box_format == "midpoint":
49
+ box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
50
+ box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
51
+ box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
52
+ box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
53
+ box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
54
+ box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
55
+ box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
56
+ box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
57
+
58
+ if box_format == "corners":
59
+ box1_x1 = boxes_preds[..., 0:1]
60
+ box1_y1 = boxes_preds[..., 1:2]
61
+ box1_x2 = boxes_preds[..., 2:3]
62
+ box1_y2 = boxes_preds[..., 3:4]
63
+ box2_x1 = boxes_labels[..., 0:1]
64
+ box2_y1 = boxes_labels[..., 1:2]
65
+ box2_x2 = boxes_labels[..., 2:3]
66
+ box2_y2 = boxes_labels[..., 3:4]
67
+
68
+ x1 = torch.max(box1_x1, box2_x1)
69
+ y1 = torch.max(box1_y1, box2_y1)
70
+ x2 = torch.min(box1_x2, box2_x2)
71
+ y2 = torch.min(box1_y2, box2_y2)
72
+
73
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
74
+ box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
75
+ box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
76
+
77
+ return intersection / (box1_area + box2_area - intersection + 1e-6)
78
+
79
+
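
A worked check of the midpoint format (a sketch; `config` is imported first because it and `utils/utils.py` import each other): two unit squares whose centres are half a side apart overlap by 0.5, so the IoU is 0.5 / 1.5 ≈ 0.33.

```python
import config  # noqa: F401  -- config and utils.utils import each other; load config first
import torch
from utils.utils import intersection_over_union

a = torch.tensor([[0.5, 0.5, 1.0, 1.0]])  # centre (0.5, 0.5), width 1, height 1
b = torch.tensor([[1.0, 0.5, 1.0, 1.0]])  # same box shifted right by 0.5
print(intersection_over_union(a, b, box_format="midpoint"))  # tensor([[0.3333]])
```
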
80
+ def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
81
+ """
82
+ Video explanation of this function:
83
+ https://youtu.be/YDkjWEN8jNA
84
+
85
+ Does Non Max Suppression given bboxes
86
+
87
+ Parameters:
88
+ bboxes (list): list of lists containing all bboxes with each bboxes
89
+ specified as [class_pred, prob_score, x1, y1, x2, y2]
90
+ iou_threshold (float): IoU above which an overlapping box of the same class is suppressed
91
+ threshold (float): threshold to remove predicted bboxes (independent of IoU)
92
+ box_format (str): "midpoint" or "corners" used to specify bboxes
93
+
94
+ Returns:
95
+ list: bboxes after performing NMS given a specific IoU threshold
96
+ """
97
+
98
+ assert type(bboxes) == list
99
+
100
+ bboxes = [box for box in bboxes if box[1] > threshold]
101
+ bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
102
+ bboxes_after_nms = []
103
+
104
+ while bboxes:
105
+ chosen_box = bboxes.pop(0)
106
+
107
+ bboxes = [
108
+ box
109
+ for box in bboxes
110
+ if box[0] != chosen_box[0]
111
+ or intersection_over_union(
112
+ torch.tensor(chosen_box[2:]),
113
+ torch.tensor(box[2:]),
114
+ box_format=box_format,
115
+ )
116
+ < iou_threshold
117
+ ]
118
+
119
+ bboxes_after_nms.append(chosen_box)
120
+
121
+ return bboxes_after_nms
122
+
123
+
124
+ def mean_average_precision(
125
+ pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
126
+ ):
127
+ """
128
+ Video explanation of this function:
129
+ https://youtu.be/FppOzcDvaDI
130
+
131
+ This function calculates mean average precision (mAP)
132
+
133
+ Parameters:
134
+ pred_boxes (list): list of lists containing all bboxes with each bboxes
135
+ specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
136
+ true_boxes (list): Similar as pred_boxes except all the correct ones
137
+ iou_threshold (float): IoU threshold above which a predicted bbox counts as correct
138
+ box_format (str): "midpoint" or "corners" used to specify bboxes
139
+ num_classes (int): number of classes
140
+
141
+ Returns:
142
+ float: mAP value across all classes given a specific IoU threshold
143
+ """
144
+
145
+ # list storing all AP for respective classes
146
+ average_precisions = []
147
+
148
+ # used for numerical stability later on
149
+ epsilon = 1e-6
150
+
151
+ for c in range(num_classes):
152
+ detections = []
153
+ ground_truths = []
154
+
155
+ # Go through all predictions and targets,
156
+ # and only add the ones that belong to the
157
+ # current class c
158
+ for detection in pred_boxes:
159
+ if detection[1] == c:
160
+ detections.append(detection)
161
+
162
+ for true_box in true_boxes:
163
+ if true_box[1] == c:
164
+ ground_truths.append(true_box)
165
+
166
+ # find the amount of bboxes for each training example
167
+ # Counter here finds how many ground truth bboxes we get
168
+ # for each training example, so let's say img 0 has 3,
169
+ # img 1 has 5 then we will obtain a dictionary with:
170
+ # amount_bboxes = {0:3, 1:5}
171
+ amount_bboxes = Counter([gt[0] for gt in ground_truths])
172
+
173
+ # We then go through each key, val in this dictionary
174
+ # and convert to the following (w.r.t same example):
175
+ # amount_bboxes = {0:torch.tensor[0,0,0], 1:torch.tensor[0,0,0,0,0]}
176
+ for key, val in amount_bboxes.items():
177
+ amount_bboxes[key] = torch.zeros(val)
178
+
179
+ # sort by box probabilities which is index 2
180
+ detections.sort(key=lambda x: x[2], reverse=True)
181
+ TP = torch.zeros((len(detections)))
182
+ FP = torch.zeros((len(detections)))
183
+ total_true_bboxes = len(ground_truths)
184
+
185
+ # If none exists for this class then we can safely skip
186
+ if total_true_bboxes == 0:
187
+ continue
188
+
189
+ for detection_idx, detection in enumerate(detections):
190
+ # Only take out the ground_truths that have the same
191
+ # training idx as detection
192
+ ground_truth_img = [
193
+ bbox for bbox in ground_truths if bbox[0] == detection[0]
194
+ ]
195
+
196
+ num_gts = len(ground_truth_img)
197
+ best_iou = 0
198
+
199
+ for idx, gt in enumerate(ground_truth_img):
200
+ iou = intersection_over_union(
201
+ torch.tensor(detection[3:]),
202
+ torch.tensor(gt[3:]),
203
+ box_format=box_format,
204
+ )
205
+
206
+ if iou > best_iou:
207
+ best_iou = iou
208
+ best_gt_idx = idx
209
+
210
+ if best_iou > iou_threshold:
211
+ # only detect ground truth detection once
212
+ if amount_bboxes[detection[0]][best_gt_idx] == 0:
213
+ # true positive and add this bounding box to seen
214
+ TP[detection_idx] = 1
215
+ amount_bboxes[detection[0]][best_gt_idx] = 1
216
+ else:
217
+ FP[detection_idx] = 1
218
+
219
+ # if IoU is lower than the threshold, the detection is a false positive
220
+ else:
221
+ FP[detection_idx] = 1
222
+
223
+ TP_cumsum = torch.cumsum(TP, dim=0)
224
+ FP_cumsum = torch.cumsum(FP, dim=0)
225
+ recalls = TP_cumsum / (total_true_bboxes + epsilon)
226
+ precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
227
+ precisions = torch.cat((torch.tensor([1]), precisions))
228
+ recalls = torch.cat((torch.tensor([0]), recalls))
229
+ # torch.trapz for numerical integration
230
+ average_precisions.append(torch.trapz(precisions, recalls))
231
+
232
+ return sum(average_precisions) / len(average_precisions)
233
+
234
+
235
+ def plot_image(image, boxes):
236
+ """Plots predicted bounding boxes on the image"""
237
+ cmap = plt.get_cmap("tab20b")
238
+ class_labels = (
239
+ config.COCO_LABELS if config.DATASET == "COCO" else config.PASCAL_CLASSES
240
+ )
241
+ colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
242
+ im = np.array(image)
243
+ height, width, _ = im.shape
244
+
245
+ # Create figure and axes
246
+ fig, ax = plt.subplots(1)
247
+ # Display the image
248
+ ax.imshow(im)
249
+
250
+ # box[0] is x midpoint, box[2] is width
251
+ # box[1] is y midpoint, box[3] is height
252
+
253
+ # Create a Rectangle patch
254
+ for box in boxes:
255
+ assert (
256
+ len(box) == 6
257
+ ), "box should contain class pred, confidence, x, y, width, height"
258
+ class_pred = box[0]
259
+ box = box[2:]
260
+ upper_left_x = box[0] - box[2] / 2
261
+ upper_left_y = box[1] - box[3] / 2
262
+ rect = patches.Rectangle(
263
+ (upper_left_x * width, upper_left_y * height),
264
+ box[2] * width,
265
+ box[3] * height,
266
+ linewidth=2,
267
+ edgecolor=colors[int(class_pred)],
268
+ facecolor="none",
269
+ )
270
+ # Add the patch to the Axes
271
+ ax.add_patch(rect)
272
+ plt.text(
273
+ upper_left_x * width,
274
+ upper_left_y * height,
275
+ s=class_labels[int(class_pred)],
276
+ color="white",
277
+ verticalalignment="top",
278
+ bbox={"color": colors[int(class_pred)], "pad": 0},
279
+ )
280
+
281
+ plt.show()
282
+
283
+
284
+ def get_evaluation_bboxes(
285
+ loader,
286
+ model,
287
+ iou_threshold,
288
+ anchors,
289
+ threshold,
290
+ box_format="midpoint",
291
+ device="cuda",
292
+ ):
293
+ # make sure model is in eval mode before getting bboxes
294
+ model.eval()
295
+ train_idx = 0
296
+ all_pred_boxes = []
297
+ all_true_boxes = []
298
+ for batch_idx, (x, labels) in enumerate(tqdm(loader)):
299
+ x = x.to(device)
300
+
301
+ with torch.no_grad():
302
+ predictions = model(x)
303
+
304
+ batch_size = x.shape[0]
305
+ bboxes = [[] for _ in range(batch_size)]
306
+ for i in range(3):
307
+ S = predictions[i].shape[2]
308
+ anchor = torch.tensor([*anchors[i]]).to(device) * S
309
+ boxes_scale_i = cells_to_bboxes(predictions[i], anchor, S=S, is_preds=True)
310
+ for idx, (box) in enumerate(boxes_scale_i):
311
+ bboxes[idx] += box
312
+
313
+ # we just want one bbox for each label, not one for each scale
314
+ true_bboxes = cells_to_bboxes(labels[2], anchor, S=S, is_preds=False)
315
+
316
+ for idx in range(batch_size):
317
+ nms_boxes = non_max_suppression(
318
+ bboxes[idx],
319
+ iou_threshold=iou_threshold,
320
+ threshold=threshold,
321
+ box_format=box_format,
322
+ )
323
+
324
+ for nms_box in nms_boxes:
325
+ all_pred_boxes.append([train_idx] + nms_box)
326
+
327
+ for box in true_bboxes[idx]:
328
+ if box[1] > threshold:
329
+ all_true_boxes.append([train_idx] + box)
330
+
331
+ train_idx += 1
332
+
333
+ model.train()
334
+ return all_pred_boxes, all_true_boxes
335
+
336
+
337
+ def cells_to_bboxes(predictions, anchors, S, is_preds=True):
338
+ """
339
+ Scales the predictions coming from the model to
340
+ be relative to the entire image so that they can,
341
+ for example, later be plotted or evaluated.
342
+ INPUT:
343
+ predictions: tensor of size (N, 3, S, S, num_classes+5)
344
+ anchors: the anchors used for the predictions
345
+ S: the number of cells the image is divided in on the width (and height)
346
+ is_preds: whether the input is predictions or the true bounding boxes
347
+ OUTPUT:
348
+ converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
349
+ object score, bounding box coordinates
350
+ """
351
+ BATCH_SIZE = predictions.shape[0]
352
+ num_anchors = len(anchors)
353
+ box_predictions = predictions[..., 1:5]
354
+ if is_preds:
355
+ anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
356
+ box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
357
+ box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
358
+ scores = torch.sigmoid(predictions[..., 0:1])
359
+ best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
360
+ else:
361
+ scores = predictions[..., 0:1]
362
+ best_class = predictions[..., 5:6]
363
+
364
+ cell_indices = (
365
+ torch.arange(S)
366
+ .repeat(predictions.shape[0], 3, S, 1)
367
+ .unsqueeze(-1)
368
+ .to(predictions.device)
369
+ )
370
+ x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
371
+ y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
372
+ w_h = 1 / S * box_predictions[..., 2:4]
373
+ converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(
374
+ BATCH_SIZE, num_anchors * S * S, 6
375
+ )
376
+ return converted_bboxes.tolist()
377
+
378
+
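For example, a single 13x13 scale could be converted like this (shapes only; the tensors here are random placeholders):

preds_scale = torch.randn(2, 3, 13, 13, 25)        # N=2, 3 anchors, S=13, 20 classes + 5
anchors_scale = torch.rand(3, 2) * 13              # anchors already scaled to the grid
boxes = cells_to_bboxes(preds_scale, anchors_scale, S=13, is_preds=True)
# boxes is a list with 2 entries, each holding 3*13*13 boxes of [class, score, x, y, w, h]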
379
+ def check_class_accuracy(model, loader, threshold):
380
+ model.eval()
381
+ tot_class_preds, correct_class = 0, 0
382
+ tot_noobj, correct_noobj = 0, 0
383
+ tot_obj, correct_obj = 0, 0
384
+
385
+ for idx, (x, y) in enumerate(tqdm(loader)):
386
+ x = x.to(config.DEVICE)
387
+ with torch.no_grad():
388
+ out = model(x)
389
+
390
+ for i in range(3):
391
+ y[i] = y[i].to(config.DEVICE)
392
+ obj = y[i][..., 0] == 1 # in paper this is Iobj_i
393
+ noobj = y[i][..., 0] == 0 # in paper this is Inoobj_i
394
+
395
+ correct_class += torch.sum(
396
+ torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
397
+ )
398
+ tot_class_preds += torch.sum(obj)
399
+
400
+ obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
401
+ correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
402
+ tot_obj += torch.sum(obj)
403
+ correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
404
+ tot_noobj += torch.sum(noobj)
405
+
406
+ class_acc = (correct_class / (tot_class_preds + 1e-16)) * 100
407
+ noobj_acc = (correct_noobj / (tot_noobj + 1e-16)) * 100
408
+ obj_acc = (correct_obj / (tot_obj + 1e-16)) * 100
409
+
410
+ print(f"Class accuracy is: {class_acc:.2f}%")
411
+ print(f"No obj accuracy is: {noobj_acc:.2f}%")
412
+ print(f"Obj accuracy is: {obj_acc:.2f}%")
413
+ model.train()
414
+ return class_acc, noobj_acc, obj_acc
415
+
416
+
417
+ def get_mean_std(loader):
418
+ # var[X] = E[X**2] - E[X]**2
419
+ channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
420
+
421
+ for data, _ in tqdm(loader):
422
+ channels_sum += torch.mean(data, dim=[0, 2, 3])
423
+ channels_sqrd_sum += torch.mean(data**2, dim=[0, 2, 3])
424
+ num_batches += 1
425
+
426
+ mean = channels_sum / num_batches
427
+ std = (channels_sqrd_sum / num_batches - mean**2) ** 0.5
428
+
429
+ return mean, std
430
+
431
+
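A small usage sketch (train_loader is assumed to yield (image, target) batches; the Normalize call is just one way the result might be consumed):

mean, std = get_mean_std(train_loader)
# e.g. torchvision.transforms.Normalize(mean=mean.tolist(), std=std.tolist())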
432
+ def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
433
+ print("=> Saving checkpoint")
434
+ checkpoint = {
435
+ "state_dict": model.state_dict(),
436
+ "optimizer": optimizer.state_dict(),
437
+ }
438
+ torch.save(checkpoint, filename)
439
+
440
+
441
+ def load_checkpoint(checkpoint_file, model, optimizer, lr):
442
+ print("=> Loading checkpoint")
443
+ checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
444
+ model.load_state_dict(checkpoint["state_dict"])
445
+ optimizer.load_state_dict(checkpoint["optimizer"])
446
+
447
+ # If we don't do this, the optimizer will keep the old checkpoint's learning rate,
448
+ # which can lead to many hours of debugging :\
449
+ for param_group in optimizer.param_groups:
450
+ param_group["lr"] = lr
451
+
452
+
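A minimal save/load round trip might look like this (config.LEARNING_RATE is an assumed config entry, named here only for illustration):

save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar")
load_checkpoint("my_checkpoint.pth.tar", model, optimizer, lr=config.LEARNING_RATE)
# the lr argument overrides the learning rate stored in the checkpoint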
453
+ def get_loaders(train_csv_path, test_csv_path):
454
+ from dataset import YOLODataset
455
+
456
+ IMAGE_SIZE = config.IMAGE_SIZE
457
+ train_dataset = YOLODataset(
458
+ train_csv_path,
459
+ transform=config.train_transforms,
460
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
461
+ img_dir=config.IMG_DIR,
462
+ label_dir=config.LABEL_DIR,
463
+ anchors=config.ANCHORS,
464
+ )
465
+ test_dataset = YOLODataset(
466
+ test_csv_path,
467
+ transform=config.test_transforms,
468
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
469
+ img_dir=config.IMG_DIR,
470
+ label_dir=config.LABEL_DIR,
471
+ anchors=config.ANCHORS,
472
+ )
473
+ train_loader = DataLoader(
474
+ dataset=train_dataset,
475
+ batch_size=config.BATCH_SIZE,
476
+ num_workers=config.NUM_WORKERS,
477
+ pin_memory=config.PIN_MEMORY,
478
+ shuffle=True,
479
+ drop_last=False,
480
+ )
481
+ test_loader = DataLoader(
482
+ dataset=test_dataset,
483
+ batch_size=config.BATCH_SIZE,
484
+ num_workers=config.NUM_WORKERS,
485
+ pin_memory=config.PIN_MEMORY,
486
+ shuffle=False,
487
+ drop_last=False,
488
+ )
489
+
490
+ train_eval_dataset = YOLODataset(
491
+ train_csv_path,
492
+ transform=config.test_transforms,
493
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
494
+ img_dir=config.IMG_DIR,
495
+ label_dir=config.LABEL_DIR,
496
+ anchors=config.ANCHORS,
497
+ )
498
+ train_eval_loader = DataLoader(
499
+ dataset=train_eval_dataset,
500
+ batch_size=config.BATCH_SIZE,
501
+ num_workers=config.NUM_WORKERS,
502
+ pin_memory=config.PIN_MEMORY,
503
+ shuffle=False,
504
+ drop_last=False,
505
+ )
506
+
507
+ return train_loader, test_loader, train_eval_loader
508
+
509
+
510
+ def plot_couple_examples(model, loader, thresh, iou_thresh, anchors):
511
+ model.eval()
512
+ x, y = next(iter(loader))
513
+ x = x.to("cuda")
514
+ with torch.no_grad():
515
+ out = model(x)
516
+ bboxes = [[] for _ in range(x.shape[0])]
517
+ for i in range(3):
518
+ batch_size, A, S, _, _ = out[i].shape
519
+ anchor = anchors[i]
520
+ boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
521
+ for idx, (box) in enumerate(boxes_scale_i):
522
+ bboxes[idx] += box
523
+
524
+ model.train()
525
+
526
+ for i in range(batch_size // 4):
527
+ nms_boxes = non_max_suppression(
528
+ bboxes[i],
529
+ iou_threshold=iou_thresh,
530
+ threshold=thresh,
531
+ box_format="midpoint",
532
+ )
533
+ plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
534
+
535
+
536
+ def seed_everything(seed=42):
537
+ os.environ["PYTHONHASHSEED"] = str(seed)
538
+ random.seed(seed)
539
+ np.random.seed(seed)
540
+ torch.manual_seed(seed)
541
+ torch.cuda.manual_seed(seed)
542
+ torch.cuda.manual_seed_all(seed)
543
+ torch.backends.cudnn.deterministic = True
544
+ torch.backends.cudnn.benchmark = False
545
+
546
+
547
+ def clip_coords(boxes, img_shape):
548
+ # Clip xyxy bounding boxes to image shape (height, width)
549
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
550
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
551
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
552
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
553
+
554
+
555
+ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
556
+ # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
557
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
558
+ y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
559
+ y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
560
+ y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x
561
+ y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y
562
+ return y
563
+
564
+
565
+ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
566
+ # Convert normalized segments into pixel segments, shape (n,2)
567
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
568
+ y[..., 0] = w * x[..., 0] + padw # top left x
569
+ y[..., 1] = h * x[..., 1] + padh # top left y
570
+ return y
571
+
572
+
573
+ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
574
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
575
+ if clip:
576
+ clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
577
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
578
+ y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center
579
+ y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center
580
+ y[..., 2] = (x[..., 2] - x[..., 0]) / w # width
581
+ y[..., 3] = (x[..., 3] - x[..., 1]) / h # height
582
+ return y
583
+
584
+
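A quick round-trip check with made-up numbers on a 640x640 image:

b = np.array([[0.5, 0.5, 0.25, 0.5]])      # normalized [x, y, w, h]
xyxy = xywhn2xyxy(b, w=640, h=640)          # -> [[240., 160., 400., 480.]]
back = xyxy2xywhn(xyxy, w=640, h=640)       # -> [[0.5, 0.5, 0.25, 0.5]]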
585
+ def clip_boxes(boxes, shape):
586
+ # Clip boxes (xyxy) to image shape (height, width)
587
+ if isinstance(boxes, torch.Tensor): # faster individually
588
+ boxes[..., 0].clamp_(0, shape[1]) # x1
589
+ boxes[..., 1].clamp_(0, shape[0]) # y1
590
+ boxes[..., 2].clamp_(0, shape[1]) # x2
591
+ boxes[..., 3].clamp_(0, shape[0]) # y2
592
+ else: # np.array (faster grouped)
593
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
594
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
utils/utils/common.py ADDED
@@ -0,0 +1,185 @@
1
+ import numpy as np
2
+ import random
3
+ import matplotlib.pyplot as plt
4
+
5
+ import torch
6
+ import torchvision
7
+ from torchinfo import summary
8
+ from torch_lr_finder import LRFinder
9
+
10
+
11
+ def find_lr(model, optimizer, criterion, device, trainloader, numiter, startlr, endlr):
12
+ lr_finder = LRFinder(
13
+ model=model, optimizer=optimizer, criterion=criterion, device=device
14
+ )
15
+
16
+ lr_finder.range_test(
17
+ train_loader=trainloader,
18
+ start_lr=startlr,
19
+ end_lr=endlr,
20
+ num_iter=numiter,
21
+ step_mode="exp",
22
+ )
23
+
24
+ lr_finder.plot()
25
+
26
+ lr_finder.reset()
27
+
28
+
29
+ def one_cycle_lr(optimizer, maxlr, steps, epochs):
30
+ scheduler = torch.optim.lr_scheduler.OneCycleLR(
31
+ optimizer=optimizer,
32
+ max_lr=maxlr,
33
+ steps_per_epoch=steps,
34
+ epochs=epochs,
35
+ pct_start=5 / epochs,
36
+ div_factor=100,
37
+ three_phase=False,
38
+ final_div_factor=100,
39
+ anneal_strategy="linear",
40
+ )
41
+ return scheduler
42
+
43
+
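A sketch of how these two helpers are typically combined (model, optimizer, criterion and train_loader are assumed to exist; the numbers are placeholders):

find_lr(model, optimizer, criterion, "cuda", train_loader, numiter=100, startlr=1e-5, endlr=1)
scheduler = one_cycle_lr(optimizer, maxlr=1e-3, steps=len(train_loader), epochs=20)
for epoch in range(20):
    for x, y in train_loader:
        ...  # forward / backward / optimizer.step()
        scheduler.step()   # OneCycleLR is stepped once per batch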
44
+ def show_random_images_for_each_class(train_data, num_images_per_class=16):
45
+ for c, cls in enumerate(train_data.classes):
46
+ rand_targets = random.sample(
47
+ [n for n, x in enumerate(train_data.targets) if x == c],
48
+ k=num_images_per_class,
49
+ )
50
+ show_img_grid(np.transpose(train_data.data[rand_targets], axes=(0, 3, 1, 2)))
51
+ plt.title(cls)
52
+
53
+
54
+ def show_img_grid(data):
55
+ try:
56
+ grid_img = torchvision.utils.make_grid(data.cpu().detach())
57
+ except AttributeError:  # data is a numpy array rather than a tensor
58
+ data = torch.from_numpy(data)
59
+ grid_img = torchvision.utils.make_grid(data)
60
+
61
+ plt.figure(figsize=(10, 10))
62
+ plt.imshow(grid_img.permute(1, 2, 0))
63
+
64
+
65
+ def show_random_images(data_loader):
66
+ data, target = next(iter(data_loader))
67
+ show_img_grid(data)
68
+
69
+
70
+ def show_model_summary(model, batch_size):
71
+ summary(
72
+ model=model,
73
+ input_size=(batch_size, 3, 32, 32),
74
+ col_names=["input_size", "output_size", "num_params", "kernel_size"],
75
+ verbose=1,
76
+ )
77
+
78
+
79
+ def lossacc_plots(results):
80
+ plt.plot(results["epoch"], results["trainloss"])
81
+ plt.plot(results["epoch"], results["testloss"])
82
+ plt.legend(["Train Loss", "Validation Loss"])
83
+ plt.xlabel("Epochs")
84
+ plt.ylabel("Loss")
85
+ plt.title("Loss vs Epochs")
86
+ plt.show()
87
+
88
+ plt.plot(results["epoch"], results["trainacc"])
89
+ plt.plot(results["epoch"], results["testacc"])
90
+ plt.legend(["Train Acc", "Validation Acc"])
91
+ plt.xlabel("Epochs")
92
+ plt.ylabel("Accuracy")
93
+ plt.title("Accuracy vs Epochs")
94
+ plt.show()
95
+
96
+
97
+ def lr_plots(results, length):
98
+ plt.plot(range(length), results["lr"])
99
+ plt.xlabel("Epochs")
100
+ plt.ylabel("Learning Rate")
101
+ plt.title("Learning Rate vs Epochs")
102
+ plt.show()
103
+
104
+
105
+ def get_misclassified(model, testloader, device, mis_count=10):
106
+ misimgs, mistgts, mispreds = [], [], []
107
+ with torch.no_grad():
108
+ for data, target in testloader:
109
+ data, target = data.to(device), target.to(device)
110
+ output = model(data)
111
+ pred = output.argmax(dim=1, keepdim=True)
112
+ misclassified = torch.argwhere(pred.squeeze() != target).squeeze()
113
+ for idx in misclassified:
114
+ if len(misimgs) >= mis_count:
115
+ break
116
+ misimgs.append(data[idx])
117
+ mistgts.append(target[idx])
118
+ mispreds.append(pred[idx].squeeze())
119
+ return misimgs, mistgts, mispreds
120
+
121
+
122
+ # def plot_misclassified(misimgs, mistgts, mispreds, classes):
123
+ # fig, axes = plt.subplots(len(misimgs) // 2, 2)
124
+ # fig.tight_layout()
125
+ # for ax, img, tgt, pred in zip(axes.ravel(), misimgs, mistgts, mispreds):
126
+ # ax.imshow((img / img.max()).permute(1, 2, 0).cpu())
127
+ # ax.set_title(f"{classes[tgt]} | {classes[pred]}")
128
+ # ax.grid(False)
129
+ # ax.set_axis_off()
130
+ # plt.show()
131
+
132
+ def get_misclassified_data(model, device, test_loader, count):
133
+ """
134
+ Function to run the model on test set and return misclassified images
135
+ :param model: Network Architecture
136
+ :param device: CPU/GPU
137
+ :param test_loader: DataLoader for test set
+ :param count: maximum number of misclassified samples to return
138
+ """
139
+ # Prepare the model for evaluation i.e. drop the dropout layer
140
+ model.eval()
141
+
142
+ # List to store misclassified Images
143
+ misclassified_data = []
144
+
145
+ # Reset the gradients
146
+ with torch.no_grad():
147
+ # Extract images, labels in a batch
148
+ for data, target in test_loader:
149
+
150
+ # Migrate the data to the device
151
+ data, target = data.to(device), target.to(device)
152
+
153
+ # Extract single image, label from the batch
154
+ for image, label in zip(data, target):
155
+
156
+ # Add batch dimension to the image
157
+ image = image.unsqueeze(0)
158
+
159
+ # Get the model prediction on the image
160
+ output = model(image)
161
+
162
+ # Convert the output from one-hot encoding to a value
163
+ pred = output.argmax(dim=1, keepdim=True)
164
+
165
+ # If prediction is incorrect, append the data
166
+ if pred != label:
167
+ misclassified_data.append((image, label, pred))
168
+ if len(misclassified_data) >= count:
169
+ break
170
+
171
+ return misclassified_data[:count]
172
+
173
+ def plot_misclassified(data, classes, size=(10, 10), rows=2, cols=5, inv_normalize=None):
174
+ fig = plt.figure(figsize=size)
175
+ number_of_samples = len(data)
176
+ for i in range(number_of_samples):
177
+ plt.subplot(rows, cols, i + 1)
178
+ img = data[i][0].squeeze().to('cpu')
179
+ if inv_normalize is not None:
180
+ img = inv_normalize(img)
181
+ plt.imshow(np.transpose(img, (1, 2, 0)))
182
+ plt.title(f"Label: {classes[data[i][1].item()]} \n Prediction: {classes[data[i][2].item()]}")
183
+ plt.xticks([])
184
+ plt.yticks([])
185
+
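Putting the two helpers together (model, device, test_loader, classes and inv_normalize are assumed to be defined elsewhere):

misclassified = get_misclassified_data(model, device, test_loader, count=10)
plot_misclassified(misclassified, classes, rows=2, cols=5, inv_normalize=inv_normalize)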
utils/utils/data.py ADDED
@@ -0,0 +1,294 @@
1
+ """
2
+ Creates a Pytorch dataset to load the Pascal VOC & MS COCO datasets
3
+ """
4
+
5
+ import numpy as np
6
+ import os
7
+ import pandas as pd
8
+ import torch
9
+ import random
10
+ from PIL import Image, ImageFile
11
+
12
+ import lightning as L
13
+ from torch.utils.data import Dataset, DataLoader
14
+ import config as config
15
+
16
+ from utils.utils import xywhn2xyxy, xyxy2xywhn
17
+
18
+ from utils.utils import (
19
+ cells_to_bboxes,
20
+ iou_width_height as iou,
21
+ non_max_suppression as nms,
22
+ plot_image,
23
+ )
24
+
25
+
26
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
27
+
28
+
29
+ class YOLODataset(Dataset):
30
+ def __init__(
31
+ self,
32
+ csv_file,
33
+ img_dir,
34
+ label_dir,
35
+ anchors,
36
+ image_size=416,
37
+ S=[13, 26, 52],
38
+ C=20,
39
+ transform=None,
40
+ ):
41
+ self.annotations = pd.read_csv(csv_file)
42
+ self.img_dir = img_dir
43
+ self.label_dir = label_dir
44
+ self.image_size = image_size
45
+ self.mosaic_border = [image_size // 2, image_size // 2]
46
+ self.transform = transform
47
+ self.S = S
48
+ self.anchors = torch.tensor(
49
+ anchors[0] + anchors[1] + anchors[2]
50
+ ) # for all 3 scales
51
+ self.num_anchors = self.anchors.shape[0]
52
+ self.num_anchors_per_scale = self.num_anchors // 3
53
+ self.C = C
54
+ self.ignore_iou_thresh = 0.5
55
+
56
+ def __len__(self):
57
+ return len(self.annotations)
58
+
59
+ def load_mosaic(self, index):
60
+ # YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
61
+ labels4 = []
62
+ s = self.image_size
63
+ yc, xc = (
64
+ int(random.uniform(x, 2 * s - x)) for x in self.mosaic_border
65
+ ) # mosaic center x, y
66
+ indices = [index] + random.choices(
67
+ range(len(self)), k=3
68
+ ) # 3 additional image indices
69
+ random.shuffle(indices)
70
+ for i, index in enumerate(indices):
71
+ # Load image
72
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
73
+ bboxes = np.roll(
74
+ np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
75
+ ).tolist()
76
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
77
+ img = np.array(Image.open(img_path).convert("RGB"))
78
+
79
+ h, w = img.shape[0], img.shape[1]
80
+ labels = np.array(bboxes)
81
+
82
+ # place img in img4
83
+ if i == 0: # top left
84
+ img4 = np.full(
85
+ (s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8
86
+ ) # base image with 4 tiles
87
+ x1a, y1a, x2a, y2a = (
88
+ max(xc - w, 0),
89
+ max(yc - h, 0),
90
+ xc,
91
+ yc,
92
+ ) # xmin, ymin, xmax, ymax (large image)
93
+ x1b, y1b, x2b, y2b = (
94
+ w - (x2a - x1a),
95
+ h - (y2a - y1a),
96
+ w,
97
+ h,
98
+ ) # xmin, ymin, xmax, ymax (small image)
99
+ elif i == 1: # top right
100
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
101
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
102
+ elif i == 2: # bottom left
103
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
104
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
105
+ elif i == 3: # bottom right
106
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
107
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
108
+
109
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
110
+ padw = x1a - x1b
111
+ padh = y1a - y1b
112
+
113
+ # Labels
114
+ if labels.size:
115
+ labels[:, :-1] = xywhn2xyxy(
116
+ labels[:, :-1], w, h, padw, padh
117
+ ) # normalized xywh to pixel xyxy format
118
+ labels4.append(labels)
119
+
120
+ # Concat/clip labels
121
+ labels4 = np.concatenate(labels4, 0)
122
+ for x in (labels4[:, :-1],):
123
+ np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
124
+ # img4, labels4 = replicate(img4, labels4) # replicate
125
+ labels4[:, :-1] = xyxy2xywhn(labels4[:, :-1], 2 * s, 2 * s)
126
+ labels4[:, :-1] = np.clip(labels4[:, :-1], 0, 1)
127
+ labels4 = labels4[labels4[:, 2] > 0]
128
+ labels4 = labels4[labels4[:, 3] > 0]
129
+ return img4, labels4
130
+
131
+ def __getitem__(self, index):
132
+ if random.random() >= config.P_MOSAIC:
133
+ image, bboxes = self.load_mosaic(index)
134
+ else:
135
+ label_path = os.path.join(self.label_dir, self.annotations.iloc[index, 1])
136
+ bboxes = np.roll(
137
+ np.loadtxt(fname=label_path, delimiter=" ", ndmin=2), 4, axis=1
138
+ ).tolist()
139
+ img_path = os.path.join(self.img_dir, self.annotations.iloc[index, 0])
140
+ image = np.array(Image.open(img_path).convert("RGB"))
141
+
142
+ if self.transform:
143
+ augmentations = self.transform(image=image, bboxes=bboxes)
144
+ image = augmentations["image"]
145
+ bboxes = augmentations["bboxes"]
146
+
147
+ # Below assumes 3 scale predictions (as paper) and same num of anchors per scale
148
+ targets = [torch.zeros((self.num_anchors // 3, S, S, 6)) for S in self.S]
149
+ for box in bboxes:
150
+ iou_anchors = iou(torch.tensor(box[2:4]), self.anchors)
151
+ anchor_indices = iou_anchors.argsort(descending=True, dim=0)
152
+ x, y, width, height, class_label = box
153
+ has_anchor = [False] * 3 # each scale should have one anchor
154
+ for anchor_idx in anchor_indices:
155
+ scale_idx = anchor_idx // self.num_anchors_per_scale
156
+ anchor_on_scale = anchor_idx % self.num_anchors_per_scale
157
+ S = self.S[scale_idx]
158
+ i, j = int(S * y), int(S * x) # which cell
159
+ anchor_taken = targets[scale_idx][anchor_on_scale, i, j, 0]
160
+ if not anchor_taken and not has_anchor[scale_idx]:
161
+ targets[scale_idx][anchor_on_scale, i, j, 0] = 1
162
+ x_cell, y_cell = S * x - j, S * y - i # both between [0,1]
163
+ width_cell, height_cell = (
164
+ width * S,
165
+ height * S,
166
+ ) # can be greater than 1 since it's relative to cell
167
+ box_coordinates = torch.tensor(
168
+ [x_cell, y_cell, width_cell, height_cell]
169
+ )
170
+ targets[scale_idx][anchor_on_scale, i, j, 1:5] = box_coordinates
171
+ targets[scale_idx][anchor_on_scale, i, j, 5] = int(class_label)
172
+ has_anchor[scale_idx] = True
173
+
174
+ elif (
175
+ not anchor_taken
176
+ and iou_anchors[anchor_idx] > self.ignore_iou_thresh
177
+ ):
178
+ targets[scale_idx][
179
+ anchor_on_scale, i, j, 0
180
+ ] = -1 # ignore prediction
181
+
182
+ return image, tuple(targets)
183
+
184
+
185
+ def test():
186
+ anchors = config.ANCHORS
187
+
188
+ transform = config.test_transforms
189
+
190
+ dataset = YOLODataset(
191
+ "COCO/train.csv",
192
+ "COCO/images/images/",
193
+ "COCO/labels/labels_new/",
194
+ S=[13, 26, 52],
195
+ anchors=anchors,
196
+ transform=transform,
197
+ )
198
+ S = [13, 26, 52]
199
+ scaled_anchors = torch.tensor(anchors) / (
200
+ 1 / torch.tensor(S).unsqueeze(1).unsqueeze(1).repeat(1, 3, 2)
201
+ )
202
+ loader = DataLoader(dataset=dataset, batch_size=1, shuffle=True)
203
+ for x, y in loader:
204
+ boxes = []
205
+
206
+ for i in range(y[0].shape[1]):
207
+ anchor = scaled_anchors[i]
208
+ print(anchor.shape)
209
+ print(y[i].shape)
210
+ boxes += cells_to_bboxes(
211
+ y[i], is_preds=False, S=y[i].shape[2], anchors=anchor
212
+ )[0]
213
+ boxes = nms(boxes, iou_threshold=1, threshold=0.7, box_format="midpoint")
214
+ print(boxes)
215
+ plot_image(x[0].permute(1, 2, 0).to("cpu"), boxes)
216
+
217
+
218
+ class PascalDataModule(L.LightningDataModule):
219
+ def __init__(
220
+ self,
221
+ train_csv_path=None,
222
+ test_csv_path=None,
223
+ batch_size=512,
224
+ shuffle=True,
225
+ num_workers=4,
226
+ ) -> None:
227
+ super().__init__()
228
+ self.train_csv_path = train_csv_path
229
+ self.test_csv_path = test_csv_path
230
+ self.batch_size = batch_size
231
+ self.shuffle = shuffle
232
+ self.num_workers = num_workers
233
+ self.IMAGE_SIZE = config.IMAGE_SIZE
234
+
235
+ def prepare_data(self) -> None:
236
+ pass
237
+
238
+ def setup(self, stage=None):
239
+ self.train_dataset = YOLODataset(
240
+ self.train_csv_path,
241
+ transform=config.train_transforms,
242
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
243
+ img_dir=config.IMG_DIR,
244
+ label_dir=config.LABEL_DIR,
245
+ anchors=config.ANCHORS,
246
+ )
247
+
248
+ self.val_dataset = YOLODataset(
249
+ self.test_csv_path,
250
+ transform=config.test_transforms,
251
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
252
+ img_dir=config.IMG_DIR,
253
+ label_dir=config.LABEL_DIR,
254
+ anchors=config.ANCHORS,
255
+ )
256
+
257
+ self.test_dataset = YOLODataset(
258
+ self.test_csv_path,
259
+ transform=config.test_transforms,
260
+ S=[self.IMAGE_SIZE // 32, self.IMAGE_SIZE // 16, self.IMAGE_SIZE // 8],
261
+ img_dir=config.IMG_DIR,
262
+ label_dir=config.LABEL_DIR,
263
+ anchors=config.ANCHORS,
264
+ )
265
+
266
+ def train_dataloader(self):
267
+ return DataLoader(
268
+ dataset=self.train_dataset,
269
+ batch_size=config.BATCH_SIZE,
270
+ num_workers=config.NUM_WORKERS,
271
+ pin_memory=config.PIN_MEMORY,
272
+ shuffle=True,
273
+ drop_last=False,
274
+ )
275
+
276
+ def val_dataloader(self):
277
+ return DataLoader(
278
+ dataset=self.val_dataset,
279
+ batch_size=config.BATCH_SIZE,
280
+ num_workers=config.NUM_WORKERS,
281
+ pin_memory=config.PIN_MEMORY,
282
+ shuffle=False,
283
+ drop_last=False,
284
+ )
285
+
286
+ def test_dataloader(self):
287
+ return DataLoader(
288
+ dataset=self.test_dataset,
289
+ batch_size=config.BATCH_SIZE,
290
+ num_workers=config.NUM_WORKERS,
291
+ pin_memory=config.PIN_MEMORY,
292
+ shuffle=False,
293
+ drop_last=False,
294
+ )
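A minimal sketch of wiring this DataModule into a Lightning run (the csv paths and the LightningModule lit_model are placeholders, not defined in this snippet):

dm = PascalDataModule(
    train_csv_path=config.DATASET + "/train.csv",
    test_csv_path=config.DATASET + "/test.csv",
)
trainer = L.Trainer(max_epochs=40, accelerator="auto")
trainer.fit(lit_model, datamodule=dm)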
utils/utils/gradcam.py ADDED
@@ -0,0 +1,36 @@
1
+ import numpy as np
2
+ from pytorch_grad_cam import EigenCAM
3
+ from pytorch_grad_cam.utils.image import show_cam_on_image
4
+
5
+ import matplotlib.pyplot as plt
6
+
7
+
8
+ def generate_gradcam(model, target_layers, images, use_cuda=True, transparency=0.6):
9
+ results = []
10
+
11
+ targets = None
12
+ cam = EigenCAM(model, target_layers, use_cuda=use_cuda)
13
+
14
+ for image in images:
15
+ input_tensor = image.unsqueeze(0)
16
+ grayscale_cam = cam(input_tensor, targets=targets)
17
+ grayscale_cam = grayscale_cam[0, :]
18
+
19
+ img = input_tensor.squeeze(0).to("cpu")
20
+ rgb_img = np.transpose(img, (1, 2, 0))
21
+ rgb_img = rgb_img.numpy()
22
+
23
+ cam_image = show_cam_on_image(
24
+ rgb_img, grayscale_cam, use_rgb=True, image_weight=transparency
25
+ )
26
+ results.append(cam_image)
27
+ return results
28
+
29
+
30
+ def visualize_gradcam(images, figsize=(10, 10), rows=2, cols=5):
31
+ fig = plt.figure(figsize=figsize)
32
+ for i in range(len(images)):
33
+ plt.subplot(rows, cols, i + 1)
34
+ plt.imshow(images[i])
35
+ plt.xticks([])
36
+ plt.yticks([])
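A usage sketch (the target layer and the list of input tensors are placeholders; the right layer to visualize depends on the model architecture):

cam_images = generate_gradcam(
    model, target_layers=[model.layer4[-1]], images=images, use_cuda=False
)
visualize_gradcam(cam_images, rows=2, cols=5)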
utils/utils/loss.py ADDED
@@ -0,0 +1,90 @@
1
+ """
2
+ Implementation of the YOLO loss function similar to the one in the YOLOv3 paper;
3
+ the main difference, as far as I can tell, is that CrossEntropy is used for the
4
+ classes instead of BinaryCrossEntropy.
5
+ """
6
+ import random
7
+ import torch
8
+ import torch.nn as nn
9
+
10
+ from utils.utils import intersection_over_union
11
+
12
+
13
+ class YoloLoss(nn.Module):
14
+ def __init__(self):
15
+ super().__init__()
16
+ self.mse = nn.MSELoss()
17
+ self.bce = nn.BCEWithLogitsLoss()
18
+ self.entropy = nn.CrossEntropyLoss()
19
+ self.sigmoid = nn.Sigmoid()
20
+
21
+ # Constants signifying how much to pay for each respective part of the loss
22
+ self.lambda_class = 1
23
+ self.lambda_noobj = 10
24
+ self.lambda_obj = 1
25
+ self.lambda_box = 10
26
+
27
+ def forward(self, predictions, target, anchors):
28
+ # Check where obj and noobj (we ignore if target == -1)
29
+ obj = target[..., 0] == 1 # in paper this is Iobj_i
30
+ noobj = target[..., 0] == 0 # in paper this is Inoobj_i
31
+
32
+ # ======================= #
33
+ # FOR NO OBJECT LOSS #
34
+ # ======================= #
35
+
36
+ no_object_loss = self.bce(
37
+ (predictions[..., 0:1][noobj]),
38
+ (target[..., 0:1][noobj]),
39
+ )
40
+
41
+ # ==================== #
42
+ # FOR OBJECT LOSS #
43
+ # ==================== #
44
+ anchors = anchors.reshape(1, 3, 1, 1, 2)
45
+
46
+ box_preds = torch.cat(
47
+ [
48
+ self.sigmoid(predictions[..., 1:3]),
49
+ torch.exp(predictions[..., 3:5]) * anchors,
50
+ ],
51
+ dim=-1,
52
+ )
53
+ ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj]).detach()
54
+ # ious = intersection_over_union(box_preds[obj], target[..., 1:5][obj])
55
+ object_loss = self.mse(
56
+ self.sigmoid(predictions[..., 0:1][obj]), ious * target[..., 0:1][obj]
57
+ )
58
+
59
+ # ======================== #
60
+ # FOR BOX COORDINATES #
61
+ # ======================== #
62
+
63
+ predictions[..., 1:3] = self.sigmoid(predictions[..., 1:3]) # x,y coordinates
64
+ target[..., 3:5] = torch.log(
65
+ (1e-16 + target[..., 3:5] / anchors)
66
+ ) # width, height coordinates
67
+ box_loss = self.mse(predictions[..., 1:5][obj], target[..., 1:5][obj])
68
+
69
+ # ================== #
70
+ # FOR CLASS LOSS #
71
+ # ================== #
72
+
73
+ class_loss = self.entropy(
74
+ (predictions[..., 5:][obj]),
75
+ (target[..., 5][obj].long()),
76
+ )
77
+
78
+ # print("__________________________________")
79
+ # print(self.lambda_box * box_loss)
80
+ # print(self.lambda_obj * object_loss)
81
+ # print(self.lambda_noobj * no_object_loss)
82
+ # print(self.lambda_class * class_loss)
83
+ # print("\n")
84
+
85
+ return (
86
+ self.lambda_box * box_loss
87
+ + self.lambda_obj * object_loss
88
+ + self.lambda_noobj * no_object_loss
89
+ + self.lambda_class * class_loss
90
+ )
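A sketch of how this loss is usually applied across the three prediction scales (out is the model output, y the targets from the dataset, and scaled_anchors a (3, 3, 2) tensor; all assumed here):

loss_fn = YoloLoss()
loss = (
    loss_fn(out[0], y[0], scaled_anchors[0])
    + loss_fn(out[1], y[1], scaled_anchors[1])
    + loss_fn(out[2], y[2], scaled_anchors[2])
)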
utils/utils/utils.py ADDED
@@ -0,0 +1,668 @@
1
+ import config
2
+ import matplotlib.pyplot as plt
3
+ import matplotlib.patches as patches
4
+ import numpy as np
5
+ import os
6
+ import random
7
+ import torch
8
+
9
+ from collections import Counter
10
+ from torch.utils.data import DataLoader
11
+ from tqdm import tqdm
12
+
13
+
14
+ def iou_width_height(boxes1, boxes2):
15
+ """
16
+ Parameters:
17
+ boxes1 (tensor): width and height of the first bounding boxes
18
+ boxes2 (tensor): width and height of the second bounding boxes
19
+ Returns:
20
+ tensor: Intersection over union of the corresponding boxes
21
+ """
22
+ intersection = torch.min(boxes1[..., 0], boxes2[..., 0]) * torch.min(
23
+ boxes1[..., 1], boxes2[..., 1]
24
+ )
25
+ union = (
26
+ boxes1[..., 0] * boxes1[..., 1] + boxes2[..., 0] * boxes2[..., 1] - intersection
27
+ )
28
+ return intersection / union
29
+
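For example, comparing one box's width/height against two anchors:

iou_width_height(torch.tensor([0.4, 0.4]), torch.tensor([[0.2, 0.2], [0.4, 0.8]]))
# -> tensor([0.2500, 0.5000])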
30
+
31
+ def intersection_over_union(boxes_preds, boxes_labels, box_format="midpoint"):
32
+ """
33
+ Video explanation of this function:
34
+ https://youtu.be/XXYG5ZWtjj0
35
+
36
+ This function calculates intersection over union (iou) given pred boxes
37
+ and target boxes.
38
+
39
+ Parameters:
40
+ boxes_preds (tensor): Predictions of Bounding Boxes (BATCH_SIZE, 4)
41
+ boxes_labels (tensor): Correct labels of Bounding Boxes (BATCH_SIZE, 4)
42
+ box_format (str): midpoint/corners, if boxes (x,y,w,h) or (x1,y1,x2,y2)
43
+
44
+ Returns:
45
+ tensor: Intersection over union for all examples
46
+ """
47
+
48
+ if box_format == "midpoint":
49
+ box1_x1 = boxes_preds[..., 0:1] - boxes_preds[..., 2:3] / 2
50
+ box1_y1 = boxes_preds[..., 1:2] - boxes_preds[..., 3:4] / 2
51
+ box1_x2 = boxes_preds[..., 0:1] + boxes_preds[..., 2:3] / 2
52
+ box1_y2 = boxes_preds[..., 1:2] + boxes_preds[..., 3:4] / 2
53
+ box2_x1 = boxes_labels[..., 0:1] - boxes_labels[..., 2:3] / 2
54
+ box2_y1 = boxes_labels[..., 1:2] - boxes_labels[..., 3:4] / 2
55
+ box2_x2 = boxes_labels[..., 0:1] + boxes_labels[..., 2:3] / 2
56
+ box2_y2 = boxes_labels[..., 1:2] + boxes_labels[..., 3:4] / 2
57
+
58
+ if box_format == "corners":
59
+ box1_x1 = boxes_preds[..., 0:1]
60
+ box1_y1 = boxes_preds[..., 1:2]
61
+ box1_x2 = boxes_preds[..., 2:3]
62
+ box1_y2 = boxes_preds[..., 3:4]
63
+ box2_x1 = boxes_labels[..., 0:1]
64
+ box2_y1 = boxes_labels[..., 1:2]
65
+ box2_x2 = boxes_labels[..., 2:3]
66
+ box2_y2 = boxes_labels[..., 3:4]
67
+
68
+ x1 = torch.max(box1_x1, box2_x1)
69
+ y1 = torch.max(box1_y1, box2_y1)
70
+ x2 = torch.min(box1_x2, box2_x2)
71
+ y2 = torch.min(box1_y2, box2_y2)
72
+
73
+ intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
74
+ box1_area = abs((box1_x2 - box1_x1) * (box1_y2 - box1_y1))
75
+ box2_area = abs((box2_x2 - box2_x1) * (box2_y2 - box2_y1))
76
+
77
+ return intersection / (box1_area + box2_area - intersection + 1e-6)
78
+
79
+
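A worked example in midpoint format (two partially overlapping boxes):

a = torch.tensor([[0.5, 0.5, 0.4, 0.4]])
b = torch.tensor([[0.6, 0.5, 0.4, 0.4]])
intersection_over_union(a, b, box_format="midpoint")
# intersection 0.3*0.4 = 0.12, union 0.32 - 0.12 = 0.20 -> IoU = 0.6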
80
+ def non_max_suppression(bboxes, iou_threshold, threshold, box_format="corners"):
81
+ """
82
+ Video explanation of this function:
83
+ https://youtu.be/YDkjWEN8jNA
84
+
85
+ Does Non Max Suppression given bboxes
86
+
87
+ Parameters:
88
+ bboxes (list): list of lists containing all bboxes with each bboxes
89
+ specified as [class_pred, prob_score, x1, y1, x2, y2]
90
+ iou_threshold (float): threshold where predicted bboxes is correct
91
+ threshold (float): threshold to remove predicted bboxes (independent of IoU)
92
+ box_format (str): "midpoint" or "corners" used to specify bboxes
93
+
94
+ Returns:
95
+ list: bboxes after performing NMS given a specific IoU threshold
96
+ """
97
+
98
+ assert type(bboxes) == list
99
+
100
+ bboxes = [box for box in bboxes if box[1] > threshold]
101
+ bboxes = sorted(bboxes, key=lambda x: x[1], reverse=True)
102
+ bboxes_after_nms = []
103
+
104
+ while bboxes:
105
+ chosen_box = bboxes.pop(0)
106
+
107
+ bboxes = [
108
+ box
109
+ for box in bboxes
110
+ if box[0] != chosen_box[0]
111
+ or intersection_over_union(
112
+ torch.tensor(chosen_box[2:]),
113
+ torch.tensor(box[2:]),
114
+ box_format=box_format,
115
+ )
116
+ < iou_threshold
117
+ ]
118
+
119
+ bboxes_after_nms.append(chosen_box)
120
+
121
+ return bboxes_after_nms
122
+
123
+
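A toy example (scores and coordinates are made up):

boxes = [
    [0, 0.9, 0.50, 0.5, 0.4, 0.4],   # [class, score, x, y, w, h]
    [0, 0.8, 0.52, 0.5, 0.4, 0.4],   # heavy overlap with the first box -> suppressed
    [1, 0.7, 0.20, 0.2, 0.1, 0.1],   # different class -> kept
]
non_max_suppression(boxes, iou_threshold=0.5, threshold=0.5, box_format="midpoint")
# -> keeps the first and third boxes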
124
+ def mean_average_precision(
125
+ pred_boxes, true_boxes, iou_threshold=0.5, box_format="midpoint", num_classes=20
126
+ ):
127
+ """
128
+ Video explanation of this function:
129
+ https://youtu.be/FppOzcDvaDI
130
+
131
+ This function calculates mean average precision (mAP)
132
+
133
+ Parameters:
134
+ pred_boxes (list): list of lists containing all bboxes with each bboxes
135
+ specified as [train_idx, class_prediction, prob_score, x1, y1, x2, y2]
136
+ true_boxes (list): Similar as pred_boxes except all the correct ones
137
+ iou_threshold (float): threshold where predicted bboxes is correct
138
+ box_format (str): "midpoint" or "corners" used to specify bboxes
139
+ num_classes (int): number of classes
140
+
141
+ Returns:
142
+ float: mAP value across all classes given a specific IoU threshold
143
+ """
144
+
145
+ # list storing all AP for respective classes
146
+ average_precisions = []
147
+
148
+ # used for numerical stability later on
149
+ epsilon = 1e-6
150
+
151
+ for c in range(num_classes):
152
+ detections = []
153
+ ground_truths = []
154
+
155
+ # Go through all predictions and targets,
156
+ # and only add the ones that belong to the
157
+ # current class c
158
+ for detection in pred_boxes:
159
+ if detection[1] == c:
160
+ detections.append(detection)
161
+
162
+ for true_box in true_boxes:
163
+ if true_box[1] == c:
164
+ ground_truths.append(true_box)
165
+
166
+ # find the amount of bboxes for each training example
167
+ # Counter here finds how many ground truth bboxes we get
168
+ # for each training example, so let's say img 0 has 3,
169
+ # img 1 has 5 then we will obtain a dictionary with:
170
+ # amount_bboxes = {0:3, 1:5}
171
+ amount_bboxes = Counter([gt[0] for gt in ground_truths])
172
+
173
+ # We then go through each key, val in this dictionary
174
+ # and convert to the following (w.r.t same example):
175
+ # amount_bboxes = {0: torch.tensor([0,0,0]), 1: torch.tensor([0,0,0,0,0])}
176
+ for key, val in amount_bboxes.items():
177
+ amount_bboxes[key] = torch.zeros(val)
178
+
179
+ # sort by box probabilities which is index 2
180
+ detections.sort(key=lambda x: x[2], reverse=True)
181
+ TP = torch.zeros((len(detections)))
182
+ FP = torch.zeros((len(detections)))
183
+ total_true_bboxes = len(ground_truths)
184
+
185
+ # If none exists for this class then we can safely skip
186
+ if total_true_bboxes == 0:
187
+ continue
188
+
189
+ for detection_idx, detection in enumerate(detections):
190
+ # Only take out the ground_truths that have the same
191
+ # training idx as detection
192
+ ground_truth_img = [
193
+ bbox for bbox in ground_truths if bbox[0] == detection[0]
194
+ ]
195
+
196
+ num_gts = len(ground_truth_img)
197
+ best_iou = 0
198
+
199
+ for idx, gt in enumerate(ground_truth_img):
200
+ iou = intersection_over_union(
201
+ torch.tensor(detection[3:]),
202
+ torch.tensor(gt[3:]),
203
+ box_format=box_format,
204
+ )
205
+
206
+ if iou > best_iou:
207
+ best_iou = iou
208
+ best_gt_idx = idx
209
+
210
+ if best_iou > iou_threshold:
211
+ # only count each ground truth detection once
212
+ if amount_bboxes[detection[0]][best_gt_idx] == 0:
213
+ # true positive and add this bounding box to seen
214
+ TP[detection_idx] = 1
215
+ amount_bboxes[detection[0]][best_gt_idx] = 1
216
+ else:
217
+ FP[detection_idx] = 1
218
+
219
+ # if IoU is lower than the threshold, the detection is a false positive
220
+ else:
221
+ FP[detection_idx] = 1
222
+
223
+ TP_cumsum = torch.cumsum(TP, dim=0)
224
+ FP_cumsum = torch.cumsum(FP, dim=0)
225
+ recalls = TP_cumsum / (total_true_bboxes + epsilon)
226
+ precisions = TP_cumsum / (TP_cumsum + FP_cumsum + epsilon)
227
+ precisions = torch.cat((torch.tensor([1]), precisions))
228
+ recalls = torch.cat((torch.tensor([0]), recalls))
229
+ # torch.trapz for numerical integration
230
+ average_precisions.append(torch.trapz(precisions, recalls))
231
+
232
+ return sum(average_precisions) / len(average_precisions)
233
+
234
+
235
+ def plot_image(image, boxes):
236
+ """Plots predicted bounding boxes on the image"""
237
+ cmap = plt.get_cmap("tab20b")
238
+ class_labels = (
239
+ config.COCO_LABELS if config.DATASET == "COCO" else config.PASCAL_CLASSES
240
+ )
241
+ colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
242
+ im = np.array(image)
243
+ height, width, _ = im.shape
244
+
245
+ # Create figure and axes
246
+ fig, ax = plt.subplots(1)
247
+ # Display the image
248
+ ax.imshow(im)
249
+
250
+ # box[0] is x midpoint, box[2] is width
251
+ # box[1] is y midpoint, box[3] is height
252
+
253
+ # Create a Rectangle patch
254
+ for box in boxes:
255
+ assert (
256
+ len(box) == 6
257
+ ), "box should contain class pred, confidence, x, y, width, height"
258
+ class_pred = box[0]
259
+ box = box[2:]
260
+ upper_left_x = box[0] - box[2] / 2
261
+ upper_left_y = box[1] - box[3] / 2
262
+ rect = patches.Rectangle(
263
+ (upper_left_x * width, upper_left_y * height),
264
+ box[2] * width,
265
+ box[3] * height,
266
+ linewidth=2,
267
+ edgecolor=colors[int(class_pred)],
268
+ facecolor="none",
269
+ )
270
+ # Add the patch to the Axes
271
+ ax.add_patch(rect)
272
+ plt.text(
273
+ upper_left_x * width,
274
+ upper_left_y * height,
275
+ s=class_labels[int(class_pred)],
276
+ color="white",
277
+ verticalalignment="top",
278
+ bbox={"color": colors[int(class_pred)], "pad": 0},
279
+ )
280
+
281
+ plt.show()
282
+
283
+
284
+ def get_evaluation_bboxes(
285
+ loader,
286
+ model,
287
+ iou_threshold,
288
+ anchors,
289
+ threshold,
290
+ box_format="midpoint",
291
+ device="cuda",
292
+ ):
293
+ # make sure model is in eval mode before getting bboxes
294
+ model.eval()
295
+ train_idx = 0
296
+ all_pred_boxes = []
297
+ all_true_boxes = []
298
+ for batch_idx, (x, labels) in enumerate(tqdm(loader)):
299
+ x = x.to(device)
300
+
301
+ with torch.no_grad():
302
+ predictions = model(x)
303
+
304
+ batch_size = x.shape[0]
305
+ bboxes = [[] for _ in range(batch_size)]
306
+ for i in range(3):
307
+ S = predictions[i].shape[2]
308
+ anchor = torch.tensor([*anchors[i]]).to(device) * S
309
+ boxes_scale_i = cells_to_bboxes(predictions[i], anchor, S=S, is_preds=True)
310
+ for idx, (box) in enumerate(boxes_scale_i):
311
+ bboxes[idx] += box
312
+
313
+ # we just want one bbox for each label, not one for each scale
314
+ true_bboxes = cells_to_bboxes(labels[2], anchor, S=S, is_preds=False)
315
+
316
+ for idx in range(batch_size):
317
+ nms_boxes = non_max_suppression(
318
+ bboxes[idx],
319
+ iou_threshold=iou_threshold,
320
+ threshold=threshold,
321
+ box_format=box_format,
322
+ )
323
+
324
+ for nms_box in nms_boxes:
325
+ all_pred_boxes.append([train_idx] + nms_box)
326
+
327
+ for box in true_bboxes[idx]:
328
+ if box[1] > threshold:
329
+ all_true_boxes.append([train_idx] + box)
330
+
331
+ train_idx += 1
332
+
333
+ model.train()
334
+ return all_pred_boxes, all_true_boxes
335
+
336
+
337
+ def cells_to_bboxes(predictions, anchors, S, is_preds=True):
338
+ """
339
+ Scales the predictions coming from the model to
340
+ be relative to the entire image so that they can,
341
+ for example, later be plotted or evaluated.
342
+ INPUT:
343
+ predictions: tensor of size (N, 3, S, S, num_classes+5)
344
+ anchors: the anchors used for the predictions
345
+ S: the number of cells the image is divided in on the width (and height)
346
+ is_preds: whether the input is predictions or the true bounding boxes
347
+ OUTPUT:
348
+ converted_bboxes: the converted boxes of sizes (N, num_anchors, S, S, 1+5) with class index,
349
+ object score, bounding box coordinates
350
+ """
351
+ BATCH_SIZE = predictions.shape[0]
352
+ num_anchors = len(anchors)
353
+ box_predictions = predictions[..., 1:5]
354
+ if is_preds:
355
+ anchors = anchors.reshape(1, len(anchors), 1, 1, 2)
356
+ box_predictions[..., 0:2] = torch.sigmoid(box_predictions[..., 0:2])
357
+ box_predictions[..., 2:] = torch.exp(box_predictions[..., 2:]) * anchors
358
+ scores = torch.sigmoid(predictions[..., 0:1])
359
+ best_class = torch.argmax(predictions[..., 5:], dim=-1).unsqueeze(-1)
360
+ else:
361
+ scores = predictions[..., 0:1]
362
+ best_class = predictions[..., 5:6]
363
+
364
+ cell_indices = (
365
+ torch.arange(S)
366
+ .repeat(predictions.shape[0], 3, S, 1)
367
+ .unsqueeze(-1)
368
+ .to(predictions.device)
369
+ )
370
+ x = 1 / S * (box_predictions[..., 0:1] + cell_indices)
371
+ y = 1 / S * (box_predictions[..., 1:2] + cell_indices.permute(0, 1, 3, 2, 4))
372
+ w_h = 1 / S * box_predictions[..., 2:4]
373
+ converted_bboxes = torch.cat((best_class, scores, x, y, w_h), dim=-1).reshape(
374
+ BATCH_SIZE, num_anchors * S * S, 6
375
+ )
376
+ return converted_bboxes.tolist()
377
+
378
+
379
+ def check_class_accuracy(model, loader, threshold):
380
+ model.eval()
381
+ tot_class_preds, correct_class = 0, 0
382
+ tot_noobj, correct_noobj = 0, 0
383
+ tot_obj, correct_obj = 0, 0
384
+
385
+ for idx, (x, y) in enumerate(tqdm(loader)):
386
+ x = x.to(config.DEVICE)
387
+ with torch.no_grad():
388
+ out = model(x)
389
+
390
+ for i in range(3):
391
+ y[i] = y[i].to(config.DEVICE)
392
+ obj = y[i][..., 0] == 1 # in paper this is Iobj_i
393
+ noobj = y[i][..., 0] == 0 # in paper this is Inoobj_i
394
+
395
+ correct_class += torch.sum(
396
+ torch.argmax(out[i][..., 5:][obj], dim=-1) == y[i][..., 5][obj]
397
+ )
398
+ tot_class_preds += torch.sum(obj)
399
+
400
+ obj_preds = torch.sigmoid(out[i][..., 0]) > threshold
401
+ correct_obj += torch.sum(obj_preds[obj] == y[i][..., 0][obj])
402
+ tot_obj += torch.sum(obj)
403
+ correct_noobj += torch.sum(obj_preds[noobj] == y[i][..., 0][noobj])
404
+ tot_noobj += torch.sum(noobj)
405
+
406
+ class_acc = (correct_class / (tot_class_preds + 1e-16)) * 100
407
+ noobj_acc = (correct_noobj / (tot_noobj + 1e-16)) * 100
408
+ obj_acc = (correct_obj / (tot_obj + 1e-16)) * 100
409
+
410
+ print(f"Class accuracy is: {class_acc:.2f}%")
411
+ print(f"No obj accuracy is: {noobj_acc:.2f}%")
412
+ print(f"Obj accuracy is: {obj_acc:.2f}%")
413
+ model.train()
414
+ return class_acc, noobj_acc, obj_acc
415
+
416
+
417
+ def get_mean_std(loader):
418
+ # var[X] = E[X**2] - E[X]**2
419
+ channels_sum, channels_sqrd_sum, num_batches = 0, 0, 0
420
+
421
+ for data, _ in tqdm(loader):
422
+ channels_sum += torch.mean(data, dim=[0, 2, 3])
423
+ channels_sqrd_sum += torch.mean(data**2, dim=[0, 2, 3])
424
+ num_batches += 1
425
+
426
+ mean = channels_sum / num_batches
427
+ std = (channels_sqrd_sum / num_batches - mean**2) ** 0.5
428
+
429
+ return mean, std
430
+
431
+
432
+ def save_checkpoint(model, optimizer, filename="my_checkpoint.pth.tar"):
433
+ print("=> Saving checkpoint")
434
+ checkpoint = {
435
+ "state_dict": model.state_dict(),
436
+ "optimizer": optimizer.state_dict(),
437
+ }
438
+ torch.save(checkpoint, filename)
439
+
440
+
441
+ def load_checkpoint(checkpoint_file, model, optimizer, lr):
442
+ print("=> Loading checkpoint")
443
+ checkpoint = torch.load(checkpoint_file, map_location=config.DEVICE)
444
+ model.load_state_dict(checkpoint["state_dict"])
445
+ optimizer.load_state_dict(checkpoint["optimizer"])
446
+
447
+ # If we don't do this, the optimizer will keep the old checkpoint's learning rate,
448
+ # which can lead to many hours of debugging :\
449
+ for param_group in optimizer.param_groups:
450
+ param_group["lr"] = lr
451
+
452
+
453
+ def get_loaders(train_csv_path, test_csv_path):
454
+ from dataset import YOLODataset
455
+
456
+ IMAGE_SIZE = config.IMAGE_SIZE
457
+ train_dataset = YOLODataset(
458
+ train_csv_path,
459
+ transform=config.train_transforms,
460
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
461
+ img_dir=config.IMG_DIR,
462
+ label_dir=config.LABEL_DIR,
463
+ anchors=config.ANCHORS,
464
+ )
465
+ test_dataset = YOLODataset(
466
+ test_csv_path,
467
+ transform=config.test_transforms,
468
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
469
+ img_dir=config.IMG_DIR,
470
+ label_dir=config.LABEL_DIR,
471
+ anchors=config.ANCHORS,
472
+ )
473
+ train_loader = DataLoader(
474
+ dataset=train_dataset,
475
+ batch_size=config.BATCH_SIZE,
476
+ num_workers=config.NUM_WORKERS,
477
+ pin_memory=config.PIN_MEMORY,
478
+ shuffle=True,
479
+ drop_last=False,
480
+ )
481
+ test_loader = DataLoader(
482
+ dataset=test_dataset,
483
+ batch_size=config.BATCH_SIZE,
484
+ num_workers=config.NUM_WORKERS,
485
+ pin_memory=config.PIN_MEMORY,
486
+ shuffle=False,
487
+ drop_last=False,
488
+ )
489
+
490
+ train_eval_dataset = YOLODataset(
491
+ train_csv_path,
492
+ transform=config.test_transforms,
493
+ S=[IMAGE_SIZE // 32, IMAGE_SIZE // 16, IMAGE_SIZE // 8],
494
+ img_dir=config.IMG_DIR,
495
+ label_dir=config.LABEL_DIR,
496
+ anchors=config.ANCHORS,
497
+ )
498
+ train_eval_loader = DataLoader(
499
+ dataset=train_eval_dataset,
500
+ batch_size=config.BATCH_SIZE,
501
+ num_workers=config.NUM_WORKERS,
502
+ pin_memory=config.PIN_MEMORY,
503
+ shuffle=False,
504
+ drop_last=False,
505
+ )
506
+
507
+ return train_loader, test_loader, train_eval_loader
508
+
509
+
510
+ def plot_couple_examples(model, loader, thresh, iou_thresh, anchors):
511
+ model.eval()
512
+ x, y = next(iter(loader))
513
+ x = x.to("cuda")
514
+ with torch.no_grad():
515
+ out = model(x)
516
+ bboxes = [[] for _ in range(x.shape[0])]
517
+ for i in range(3):
518
+ batch_size, A, S, _, _ = out[i].shape
519
+ anchor = anchors[i]
520
+ boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
521
+ for idx, (box) in enumerate(boxes_scale_i):
522
+ bboxes[idx] += box
523
+
524
+ model.train()
525
+
526
+ for i in range(batch_size // 4):
527
+ nms_boxes = non_max_suppression(
528
+ bboxes[i],
529
+ iou_threshold=iou_thresh,
530
+ threshold=thresh,
531
+ box_format="midpoint",
532
+ )
533
+ plot_image(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes)
534
+
535
+
536
+ def seed_everything(seed=42):
537
+ os.environ["PYTHONHASHSEED"] = str(seed)
538
+ random.seed(seed)
539
+ np.random.seed(seed)
540
+ torch.manual_seed(seed)
541
+ torch.cuda.manual_seed(seed)
542
+ torch.cuda.manual_seed_all(seed)
543
+ torch.backends.cudnn.deterministic = True
544
+ torch.backends.cudnn.benchmark = False
545
+
546
+
547
+ def clip_coords(boxes, img_shape):
548
+ # Clip xyxy bounding boxes to image shape (height, width)
549
+ boxes[:, 0].clamp_(0, img_shape[1]) # x1
550
+ boxes[:, 1].clamp_(0, img_shape[0]) # y1
551
+ boxes[:, 2].clamp_(0, img_shape[1]) # x2
552
+ boxes[:, 3].clamp_(0, img_shape[0]) # y2
553
+
554
+
555
+ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
556
+ # Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
557
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
558
+ y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
559
+ y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
560
+ y[..., 2] = w * (x[..., 0] + x[..., 2] / 2) + padw # bottom right x
561
+ y[..., 3] = h * (x[..., 1] + x[..., 3] / 2) + padh # bottom right y
562
+ return y
563
+
564
+
565
+ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
566
+ # Convert normalized segments into pixel segments, shape (n,2)
567
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
568
+ y[..., 0] = w * x[..., 0] + padw # top left x
569
+ y[..., 1] = h * x[..., 1] + padh # top left y
570
+ return y
571
+
572
+
573
+ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
574
+ # Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
575
+ if clip:
576
+ clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
577
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
578
+ y[..., 0] = ((x[..., 0] + x[..., 2]) / 2) / w # x center
579
+ y[..., 1] = ((x[..., 1] + x[..., 3]) / 2) / h # y center
580
+ y[..., 2] = (x[..., 2] - x[..., 0]) / w # width
581
+ y[..., 3] = (x[..., 3] - x[..., 1]) / h # height
582
+ return y
583
+
584
+
585
+ def clip_boxes(boxes, shape):
586
+ # Clip boxes (xyxy) to image shape (height, width)
587
+ if isinstance(boxes, torch.Tensor): # faster individually
588
+ boxes[..., 0].clamp_(0, shape[1]) # x1
589
+ boxes[..., 1].clamp_(0, shape[0]) # y1
590
+ boxes[..., 2].clamp_(0, shape[1]) # x2
591
+ boxes[..., 3].clamp_(0, shape[0]) # y2
592
+ else: # np.array (faster grouped)
593
+ boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1]) # x1, x2
594
+ boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0]) # y1, y2
595
+
596
+
597
+ def save_result(image, boxes, index):
598
+ """Plots predicted bounding boxes on the image and saves the figure to output/img{index}.png"""
599
+ cmap = plt.get_cmap("tab20b")
600
+ class_labels = config.PASCAL_CLASSES
601
+
602
+ colors = [cmap(i) for i in np.linspace(0, 1, len(class_labels))]
603
+ im = np.array(image)
604
+ height, width, _ = im.shape
605
+
606
+ # Create figure and axes
607
+ fig, ax = plt.subplots(1)
608
+ # Display the image
609
+ ax.imshow(im)
610
+
611
+ # box[0] is x midpoint, box[2] is width
612
+ # box[1] is y midpoint, box[3] is height
613
+
614
+ # Create a Rectangle patch
615
+ for box in boxes:
616
+ assert (
617
+ len(box) == 6
618
+ ), "box should contain class pred, confidence, x, y, width, height"
619
+ class_pred = box[0]
620
+ box = box[2:]
621
+ upper_left_x = box[0] - box[2] / 2
622
+ upper_left_y = box[1] - box[3] / 2
623
+ rect = patches.Rectangle(
624
+ (upper_left_x * width, upper_left_y * height),
625
+ box[2] * width,
626
+ box[3] * height,
627
+ linewidth=2,
628
+ edgecolor=colors[int(class_pred)],
629
+ facecolor="none",
630
+ )
631
+ # Add the patch to the Axes
632
+ ax.add_patch(rect)
633
+ plt.text(
634
+ upper_left_x * width,
635
+ upper_left_y * height,
636
+ s=class_labels[int(class_pred)],
637
+ color="white",
638
+ verticalalignment="top",
639
+ bbox={"color": colors[int(class_pred)], "pad": 0},
640
+ )
641
+ ax.grid(False)
642
+ ax.set_axis_off()
643
+
644
+ plt.savefig(f"output/img{index}.png")
645
+
646
+
647
+ def generate_result(model, data, thresh, iou_thresh, anchors):
648
+ model.eval()
649
+ x = data
650
+ # x = x.to("cuda")
651
+ with torch.no_grad():
652
+ out = model(x)
653
+ bboxes = [[] for _ in range(x.shape[0])]
654
+ for i in range(3):
655
+ batch_size, A, S, _, _ = out[i].shape
656
+ anchor = anchors[i]
657
+ boxes_scale_i = cells_to_bboxes(out[i], anchor, S=S, is_preds=True)
658
+ for idx, (box) in enumerate(boxes_scale_i):
659
+ bboxes[idx] += box
660
+
661
+ for i in range(batch_size):
662
+ nms_boxes = non_max_suppression(
663
+ bboxes[i],
664
+ iou_threshold=iou_thresh,
665
+ threshold=thresh,
666
+ box_format="midpoint",
667
+ )
668
+ save_result(x[i].permute(1, 2, 0).detach().cpu(), nms_boxes, i)
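A minimal sketch of driving this function (the model, the input batch and the scaled-anchor computation are assumptions based on the setup above; the output/ directory must already exist, and model and batch are assumed to be on the same device):

S = [config.IMAGE_SIZE // 32, config.IMAGE_SIZE // 16, config.IMAGE_SIZE // 8]
scaled_anchors = torch.tensor(config.ANCHORS) * torch.tensor(S).view(3, 1, 1)
generate_result(model, batch_of_images, thresh=0.6, iou_thresh=0.5, anchors=scaled_anchors)
# saves one annotated image per batch element as output/img{i}.png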