add saicinpainting
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete change set.
- saicinpainting/__init__.py +0 -0
- saicinpainting/__pycache__/__init__.cpython-38.pyc +0 -0
- saicinpainting/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/__pycache__/utils.cpython-39.pyc +0 -0
- saicinpainting/evaluation/__init__.py +33 -0
- saicinpainting/evaluation/__pycache__/__init__.cpython-38.pyc +0 -0
- saicinpainting/evaluation/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/evaluation/__pycache__/data.cpython-39.pyc +0 -0
- saicinpainting/evaluation/__pycache__/evaluator.cpython-39.pyc +0 -0
- saicinpainting/evaluation/__pycache__/refinement.cpython-39.pyc +0 -0
- saicinpainting/evaluation/__pycache__/utils.cpython-39.pyc +0 -0
- saicinpainting/evaluation/data.py +168 -0
- saicinpainting/evaluation/evaluator.py +220 -0
- saicinpainting/evaluation/losses/__init__.py +0 -0
- saicinpainting/evaluation/losses/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/__pycache__/base_loss.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/__pycache__/lpips.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/__pycache__/ssim.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/base_loss.py +528 -0
- saicinpainting/evaluation/losses/fid/__init__.py +0 -0
- saicinpainting/evaluation/losses/fid/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/fid/__pycache__/inception.cpython-39.pyc +0 -0
- saicinpainting/evaluation/losses/fid/fid_score.py +328 -0
- saicinpainting/evaluation/losses/fid/inception.py +323 -0
- saicinpainting/evaluation/losses/lpips.py +891 -0
- saicinpainting/evaluation/losses/ssim.py +74 -0
- saicinpainting/evaluation/masks/README.md +27 -0
- saicinpainting/evaluation/masks/__init__.py +0 -0
- saicinpainting/evaluation/masks/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/evaluation/masks/__pycache__/mask.cpython-39.pyc +0 -0
- saicinpainting/evaluation/masks/countless/.gitignore +1 -0
- saicinpainting/evaluation/masks/countless/README.md +25 -0
- saicinpainting/evaluation/masks/countless/__init__.py +0 -0
- saicinpainting/evaluation/masks/countless/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/evaluation/masks/countless/__pycache__/countless2d.cpython-39.pyc +0 -0
- saicinpainting/evaluation/masks/countless/countless2d.py +529 -0
- saicinpainting/evaluation/masks/countless/countless3d.py +356 -0
- saicinpainting/evaluation/masks/countless/requirements.txt +7 -0
- saicinpainting/evaluation/masks/countless/test.py +195 -0
- saicinpainting/evaluation/masks/mask.py +429 -0
- saicinpainting/evaluation/refinement.py +314 -0
- saicinpainting/evaluation/utils.py +28 -0
- saicinpainting/evaluation/vis.py +37 -0
- saicinpainting/training/__init__.py +0 -0
- saicinpainting/training/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/training/data/__init__.py +0 -0
- saicinpainting/training/data/__pycache__/__init__.cpython-39.pyc +0 -0
- saicinpainting/training/data/__pycache__/aug.cpython-39.pyc +0 -0
- saicinpainting/training/data/__pycache__/datasets.cpython-39.pyc +0 -0
- saicinpainting/training/data/__pycache__/masks.cpython-39.pyc +0 -0
saicinpainting/__init__.py
ADDED
Empty file
saicinpainting/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (143 Bytes).

saicinpainting/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (143 Bytes).

saicinpainting/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (6.07 kB).
saicinpainting/evaluation/__init__.py
ADDED
@@ -0,0 +1,33 @@
import logging

import torch

from saicinpainting.evaluation.evaluator import InpaintingEvaluatorOnline, ssim_fid100_f1, lpips_fid100_f1
from saicinpainting.evaluation.losses.base_loss import SSIMScore, LPIPSScore, FIDScore


def make_evaluator(kind='default', ssim=True, lpips=True, fid=True, integral_kind=None, **kwargs):
    logging.info(f'Make evaluator {kind}')
    device = "cuda" if torch.cuda.is_available() else "cpu"
    metrics = {}
    if ssim:
        metrics['ssim'] = SSIMScore()
    if lpips:
        metrics['lpips'] = LPIPSScore()
    if fid:
        metrics['fid'] = FIDScore().to(device)

    if integral_kind is None:
        integral_func = None
    elif integral_kind == 'ssim_fid100_f1':
        integral_func = ssim_fid100_f1
    elif integral_kind == 'lpips_fid100_f1':
        integral_func = lpips_fid100_f1
    else:
        raise ValueError(f'Unexpected integral_kind={integral_kind}')

    if kind == 'default':
        return InpaintingEvaluatorOnline(scores=metrics,
                                         integral_func=integral_func,
                                         integral_title=integral_kind,
                                         **kwargs)
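Below is a hypothetical usage sketch of the factory above (not part of this commit): it builds the online evaluator with only the SSIM score enabled and feeds it one random batch. The tensor shapes, the mask threshold and the batch values are illustrative assumptions; the key names follow the InpaintingEvaluatorOnline contract shown later in evaluator.py.

# Hypothetical usage sketch, assuming the full repo (including models.ade20k,
# which base_loss.py imports) is on the Python path. All data below is random.
import torch
from saicinpainting.evaluation import make_evaluator

evaluator = make_evaluator(kind='default', ssim=True, lpips=False, fid=False,
                           integral_kind=None)

batch = {
    'image': torch.rand(2, 3, 256, 256),                  # ground-truth images in [0, 1]
    'mask': (torch.rand(2, 1, 256, 256) > 0.7).float(),   # 1 = hole region
    'inpainted': torch.rand(2, 3, 256, 256),              # stand-in for model output
}
evaluator.process_batch(batch)        # accumulates per-sample scores and mask-area bins
print(evaluator.evaluation_end())     # e.g. {('ssim', 'total'): {'mean': ..., 'std': ...}, ...}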
saicinpainting/evaluation/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (1.06 kB).

saicinpainting/evaluation/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (1.06 kB).

saicinpainting/evaluation/__pycache__/data.cpython-39.pyc
ADDED
Binary file (7.26 kB).

saicinpainting/evaluation/__pycache__/evaluator.cpython-39.pyc
ADDED
Binary file (7.95 kB).

saicinpainting/evaluation/__pycache__/refinement.cpython-39.pyc
ADDED
Binary file (9.64 kB).

saicinpainting/evaluation/__pycache__/utils.cpython-39.pyc
ADDED
Binary file (1.36 kB).
saicinpainting/evaluation/data.py
ADDED
@@ -0,0 +1,168 @@
import glob
import os

import cv2
import PIL.Image as Image
import numpy as np

from torch.utils.data import Dataset
import torch.nn.functional as F


def load_image(fname, mode='RGB', return_orig=False):
    img = np.array(Image.open(fname).convert(mode))
    if img.ndim == 3:
        img = np.transpose(img, (2, 0, 1))
    out_img = img.astype('float32') / 255
    if return_orig:
        return out_img, img
    else:
        return out_img


def ceil_modulo(x, mod):
    if x % mod == 0:
        return x
    return (x // mod + 1) * mod


def pad_img_to_modulo(img, mod):
    channels, height, width = img.shape
    out_height = ceil_modulo(height, mod)
    out_width = ceil_modulo(width, mod)
    return np.pad(img, ((0, 0), (0, out_height - height), (0, out_width - width)), mode='symmetric')


def pad_tensor_to_modulo(img, mod):
    batch_size, channels, height, width = img.shape
    out_height = ceil_modulo(height, mod)
    out_width = ceil_modulo(width, mod)
    return F.pad(img, pad=(0, out_width - width, 0, out_height - height), mode='reflect')


def scale_image(img, factor, interpolation=cv2.INTER_AREA):
    if img.shape[0] == 1:
        img = img[0]
    else:
        img = np.transpose(img, (1, 2, 0))

    img = cv2.resize(img, dsize=None, fx=factor, fy=factor, interpolation=interpolation)

    if img.ndim == 2:
        img = img[None, ...]
    else:
        img = np.transpose(img, (2, 0, 1))
    return img


class InpaintingDataset(Dataset):
    def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None):
        self.datadir = datadir
        self.mask_filenames = sorted(list(glob.glob(os.path.join(self.datadir, '**', '*mask*.png'), recursive=True)))
        self.img_filenames = [fname.rsplit('_mask', 1)[0] + img_suffix for fname in self.mask_filenames]
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.mask_filenames)

    def __getitem__(self, i):
        image = load_image(self.img_filenames[i], mode='RGB')
        mask = load_image(self.mask_filenames[i], mode='L')
        result = dict(image=image, mask=mask[None, ...])

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor, interpolation=cv2.INTER_NEAREST)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['unpad_to_size'] = result['image'].shape[1:]
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)

        return result


class OurInpaintingDataset(Dataset):
    def __init__(self, datadir, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None):
        self.datadir = datadir
        self.mask_filenames = sorted(list(glob.glob(os.path.join(self.datadir, 'mask', '**', '*mask*.png'), recursive=True)))
        self.img_filenames = [os.path.join(self.datadir, 'img', os.path.basename(fname.rsplit('-', 1)[0].rsplit('_', 1)[0]) + '.png') for fname in self.mask_filenames]
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.mask_filenames)

    def __getitem__(self, i):
        result = dict(image=load_image(self.img_filenames[i], mode='RGB'),
                      mask=load_image(self.mask_filenames[i], mode='L')[None, ...])

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)

        return result


class PrecomputedInpaintingResultsDataset(InpaintingDataset):
    def __init__(self, datadir, predictdir, inpainted_suffix='_inpainted.jpg', **kwargs):
        super().__init__(datadir, **kwargs)
        if not datadir.endswith('/'):
            datadir += '/'
        self.predictdir = predictdir
        self.pred_filenames = [os.path.join(predictdir, os.path.splitext(fname[len(datadir):])[0] + inpainted_suffix)
                               for fname in self.mask_filenames]

    def __getitem__(self, i):
        result = super().__getitem__(i)
        result['inpainted'] = load_image(self.pred_filenames[i])
        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['inpainted'] = pad_img_to_modulo(result['inpainted'], self.pad_out_to_modulo)
        return result


class OurPrecomputedInpaintingResultsDataset(OurInpaintingDataset):
    def __init__(self, datadir, predictdir, inpainted_suffix="png", **kwargs):
        super().__init__(datadir, **kwargs)
        if not datadir.endswith('/'):
            datadir += '/'
        self.predictdir = predictdir
        self.pred_filenames = [os.path.join(predictdir, os.path.basename(os.path.splitext(fname)[0]) + f'_inpainted.{inpainted_suffix}')
                               for fname in self.mask_filenames]
        # self.pred_filenames = [os.path.join(predictdir, os.path.splitext(fname[len(datadir):])[0] + inpainted_suffix)
        #                        for fname in self.mask_filenames]

    def __getitem__(self, i):
        result = super().__getitem__(i)
        result['inpainted'] = self.file_loader(self.pred_filenames[i])

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['inpainted'] = pad_img_to_modulo(result['inpainted'], self.pad_out_to_modulo)
        return result


class InpaintingEvalOnlineDataset(Dataset):
    def __init__(self, indir, mask_generator, img_suffix='.jpg', pad_out_to_modulo=None, scale_factor=None, **kwargs):
        self.indir = indir
        self.mask_generator = mask_generator
        self.img_filenames = sorted(list(glob.glob(os.path.join(self.indir, '**', f'*{img_suffix}'), recursive=True)))
        self.pad_out_to_modulo = pad_out_to_modulo
        self.scale_factor = scale_factor

    def __len__(self):
        return len(self.img_filenames)

    def __getitem__(self, i):
        img, raw_image = load_image(self.img_filenames[i], mode='RGB', return_orig=True)
        mask = self.mask_generator(img, raw_image=raw_image)
        result = dict(image=img, mask=mask)

        if self.scale_factor is not None:
            result['image'] = scale_image(result['image'], self.scale_factor)
            result['mask'] = scale_image(result['mask'], self.scale_factor, interpolation=cv2.INTER_NEAREST)

        if self.pad_out_to_modulo is not None and self.pad_out_to_modulo > 1:
            result['image'] = pad_img_to_modulo(result['image'], self.pad_out_to_modulo)
            result['mask'] = pad_img_to_modulo(result['mask'], self.pad_out_to_modulo)
        return result
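As a hedged illustration of how InpaintingDataset above expects its inputs: each '*mask*.png' is paired with the image sharing its stem, e.g. scene001_mask000.png with scene001.jpg. The directory path and pad_out_to_modulo value in the sketch below are invented for this example.

# Illustrative sketch only; '/data/val_set' and pad_out_to_modulo=8 are assumptions,
# and importing the package assumes the full repo (including models.ade20k) is available.
from torch.utils.data import DataLoader
from saicinpainting.evaluation.data import InpaintingDataset

dataset = InpaintingDataset('/data/val_set', img_suffix='.jpg',
                            pad_out_to_modulo=8, scale_factor=None)
loader = DataLoader(dataset, batch_size=4, shuffle=False)

sample = dataset[0]
print(sample['image'].shape)  # (3, H_padded, W_padded), float32 in [0, 1]
print(sample['mask'].shape)   # (1, H_padded, W_padded), 1 marks the hole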
saicinpainting/evaluation/evaluator.py
ADDED
@@ -0,0 +1,220 @@
import logging
import math
from typing import Dict

import numpy as np
import torch
import torch.nn as nn
import tqdm
from torch.utils.data import DataLoader

from saicinpainting.evaluation.utils import move_to_device

LOGGER = logging.getLogger(__name__)


class InpaintingEvaluator():
    def __init__(self, dataset, scores, area_grouping=True, bins=10, batch_size=32, device='cuda',
                 integral_func=None, integral_title=None, clamp_image_range=None):
        """
        :param dataset: torch.utils.data.Dataset which contains images and masks
        :param scores: dict {score_name: EvaluatorScore object}
        :param area_grouping: in addition to the overall scores, allows to compute score for the groups of samples
            which are defined by share of area occluded by mask
        :param bins: number of groups, partition is generated by np.linspace(0., 1., bins + 1)
        :param batch_size: batch_size for the dataloader
        :param device: device to use
        """
        self.scores = scores
        self.dataset = dataset

        self.area_grouping = area_grouping
        self.bins = bins

        self.device = torch.device(device)

        self.dataloader = DataLoader(self.dataset, shuffle=False, batch_size=batch_size)

        self.integral_func = integral_func
        self.integral_title = integral_title
        self.clamp_image_range = clamp_image_range

    def _get_bin_edges(self):
        bin_edges = np.linspace(0, 1, self.bins + 1)

        num_digits = max(0, math.ceil(math.log10(self.bins)) - 1)
        interval_names = []
        for idx_bin in range(self.bins):
            start_percent, end_percent = round(100 * bin_edges[idx_bin], num_digits), \
                                         round(100 * bin_edges[idx_bin + 1], num_digits)
            start_percent = '{:.{n}f}'.format(start_percent, n=num_digits)
            end_percent = '{:.{n}f}'.format(end_percent, n=num_digits)
            interval_names.append("{0}-{1}%".format(start_percent, end_percent))

        groups = []
        for batch in self.dataloader:
            mask = batch['mask']
            batch_size = mask.shape[0]
            area = mask.to(self.device).reshape(batch_size, -1).mean(dim=-1)
            bin_indices = np.searchsorted(bin_edges, area.detach().cpu().numpy(), side='right') - 1
            # corner case: when area is equal to 1, bin_indices should return bins - 1, not bins for that element
            bin_indices[bin_indices == self.bins] = self.bins - 1
            groups.append(bin_indices)
        groups = np.hstack(groups)

        return groups, interval_names

    def evaluate(self, model=None):
        """
        :param model: callable with signature (image_batch, mask_batch); should return inpainted_batch
        :return: dict with (score_name, group_type) as keys, where group_type can be either 'overall' or
            name of the particular group arranged by area of mask (e.g. '10-20%')
            and score statistics for the group as values.
        """
        results = dict()
        if self.area_grouping:
            groups, interval_names = self._get_bin_edges()
        else:
            groups = None

        for score_name, score in tqdm.auto.tqdm(self.scores.items(), desc='scores'):
            score.to(self.device)
            with torch.no_grad():
                score.reset()
                for batch in tqdm.auto.tqdm(self.dataloader, desc=score_name, leave=False):
                    batch = move_to_device(batch, self.device)
                    image_batch, mask_batch = batch['image'], batch['mask']
                    if self.clamp_image_range is not None:
                        image_batch = torch.clamp(image_batch,
                                                  min=self.clamp_image_range[0],
                                                  max=self.clamp_image_range[1])
                    if model is None:
                        assert 'inpainted' in batch, \
                            'Model is None, so we expected precomputed inpainting results at key "inpainted"'
                        inpainted_batch = batch['inpainted']
                    else:
                        inpainted_batch = model(image_batch, mask_batch)
                    score(inpainted_batch, image_batch, mask_batch)
                total_results, group_results = score.get_value(groups=groups)

            results[(score_name, 'total')] = total_results
            if groups is not None:
                for group_index, group_values in group_results.items():
                    group_name = interval_names[group_index]
                    results[(score_name, group_name)] = group_values

        if self.integral_func is not None:
            results[(self.integral_title, 'total')] = dict(mean=self.integral_func(results))

        return results


def ssim_fid100_f1(metrics, fid_scale=100):
    ssim = metrics[('ssim', 'total')]['mean']
    fid = metrics[('fid', 'total')]['mean']
    fid_rel = max(0, fid_scale - fid) / fid_scale
    f1 = 2 * ssim * fid_rel / (ssim + fid_rel + 1e-3)
    return f1


def lpips_fid100_f1(metrics, fid_scale=100):
    neg_lpips = 1 - metrics[('lpips', 'total')]['mean']  # invert, so bigger is better
    fid = metrics[('fid', 'total')]['mean']
    fid_rel = max(0, fid_scale - fid) / fid_scale
    f1 = 2 * neg_lpips * fid_rel / (neg_lpips + fid_rel + 1e-3)
    return f1


class InpaintingEvaluatorOnline(nn.Module):
    def __init__(self, scores, bins=10, image_key='image', inpainted_key='inpainted',
                 integral_func=None, integral_title=None, clamp_image_range=None):
        """
        :param scores: dict {score_name: EvaluatorScore object}
        :param bins: number of groups, partition is generated by np.linspace(0., 1., bins + 1)
        :param device: device to use
        """
        super().__init__()
        LOGGER.info(f'{type(self)} init called')
        self.scores = nn.ModuleDict(scores)
        self.image_key = image_key
        self.inpainted_key = inpainted_key
        self.bins_num = bins
        self.bin_edges = np.linspace(0, 1, self.bins_num + 1)

        num_digits = max(0, math.ceil(math.log10(self.bins_num)) - 1)
        self.interval_names = []
        for idx_bin in range(self.bins_num):
            start_percent, end_percent = round(100 * self.bin_edges[idx_bin], num_digits), \
                                         round(100 * self.bin_edges[idx_bin + 1], num_digits)
            start_percent = '{:.{n}f}'.format(start_percent, n=num_digits)
            end_percent = '{:.{n}f}'.format(end_percent, n=num_digits)
            self.interval_names.append("{0}-{1}%".format(start_percent, end_percent))

        self.groups = []

        self.integral_func = integral_func
        self.integral_title = integral_title
        self.clamp_image_range = clamp_image_range

        LOGGER.info(f'{type(self)} init done')

    def _get_bins(self, mask_batch):
        batch_size = mask_batch.shape[0]
        area = mask_batch.view(batch_size, -1).mean(dim=-1).detach().cpu().numpy()
        bin_indices = np.clip(np.searchsorted(self.bin_edges, area) - 1, 0, self.bins_num - 1)
        return bin_indices

    def forward(self, batch: Dict[str, torch.Tensor]):
        """
        Calculate and accumulate metrics for batch. To finalize evaluation and obtain final metrics, call evaluation_end
        :param batch: batch dict with mandatory fields mask, image, inpainted (can be overridden by self.inpainted_key)
        """
        result = {}
        with torch.no_grad():
            image_batch, mask_batch, inpainted_batch = batch[self.image_key], batch['mask'], batch[self.inpainted_key]
            if self.clamp_image_range is not None:
                image_batch = torch.clamp(image_batch,
                                          min=self.clamp_image_range[0],
                                          max=self.clamp_image_range[1])
            self.groups.extend(self._get_bins(mask_batch))

            for score_name, score in self.scores.items():
                result[score_name] = score(inpainted_batch, image_batch, mask_batch)
        return result

    def process_batch(self, batch: Dict[str, torch.Tensor]):
        return self(batch)

    def evaluation_end(self, states=None):
        """:return: dict with (score_name, group_type) as keys, where group_type can be either 'overall' or
            name of the particular group arranged by area of mask (e.g. '10-20%')
            and score statistics for the group as values.
        """
        LOGGER.info(f'{type(self)}: evaluation_end called')

        self.groups = np.array(self.groups)

        results = {}
        for score_name, score in self.scores.items():
            LOGGER.info(f'Getting value of {score_name}')
            cur_states = [s[score_name] for s in states] if states is not None else None
            total_results, group_results = score.get_value(groups=self.groups, states=cur_states)
            LOGGER.info(f'Getting value of {score_name} done')
            results[(score_name, 'total')] = total_results

            for group_index, group_values in group_results.items():
                group_name = self.interval_names[group_index]
                results[(score_name, group_name)] = group_values

        if self.integral_func is not None:
            results[(self.integral_title, 'total')] = dict(mean=self.integral_func(results))

        LOGGER.info(f'{type(self)}: reset scores')
        self.groups = []
        for sc in self.scores.values():
            sc.reset()
        LOGGER.info(f'{type(self)}: reset scores done')

        LOGGER.info(f'{type(self)}: evaluation_end done')
        return results
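A toy calculation of the joint metric defined above: ssim_fid100_f1 combines SSIM with a rescaled FID term, (fid_scale - fid) / fid_scale, through an F1-style harmonic mean. The metric values below are invented purely for illustration.

# Toy illustration; the numbers are made up, and importing the package assumes
# the full repo (including models.ade20k) is on the Python path.
from saicinpainting.evaluation.evaluator import ssim_fid100_f1

toy_metrics = {
    ('ssim', 'total'): {'mean': 0.90},
    ('fid', 'total'): {'mean': 20.0},
}
# fid_rel = (100 - 20) / 100 = 0.8
# f1 = 2 * 0.90 * 0.8 / (0.90 + 0.8 + 1e-3) ≈ 0.847
print(ssim_fid100_f1(toy_metrics))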
saicinpainting/evaluation/losses/__init__.py
ADDED
Empty file
saicinpainting/evaluation/losses/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (161 Bytes).

saicinpainting/evaluation/losses/__pycache__/base_loss.cpython-39.pyc
ADDED
Binary file (17.6 kB).

saicinpainting/evaluation/losses/__pycache__/lpips.cpython-39.pyc
ADDED
Binary file (29.5 kB).

saicinpainting/evaluation/losses/__pycache__/ssim.cpython-39.pyc
ADDED
Binary file (2.73 kB).
saicinpainting/evaluation/losses/base_loss.py
ADDED
@@ -0,0 +1,528 @@
import logging
from abc import abstractmethod, ABC

import numpy as np
import sklearn
import sklearn.svm
import torch
import torch.nn as nn
import torch.nn.functional as F
from joblib import Parallel, delayed
from scipy import linalg

from models.ade20k import SegmentationModule, NUM_CLASS, segm_options
from .fid.inception import InceptionV3
from .lpips import PerceptualLoss
from .ssim import SSIM

LOGGER = logging.getLogger(__name__)


def get_groupings(groups):
    """
    :param groups: group numbers for respective elements
    :return: dict of kind {group_idx: indices of the corresponding group elements}
    """
    label_groups, count_groups = np.unique(groups, return_counts=True)

    indices = np.argsort(groups)

    grouping = dict()
    cur_start = 0
    for label, count in zip(label_groups, count_groups):
        cur_end = cur_start + count
        cur_indices = indices[cur_start:cur_end]
        grouping[label] = cur_indices
        cur_start = cur_end
    return grouping


class EvaluatorScore(nn.Module):
    @abstractmethod
    def forward(self, pred_batch, target_batch, mask):
        pass

    @abstractmethod
    def get_value(self, groups=None, states=None):
        pass

    @abstractmethod
    def reset(self):
        pass


class PairwiseScore(EvaluatorScore, ABC):
    def __init__(self):
        super().__init__()
        self.individual_values = None

    def get_value(self, groups=None, states=None):
        """
        :param groups:
        :return:
            total_results: dict of kind {'mean': score mean, 'std': score std}
            group_results: None, if groups is None;
                else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
        """
        individual_values = torch.cat(states, dim=-1).reshape(-1).cpu().numpy() if states is not None \
            else self.individual_values

        total_results = {
            'mean': individual_values.mean(),
            'std': individual_values.std()
        }

        if groups is None:
            return total_results, None

        group_results = dict()
        grouping = get_groupings(groups)
        for label, index in grouping.items():
            group_scores = individual_values[index]
            group_results[label] = {
                'mean': group_scores.mean(),
                'std': group_scores.std()
            }
        return total_results, group_results

    def reset(self):
        self.individual_values = []


class SSIMScore(PairwiseScore):
    def __init__(self, window_size=11):
        super().__init__()
        self.score = SSIM(window_size=window_size, size_average=False).eval()
        self.reset()

    def forward(self, pred_batch, target_batch, mask=None):
        batch_values = self.score(pred_batch, target_batch)
        self.individual_values = np.hstack([
            self.individual_values, batch_values.detach().cpu().numpy()
        ])
        return batch_values


class LPIPSScore(PairwiseScore):
    def __init__(self, model='net-lin', net='vgg', model_path=None, use_gpu=True):
        super().__init__()
        self.score = PerceptualLoss(model=model, net=net, model_path=model_path,
                                    use_gpu=use_gpu, spatial=False).eval()
        self.reset()

    def forward(self, pred_batch, target_batch, mask=None):
        batch_values = self.score(pred_batch, target_batch).flatten()
        self.individual_values = np.hstack([
            self.individual_values, batch_values.detach().cpu().numpy()
        ])
        return batch_values


def fid_calculate_activation_statistics(act):
    mu = np.mean(act, axis=0)
    sigma = np.cov(act, rowvar=False)
    return mu, sigma


def calculate_frechet_distance(activations_pred, activations_target, eps=1e-6):
    mu1, sigma1 = fid_calculate_activation_statistics(activations_pred)
    mu2, sigma2 = fid_calculate_activation_statistics(activations_target)

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        LOGGER.warning(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        # if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-2):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return (diff.dot(diff) + np.trace(sigma1) +
            np.trace(sigma2) - 2 * tr_covmean)


class FIDScore(EvaluatorScore):
    def __init__(self, dims=2048, eps=1e-6):
        LOGGER.info("FIDscore init called")
        super().__init__()
        if getattr(FIDScore, '_MODEL', None) is None:
            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
            FIDScore._MODEL = InceptionV3([block_idx]).eval()
        self.model = FIDScore._MODEL
        self.eps = eps
        self.reset()
        LOGGER.info("FIDscore init done")

    def forward(self, pred_batch, target_batch, mask=None):
        activations_pred = self._get_activations(pred_batch)
        activations_target = self._get_activations(target_batch)

        self.activations_pred.append(activations_pred.detach().cpu())
        self.activations_target.append(activations_target.detach().cpu())

        return activations_pred, activations_target

    def get_value(self, groups=None, states=None):
        LOGGER.info("FIDscore get_value called")
        activations_pred, activations_target = zip(*states) if states is not None \
            else (self.activations_pred, self.activations_target)
        activations_pred = torch.cat(activations_pred).cpu().numpy()
        activations_target = torch.cat(activations_target).cpu().numpy()

        total_distance = calculate_frechet_distance(activations_pred, activations_target, eps=self.eps)
        total_results = dict(mean=total_distance)

        if groups is None:
            group_results = None
        else:
            group_results = dict()
            grouping = get_groupings(groups)
            for label, index in grouping.items():
                if len(index) > 1:
                    group_distance = calculate_frechet_distance(activations_pred[index], activations_target[index],
                                                                eps=self.eps)
                    group_results[label] = dict(mean=group_distance)
                else:
                    group_results[label] = dict(mean=float('nan'))

        self.reset()

        LOGGER.info("FIDscore get_value done")

        return total_results, group_results

    def reset(self):
        self.activations_pred = []
        self.activations_target = []

    def _get_activations(self, batch):
        activations = self.model(batch)[0]
        if activations.shape[2] != 1 or activations.shape[3] != 1:
            assert False, \
                'We should not have got here, because Inception always scales inputs to 299x299'
            # activations = F.adaptive_avg_pool2d(activations, output_size=(1, 1))
        activations = activations.squeeze(-1).squeeze(-1)
        return activations


class SegmentationAwareScore(EvaluatorScore):
    def __init__(self, weights_path):
        super().__init__()
        self.segm_network = SegmentationModule(weights_path=weights_path, use_default_normalization=True).eval()
        self.target_class_freq_by_image_total = []
        self.target_class_freq_by_image_mask = []
        self.pred_class_freq_by_image_mask = []

    def forward(self, pred_batch, target_batch, mask):
        pred_segm_flat = self.segm_network.predict(pred_batch)[0].view(pred_batch.shape[0], -1).long().detach().cpu().numpy()
        target_segm_flat = self.segm_network.predict(target_batch)[0].view(pred_batch.shape[0], -1).long().detach().cpu().numpy()
        mask_flat = (mask.view(mask.shape[0], -1) > 0.5).detach().cpu().numpy()

        batch_target_class_freq_total = []
        batch_target_class_freq_mask = []
        batch_pred_class_freq_mask = []

        for cur_pred_segm, cur_target_segm, cur_mask in zip(pred_segm_flat, target_segm_flat, mask_flat):
            cur_target_class_freq_total = np.bincount(cur_target_segm, minlength=NUM_CLASS)[None, ...]
            cur_target_class_freq_mask = np.bincount(cur_target_segm[cur_mask], minlength=NUM_CLASS)[None, ...]
            cur_pred_class_freq_mask = np.bincount(cur_pred_segm[cur_mask], minlength=NUM_CLASS)[None, ...]

            self.target_class_freq_by_image_total.append(cur_target_class_freq_total)
            self.target_class_freq_by_image_mask.append(cur_target_class_freq_mask)
            self.pred_class_freq_by_image_mask.append(cur_pred_class_freq_mask)

            batch_target_class_freq_total.append(cur_target_class_freq_total)
            batch_target_class_freq_mask.append(cur_target_class_freq_mask)
            batch_pred_class_freq_mask.append(cur_pred_class_freq_mask)

        batch_target_class_freq_total = np.concatenate(batch_target_class_freq_total, axis=0)
        batch_target_class_freq_mask = np.concatenate(batch_target_class_freq_mask, axis=0)
        batch_pred_class_freq_mask = np.concatenate(batch_pred_class_freq_mask, axis=0)
        return batch_target_class_freq_total, batch_target_class_freq_mask, batch_pred_class_freq_mask

    def reset(self):
        super().reset()
        self.target_class_freq_by_image_total = []
        self.target_class_freq_by_image_mask = []
        self.pred_class_freq_by_image_mask = []


def distribute_values_to_classes(target_class_freq_by_image_mask, values, idx2name):
    assert target_class_freq_by_image_mask.ndim == 2 and target_class_freq_by_image_mask.shape[0] == values.shape[0]
    total_class_freq = target_class_freq_by_image_mask.sum(0)
    distr_values = (target_class_freq_by_image_mask * values[..., None]).sum(0)
    result = distr_values / (total_class_freq + 1e-3)
    return {idx2name[i]: val for i, val in enumerate(result) if total_class_freq[i] > 0}


def get_segmentation_idx2name():
    return {i - 1: name for i, name in segm_options['classes'].set_index('Idx', drop=True)['Name'].to_dict().items()}


class SegmentationAwarePairwiseScore(SegmentationAwareScore):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.individual_values = []
        self.segm_idx2name = get_segmentation_idx2name()

    def forward(self, pred_batch, target_batch, mask):
        cur_class_stats = super().forward(pred_batch, target_batch, mask)
        score_values = self.calc_score(pred_batch, target_batch, mask)
        self.individual_values.append(score_values)
        return cur_class_stats + (score_values,)

    @abstractmethod
    def calc_score(self, pred_batch, target_batch, mask):
        raise NotImplementedError()

    def get_value(self, groups=None, states=None):
        """
        :param groups:
        :return:
            total_results: dict of kind {'mean': score mean, 'std': score std}
            group_results: None, if groups is None;
                else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
        """
        if states is not None:
            (target_class_freq_by_image_total,
             target_class_freq_by_image_mask,
             pred_class_freq_by_image_mask,
             individual_values) = states
        else:
            target_class_freq_by_image_total = self.target_class_freq_by_image_total
            target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
            pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask
            individual_values = self.individual_values

        target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
        target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
        pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)
        individual_values = np.concatenate(individual_values, axis=0)

        total_results = {
            'mean': individual_values.mean(),
            'std': individual_values.std(),
            **distribute_values_to_classes(target_class_freq_by_image_mask, individual_values, self.segm_idx2name)
        }

        if groups is None:
            return total_results, None

        group_results = dict()
        grouping = get_groupings(groups)
        for label, index in grouping.items():
            group_class_freq = target_class_freq_by_image_mask[index]
            group_scores = individual_values[index]
            group_results[label] = {
                'mean': group_scores.mean(),
                'std': group_scores.std(),
                **distribute_values_to_classes(group_class_freq, group_scores, self.segm_idx2name)
            }
        return total_results, group_results

    def reset(self):
        super().reset()
        self.individual_values = []


class SegmentationClassStats(SegmentationAwarePairwiseScore):
    def calc_score(self, pred_batch, target_batch, mask):
        return 0

    def get_value(self, groups=None, states=None):
        """
        :param groups:
        :return:
            total_results: dict of kind {'mean': score mean, 'std': score std}
            group_results: None, if groups is None;
                else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
        """
        if states is not None:
            (target_class_freq_by_image_total,
             target_class_freq_by_image_mask,
             pred_class_freq_by_image_mask,
             _) = states
        else:
            target_class_freq_by_image_total = self.target_class_freq_by_image_total
            target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
            pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask

        target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
        target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
        pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)

        target_class_freq_by_image_total_marginal = target_class_freq_by_image_total.sum(0).astype('float32')
        target_class_freq_by_image_total_marginal /= target_class_freq_by_image_total_marginal.sum()

        target_class_freq_by_image_mask_marginal = target_class_freq_by_image_mask.sum(0).astype('float32')
        target_class_freq_by_image_mask_marginal /= target_class_freq_by_image_mask_marginal.sum()

        pred_class_freq_diff = (pred_class_freq_by_image_mask - target_class_freq_by_image_mask).sum(0) / (target_class_freq_by_image_mask.sum(0) + 1e-3)

        total_results = dict()
        total_results.update({f'total_freq/{self.segm_idx2name[i]}': v
                              for i, v in enumerate(target_class_freq_by_image_total_marginal)
                              if v > 0})
        total_results.update({f'mask_freq/{self.segm_idx2name[i]}': v
                              for i, v in enumerate(target_class_freq_by_image_mask_marginal)
                              if v > 0})
        total_results.update({f'mask_freq_diff/{self.segm_idx2name[i]}': v
                              for i, v in enumerate(pred_class_freq_diff)
                              if target_class_freq_by_image_total_marginal[i] > 0})

        if groups is None:
            return total_results, None

        group_results = dict()
        grouping = get_groupings(groups)
        for label, index in grouping.items():
            group_target_class_freq_by_image_total = target_class_freq_by_image_total[index]
            group_target_class_freq_by_image_mask = target_class_freq_by_image_mask[index]
            group_pred_class_freq_by_image_mask = pred_class_freq_by_image_mask[index]

            group_target_class_freq_by_image_total_marginal = group_target_class_freq_by_image_total.sum(0).astype('float32')
            group_target_class_freq_by_image_total_marginal /= group_target_class_freq_by_image_total_marginal.sum()

            group_target_class_freq_by_image_mask_marginal = group_target_class_freq_by_image_mask.sum(0).astype('float32')
            group_target_class_freq_by_image_mask_marginal /= group_target_class_freq_by_image_mask_marginal.sum()

            group_pred_class_freq_diff = (group_pred_class_freq_by_image_mask - group_target_class_freq_by_image_mask).sum(0) / (
                    group_target_class_freq_by_image_mask.sum(0) + 1e-3)

            cur_group_results = dict()
            cur_group_results.update({f'total_freq/{self.segm_idx2name[i]}': v
                                      for i, v in enumerate(group_target_class_freq_by_image_total_marginal)
                                      if v > 0})
            cur_group_results.update({f'mask_freq/{self.segm_idx2name[i]}': v
                                      for i, v in enumerate(group_target_class_freq_by_image_mask_marginal)
                                      if v > 0})
            cur_group_results.update({f'mask_freq_diff/{self.segm_idx2name[i]}': v
                                      for i, v in enumerate(group_pred_class_freq_diff)
                                      if group_target_class_freq_by_image_total_marginal[i] > 0})

            group_results[label] = cur_group_results
        return total_results, group_results


class SegmentationAwareSSIM(SegmentationAwarePairwiseScore):
    def __init__(self, *args, window_size=11, **kwargs):
        super().__init__(*args, **kwargs)
        self.score_impl = SSIM(window_size=window_size, size_average=False).eval()

    def calc_score(self, pred_batch, target_batch, mask):
        return self.score_impl(pred_batch, target_batch).detach().cpu().numpy()


class SegmentationAwareLPIPS(SegmentationAwarePairwiseScore):
    def __init__(self, *args, model='net-lin', net='vgg', model_path=None, use_gpu=True, **kwargs):
        super().__init__(*args, **kwargs)
        self.score_impl = PerceptualLoss(model=model, net=net, model_path=model_path,
                                         use_gpu=use_gpu, spatial=False).eval()

    def calc_score(self, pred_batch, target_batch, mask):
        return self.score_impl(pred_batch, target_batch).flatten().detach().cpu().numpy()


def calculade_fid_no_img(img_i, activations_pred, activations_target, eps=1e-6):
    activations_pred = activations_pred.copy()
    activations_pred[img_i] = activations_target[img_i]
    return calculate_frechet_distance(activations_pred, activations_target, eps=eps)


class SegmentationAwareFID(SegmentationAwarePairwiseScore):
    def __init__(self, *args, dims=2048, eps=1e-6, n_jobs=-1, **kwargs):
        super().__init__(*args, **kwargs)
        if getattr(FIDScore, '_MODEL', None) is None:
            block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
            FIDScore._MODEL = InceptionV3([block_idx]).eval()
        self.model = FIDScore._MODEL
        self.eps = eps
        self.n_jobs = n_jobs

    def calc_score(self, pred_batch, target_batch, mask):
        activations_pred = self._get_activations(pred_batch)
        activations_target = self._get_activations(target_batch)
        return activations_pred, activations_target

    def get_value(self, groups=None, states=None):
        """
        :param groups:
        :return:
            total_results: dict of kind {'mean': score mean, 'std': score std}
            group_results: None, if groups is None;
                else dict {group_idx: {'mean': score mean among group, 'std': score std among group}}
        """
        if states is not None:
            (target_class_freq_by_image_total,
             target_class_freq_by_image_mask,
             pred_class_freq_by_image_mask,
             activation_pairs) = states
        else:
            target_class_freq_by_image_total = self.target_class_freq_by_image_total
            target_class_freq_by_image_mask = self.target_class_freq_by_image_mask
            pred_class_freq_by_image_mask = self.pred_class_freq_by_image_mask
            activation_pairs = self.individual_values

        target_class_freq_by_image_total = np.concatenate(target_class_freq_by_image_total, axis=0)
        target_class_freq_by_image_mask = np.concatenate(target_class_freq_by_image_mask, axis=0)
        pred_class_freq_by_image_mask = np.concatenate(pred_class_freq_by_image_mask, axis=0)
        activations_pred, activations_target = zip(*activation_pairs)
        activations_pred = np.concatenate(activations_pred, axis=0)
        activations_target = np.concatenate(activations_target, axis=0)

        total_results = {
            'mean': calculate_frechet_distance(activations_pred, activations_target, eps=self.eps),
            'std': 0,
            **self.distribute_fid_to_classes(target_class_freq_by_image_mask, activations_pred, activations_target)
        }

        if groups is None:
            return total_results, None

        group_results = dict()
        grouping = get_groupings(groups)
        for label, index in grouping.items():
            if len(index) > 1:
                group_activations_pred = activations_pred[index]
                group_activations_target = activations_target[index]
                group_class_freq = target_class_freq_by_image_mask[index]
                group_results[label] = {
                    'mean': calculate_frechet_distance(group_activations_pred, group_activations_target, eps=self.eps),
                    'std': 0,
                    **self.distribute_fid_to_classes(group_class_freq,
                                                     group_activations_pred,
                                                     group_activations_target)
                }
            else:
                group_results[label] = dict(mean=float('nan'), std=0)
        return total_results, group_results

    def distribute_fid_to_classes(self, class_freq, activations_pred, activations_target):
        real_fid = calculate_frechet_distance(activations_pred, activations_target, eps=self.eps)

        fid_no_images = Parallel(n_jobs=self.n_jobs)(
            delayed(calculade_fid_no_img)(img_i, activations_pred, activations_target, eps=self.eps)
            for img_i in range(activations_pred.shape[0])
        )
        errors = real_fid - fid_no_images
        return distribute_values_to_classes(class_freq, errors, self.segm_idx2name)

    def _get_activations(self, batch):
        activations = self.model(batch)[0]
        if activations.shape[2] != 1 or activations.shape[3] != 1:
            activations = F.adaptive_avg_pool2d(activations, output_size=(1, 1))
        activations = activations.squeeze(-1).squeeze(-1).detach().cpu().numpy()
        return activations
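A small, self-contained check of the grouping helper defined above: get_groupings buckets per-sample scores by group index so that each bucket can be averaged separately. All numbers are invented for illustration.

# Toy illustration; importing base_loss assumes the full repo (including
# models.ade20k) is on the Python path.
import numpy as np
from saicinpainting.evaluation.losses.base_loss import get_groupings

groups = np.array([2, 0, 2, 1, 0])            # e.g. mask-area bin per sample
scores = np.array([0.8, 0.6, 0.9, 0.7, 0.5])  # per-sample score values

for label, idx in get_groupings(groups).items():
    print(label, scores[idx].mean())          # 0 -> 0.55, 1 -> 0.7, 2 -> 0.85 (approx.)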
saicinpainting/evaluation/losses/fid/__init__.py
ADDED
Empty file
saicinpainting/evaluation/losses/fid/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (165 Bytes).

saicinpainting/evaluation/losses/fid/__pycache__/inception.cpython-39.pyc
ADDED
Binary file (9.02 kB).
saicinpainting/evaluation/losses/fid/fid_score.py
ADDED
@@ -0,0 +1,328 @@
#!/usr/bin/env python3
"""Calculates the Frechet Inception Distance (FID) to evaluate GANs

The FID metric calculates the distance between two distributions of images.
Typically, we have summary statistics (mean & covariance matrix) of one
of these distributions, while the 2nd distribution is given by a GAN.

When run as a stand-alone program, it compares the distribution of
images that are stored as PNG/JPEG at a specified location with a
distribution given by summary statistics (in pickle format).

The FID is calculated by assuming that X_1 and X_2 are the activations of
the pool_3 layer of the inception net for generated samples and real world
samples respectively.

See --help to see further details.

Code adapted from https://github.com/bioinf-jku/TTUR to use PyTorch instead
of Tensorflow

Copyright 2018 Institute of Bioinformatics, JKU Linz

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import os
import pathlib
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser

import numpy as np
import torch
# from scipy.misc import imread
from imageio import imread
from PIL import Image, JpegImagePlugin
from scipy import linalg
from torch.nn.functional import adaptive_avg_pool2d
from torchvision.transforms import CenterCrop, Compose, Resize, ToTensor

try:
    from tqdm import tqdm
except ImportError:
    # If tqdm is not available, provide a mock version of it
    def tqdm(x): return x

try:
    from .inception import InceptionV3
except ModuleNotFoundError:
    from inception import InceptionV3

parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
parser.add_argument('path', type=str, nargs=2,
                    help=('Path to the generated images or '
                          'to .npz statistic files'))
parser.add_argument('--batch-size', type=int, default=50,
                    help='Batch size to use')
parser.add_argument('--dims', type=int, default=2048,
                    choices=list(InceptionV3.BLOCK_INDEX_BY_DIM),
                    help=('Dimensionality of Inception features to use. '
                          'By default, uses pool3 features'))
parser.add_argument('-c', '--gpu', default='', type=str,
                    help='GPU to use (leave blank for CPU only)')
parser.add_argument('--resize', default=256)

transform = Compose([Resize(256), CenterCrop(256), ToTensor()])


def get_activations(files, model, batch_size=50, dims=2048,
                    cuda=False, verbose=False, keep_size=False):
    """Calculates the activations of the pool_3 layer for all images.

    Params:
    -- files       : List of image file paths
    -- model       : Instance of inception model
    -- batch_size  : Batch size of images for the model to process at once.
                     Make sure that the number of samples is a multiple of
                     the batch size, otherwise some samples are ignored. This
                     behavior is retained to match the original FID score
                     implementation.
    -- dims        : Dimensionality of features returned by Inception
    -- cuda        : If set to True, use GPU
    -- verbose     : If set to True and parameter out_step is given, the number
                     of calculated batches is reported.
    Returns:
    -- A numpy array of dimension (num images, dims) that contains the
       activations of the given tensor when feeding inception with the
       query tensor.
    """
    model.eval()

    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches),
                  end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        # # Official code goes below
        # images = np.array([imread(str(f)).astype(np.float32)
        #                    for f in files[start:end]])

        # # Reshape to (n_images, 3, height, width)
        # images = images.transpose((0, 3, 1, 2))
        # images /= 255
        # batch = torch.from_numpy(images).type(torch.FloatTensor)
        # #

        t = transform if not keep_size else ToTensor()

        if isinstance(files[0], pathlib.PosixPath):
            images = [t(Image.open(str(f))) for f in files[start:end]]

        elif isinstance(files[0], Image.Image):
            images = [t(f) for f in files[start:end]]

        else:
            raise ValueError(f"Unknown data type for image: {type(files[0])}")

        batch = torch.stack(images)

        if cuda:
            batch = batch.cuda()

        pred = model(batch)[0]

        # If model output is not scalar, apply global spatial average pooling.
        # This happens if you choose a dimensionality not equal 2048.
        if pred.shape[2] != 1 or pred.shape[3] != 1:
            pred = adaptive_avg_pool2d(pred, output_size=(1, 1))

        pred_arr[start:end] = pred.cpu().data.numpy().reshape(batch_size, -1)

        if verbose:
            print(' done')

    return pred_arr


def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
|
164 |
+
d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
|
165 |
+
|
166 |
+
Stable version by Dougal J. Sutherland.
|
167 |
+
|
168 |
+
Params:
|
169 |
+
-- mu1 : Numpy array containing the activations of a layer of the
|
170 |
+
inception net (like returned by the function 'get_predictions')
|
171 |
+
for generated samples.
|
172 |
+
-- mu2 : The sample mean over activations, precalculated on an
|
173 |
+
representative data set.
|
174 |
+
-- sigma1: The covariance matrix over activations for generated samples.
|
175 |
+
-- sigma2: The covariance matrix over activations, precalculated on an
|
176 |
+
representative data set.
|
177 |
+
|
178 |
+
Returns:
|
179 |
+
-- : The Frechet Distance.
|
180 |
+
"""
|
181 |
+
|
182 |
+
mu1 = np.atleast_1d(mu1)
|
183 |
+
mu2 = np.atleast_1d(mu2)
|
184 |
+
|
185 |
+
sigma1 = np.atleast_2d(sigma1)
|
186 |
+
sigma2 = np.atleast_2d(sigma2)
|
187 |
+
|
188 |
+
assert mu1.shape == mu2.shape, \
|
189 |
+
'Training and test mean vectors have different lengths'
|
190 |
+
assert sigma1.shape == sigma2.shape, \
|
191 |
+
'Training and test covariances have different dimensions'
|
192 |
+
|
193 |
+
diff = mu1 - mu2
|
194 |
+
|
195 |
+
# Product might be almost singular
|
196 |
+
covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
|
197 |
+
if not np.isfinite(covmean).all():
|
198 |
+
msg = ('fid calculation produces singular product; '
|
199 |
+
'adding %s to diagonal of cov estimates') % eps
|
200 |
+
print(msg)
|
201 |
+
offset = np.eye(sigma1.shape[0]) * eps
|
202 |
+
covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
|
203 |
+
|
204 |
+
# Numerical error might give slight imaginary component
|
205 |
+
if np.iscomplexobj(covmean):
|
206 |
+
# if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
|
207 |
+
if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-2):
|
208 |
+
m = np.max(np.abs(covmean.imag))
|
209 |
+
raise ValueError('Imaginary component {}'.format(m))
|
210 |
+
covmean = covmean.real
|
211 |
+
|
212 |
+
tr_covmean = np.trace(covmean)
|
213 |
+
|
214 |
+
return (diff.dot(diff) + np.trace(sigma1) +
|
215 |
+
np.trace(sigma2) - 2 * tr_covmean)
|
216 |
+
|
217 |
+
|
218 |
+
def calculate_activation_statistics(files, model, batch_size=50,
|
219 |
+
dims=2048, cuda=False, verbose=False, keep_size=False):
|
220 |
+
"""Calculation of the statistics used by the FID.
|
221 |
+
Params:
|
222 |
+
-- files : List of image files paths
|
223 |
+
-- model : Instance of inception model
|
224 |
+
-- batch_size : The images numpy array is split into batches with
|
225 |
+
batch size batch_size. A reasonable batch size
|
226 |
+
depends on the hardware.
|
227 |
+
-- dims : Dimensionality of features returned by Inception
|
228 |
+
-- cuda : If set to True, use GPU
|
229 |
+
-- verbose : If set to True and parameter out_step is given, the
|
230 |
+
number of calculated batches is reported.
|
231 |
+
Returns:
|
232 |
+
-- mu : The mean over samples of the activations of the pool_3 layer of
|
233 |
+
the inception model.
|
234 |
+
-- sigma : The covariance matrix of the activations of the pool_3 layer of
|
235 |
+
the inception model.
|
236 |
+
"""
|
237 |
+
act = get_activations(files, model, batch_size, dims, cuda, verbose, keep_size=keep_size)
|
238 |
+
mu = np.mean(act, axis=0)
|
239 |
+
sigma = np.cov(act, rowvar=False)
|
240 |
+
return mu, sigma
|
241 |
+
|
242 |
+
|
243 |
+
def _compute_statistics_of_path(path, model, batch_size, dims, cuda):
|
244 |
+
if path.endswith('.npz'):
|
245 |
+
f = np.load(path)
|
246 |
+
m, s = f['mu'][:], f['sigma'][:]
|
247 |
+
f.close()
|
248 |
+
else:
|
249 |
+
path = pathlib.Path(path)
|
250 |
+
files = list(path.glob('*.jpg')) + list(path.glob('*.png'))
|
251 |
+
m, s = calculate_activation_statistics(files, model, batch_size,
|
252 |
+
dims, cuda)
|
253 |
+
|
254 |
+
return m, s
|
255 |
+
|
256 |
+
|
257 |
+
def _compute_statistics_of_images(images, model, batch_size, dims, cuda, keep_size=False):
|
258 |
+
if isinstance(images, list): # exact paths to files are provided
|
259 |
+
m, s = calculate_activation_statistics(images, model, batch_size,
|
260 |
+
dims, cuda, keep_size=keep_size)
|
261 |
+
|
262 |
+
return m, s
|
263 |
+
|
264 |
+
else:
|
265 |
+
raise ValueError
|
266 |
+
|
267 |
+
|
268 |
+
def calculate_fid_given_paths(paths, batch_size, cuda, dims):
|
269 |
+
"""Calculates the FID of two paths"""
|
270 |
+
for p in paths:
|
271 |
+
if not os.path.exists(p):
|
272 |
+
raise RuntimeError('Invalid path: %s' % p)
|
273 |
+
|
274 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
275 |
+
|
276 |
+
model = InceptionV3([block_idx])
|
277 |
+
if cuda:
|
278 |
+
model.cuda()
|
279 |
+
|
280 |
+
m1, s1 = _compute_statistics_of_path(paths[0], model, batch_size,
|
281 |
+
dims, cuda)
|
282 |
+
m2, s2 = _compute_statistics_of_path(paths[1], model, batch_size,
|
283 |
+
dims, cuda)
|
284 |
+
fid_value = calculate_frechet_distance(m1, s1, m2, s2)
|
285 |
+
|
286 |
+
return fid_value
|
287 |
+
|
288 |
+
|
289 |
+
def calculate_fid_given_images(images, batch_size, cuda, dims, use_globals=False, keep_size=False):
|
290 |
+
if use_globals:
|
291 |
+
global FID_MODEL # for multiprocessing
|
292 |
+
|
293 |
+
for imgs in images:
|
294 |
+
if isinstance(imgs, list) and isinstance(imgs[0], (Image.Image, JpegImagePlugin.JpegImageFile)):
|
295 |
+
pass
|
296 |
+
else:
|
297 |
+
raise RuntimeError('Invalid images')
|
298 |
+
|
299 |
+
block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]
|
300 |
+
|
301 |
+
if 'FID_MODEL' not in globals() or not use_globals:
|
302 |
+
model = InceptionV3([block_idx])
|
303 |
+
if cuda:
|
304 |
+
model.cuda()
|
305 |
+
|
306 |
+
if use_globals:
|
307 |
+
FID_MODEL = model
|
308 |
+
|
309 |
+
else:
|
310 |
+
model = FID_MODEL
|
311 |
+
|
312 |
+
m1, s1 = _compute_statistics_of_images(images[0], model, batch_size,
|
313 |
+
dims, cuda, keep_size=False)
|
314 |
+
m2, s2 = _compute_statistics_of_images(images[1], model, batch_size,
|
315 |
+
dims, cuda, keep_size=False)
|
316 |
+
fid_value = calculate_frechet_distance(m1, s1, m2, s2)
|
317 |
+
return fid_value
|
318 |
+
|
319 |
+
|
320 |
+
if __name__ == '__main__':
|
321 |
+
args = parser.parse_args()
|
322 |
+
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
|
323 |
+
|
324 |
+
fid_value = calculate_fid_given_paths(args.path,
|
325 |
+
args.batch_size,
|
326 |
+
args.gpu != '',
|
327 |
+
args.dims)
|
328 |
+
print('FID: ', fid_value)
|
saicinpainting/evaluation/losses/fid/inception.py
ADDED
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import logging
|
2 |
+
|
3 |
+
import torch
|
4 |
+
import torch.nn as nn
|
5 |
+
import torch.nn.functional as F
|
6 |
+
from torchvision import models
|
7 |
+
|
8 |
+
try:
|
9 |
+
from torchvision.models.utils import load_state_dict_from_url
|
10 |
+
except ImportError:
|
11 |
+
from torch.utils.model_zoo import load_url as load_state_dict_from_url
|
12 |
+
|
13 |
+
# Inception weights ported to Pytorch from
|
14 |
+
# http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz
|
15 |
+
FID_WEIGHTS_URL = 'https://github.com/mseitzer/pytorch-fid/releases/download/fid_weights/pt_inception-2015-12-05-6726825d.pth'
|
16 |
+
|
17 |
+
|
18 |
+
LOGGER = logging.getLogger(__name__)
|
19 |
+
|
20 |
+
|
21 |
+
class InceptionV3(nn.Module):
|
22 |
+
"""Pretrained InceptionV3 network returning feature maps"""
|
23 |
+
|
24 |
+
# Index of default block of inception to return,
|
25 |
+
# corresponds to output of final average pooling
|
26 |
+
DEFAULT_BLOCK_INDEX = 3
|
27 |
+
|
28 |
+
# Maps feature dimensionality to their output blocks indices
|
29 |
+
BLOCK_INDEX_BY_DIM = {
|
30 |
+
64: 0, # First max pooling features
|
31 |
+
192: 1, # Second max pooling featurs
|
32 |
+
768: 2, # Pre-aux classifier features
|
33 |
+
2048: 3 # Final average pooling features
|
34 |
+
}
|
35 |
+
|
36 |
+
def __init__(self,
|
37 |
+
output_blocks=[DEFAULT_BLOCK_INDEX],
|
38 |
+
resize_input=True,
|
39 |
+
normalize_input=True,
|
40 |
+
requires_grad=False,
|
41 |
+
use_fid_inception=True):
|
42 |
+
"""Build pretrained InceptionV3
|
43 |
+
|
44 |
+
Parameters
|
45 |
+
----------
|
46 |
+
output_blocks : list of int
|
47 |
+
Indices of blocks to return features of. Possible values are:
|
48 |
+
- 0: corresponds to output of first max pooling
|
49 |
+
- 1: corresponds to output of second max pooling
|
50 |
+
- 2: corresponds to output which is fed to aux classifier
|
51 |
+
- 3: corresponds to output of final average pooling
|
52 |
+
resize_input : bool
|
53 |
+
If true, bilinearly resizes input to width and height 299 before
|
54 |
+
feeding input to model. As the network without fully connected
|
55 |
+
layers is fully convolutional, it should be able to handle inputs
|
56 |
+
of arbitrary size, so resizing might not be strictly needed
|
57 |
+
normalize_input : bool
|
58 |
+
If true, scales the input from range (0, 1) to the range the
|
59 |
+
pretrained Inception network expects, namely (-1, 1)
|
60 |
+
requires_grad : bool
|
61 |
+
If true, parameters of the model require gradients. Possibly useful
|
62 |
+
for finetuning the network
|
63 |
+
use_fid_inception : bool
|
64 |
+
If true, uses the pretrained Inception model used in Tensorflow's
|
65 |
+
FID implementation. If false, uses the pretrained Inception model
|
66 |
+
available in torchvision. The FID Inception model has different
|
67 |
+
weights and a slightly different structure from torchvision's
|
68 |
+
Inception model. If you want to compute FID scores, you are
|
69 |
+
strongly advised to set this parameter to true to get comparable
|
70 |
+
results.
|
71 |
+
"""
|
72 |
+
super(InceptionV3, self).__init__()
|
73 |
+
|
74 |
+
self.resize_input = resize_input
|
75 |
+
self.normalize_input = normalize_input
|
76 |
+
self.output_blocks = sorted(output_blocks)
|
77 |
+
self.last_needed_block = max(output_blocks)
|
78 |
+
|
79 |
+
assert self.last_needed_block <= 3, \
|
80 |
+
'Last possible output block index is 3'
|
81 |
+
|
82 |
+
self.blocks = nn.ModuleList()
|
83 |
+
|
84 |
+
if use_fid_inception:
|
85 |
+
inception = fid_inception_v3()
|
86 |
+
else:
|
87 |
+
inception = models.inception_v3(pretrained=True)
|
88 |
+
|
89 |
+
# Block 0: input to maxpool1
|
90 |
+
block0 = [
|
91 |
+
inception.Conv2d_1a_3x3,
|
92 |
+
inception.Conv2d_2a_3x3,
|
93 |
+
inception.Conv2d_2b_3x3,
|
94 |
+
nn.MaxPool2d(kernel_size=3, stride=2)
|
95 |
+
]
|
96 |
+
self.blocks.append(nn.Sequential(*block0))
|
97 |
+
|
98 |
+
# Block 1: maxpool1 to maxpool2
|
99 |
+
if self.last_needed_block >= 1:
|
100 |
+
block1 = [
|
101 |
+
inception.Conv2d_3b_1x1,
|
102 |
+
inception.Conv2d_4a_3x3,
|
103 |
+
nn.MaxPool2d(kernel_size=3, stride=2)
|
104 |
+
]
|
105 |
+
self.blocks.append(nn.Sequential(*block1))
|
106 |
+
|
107 |
+
# Block 2: maxpool2 to aux classifier
|
108 |
+
if self.last_needed_block >= 2:
|
109 |
+
block2 = [
|
110 |
+
inception.Mixed_5b,
|
111 |
+
inception.Mixed_5c,
|
112 |
+
inception.Mixed_5d,
|
113 |
+
inception.Mixed_6a,
|
114 |
+
inception.Mixed_6b,
|
115 |
+
inception.Mixed_6c,
|
116 |
+
inception.Mixed_6d,
|
117 |
+
inception.Mixed_6e,
|
118 |
+
]
|
119 |
+
self.blocks.append(nn.Sequential(*block2))
|
120 |
+
|
121 |
+
# Block 3: aux classifier to final avgpool
|
122 |
+
if self.last_needed_block >= 3:
|
123 |
+
block3 = [
|
124 |
+
inception.Mixed_7a,
|
125 |
+
inception.Mixed_7b,
|
126 |
+
inception.Mixed_7c,
|
127 |
+
nn.AdaptiveAvgPool2d(output_size=(1, 1))
|
128 |
+
]
|
129 |
+
self.blocks.append(nn.Sequential(*block3))
|
130 |
+
|
131 |
+
for param in self.parameters():
|
132 |
+
param.requires_grad = requires_grad
|
133 |
+
|
134 |
+
def forward(self, inp):
|
135 |
+
"""Get Inception feature maps
|
136 |
+
|
137 |
+
Parameters
|
138 |
+
----------
|
139 |
+
inp : torch.autograd.Variable
|
140 |
+
Input tensor of shape Bx3xHxW. Values are expected to be in
|
141 |
+
range (0, 1)
|
142 |
+
|
143 |
+
Returns
|
144 |
+
-------
|
145 |
+
List of torch.autograd.Variable, corresponding to the selected output
|
146 |
+
block, sorted ascending by index
|
147 |
+
"""
|
148 |
+
outp = []
|
149 |
+
x = inp
|
150 |
+
|
151 |
+
if self.resize_input:
|
152 |
+
x = F.interpolate(x,
|
153 |
+
size=(299, 299),
|
154 |
+
mode='bilinear',
|
155 |
+
align_corners=False)
|
156 |
+
|
157 |
+
if self.normalize_input:
|
158 |
+
x = 2 * x - 1 # Scale from range (0, 1) to range (-1, 1)
|
159 |
+
|
160 |
+
for idx, block in enumerate(self.blocks):
|
161 |
+
x = block(x)
|
162 |
+
if idx in self.output_blocks:
|
163 |
+
outp.append(x)
|
164 |
+
|
165 |
+
if idx == self.last_needed_block:
|
166 |
+
break
|
167 |
+
|
168 |
+
return outp
|
169 |
+
|
170 |
+
|
171 |
+
def fid_inception_v3():
|
172 |
+
"""Build pretrained Inception model for FID computation
|
173 |
+
|
174 |
+
The Inception model for FID computation uses a different set of weights
|
175 |
+
and has a slightly different structure than torchvision's Inception.
|
176 |
+
|
177 |
+
This method first constructs torchvision's Inception and then patches the
|
178 |
+
necessary parts that are different in the FID Inception model.
|
179 |
+
"""
|
180 |
+
LOGGER.info('fid_inception_v3 called')
|
181 |
+
inception = models.inception_v3(num_classes=1008,
|
182 |
+
aux_logits=False,
|
183 |
+
pretrained=False)
|
184 |
+
LOGGER.info('models.inception_v3 done')
|
185 |
+
inception.Mixed_5b = FIDInceptionA(192, pool_features=32)
|
186 |
+
inception.Mixed_5c = FIDInceptionA(256, pool_features=64)
|
187 |
+
inception.Mixed_5d = FIDInceptionA(288, pool_features=64)
|
188 |
+
inception.Mixed_6b = FIDInceptionC(768, channels_7x7=128)
|
189 |
+
inception.Mixed_6c = FIDInceptionC(768, channels_7x7=160)
|
190 |
+
inception.Mixed_6d = FIDInceptionC(768, channels_7x7=160)
|
191 |
+
inception.Mixed_6e = FIDInceptionC(768, channels_7x7=192)
|
192 |
+
inception.Mixed_7b = FIDInceptionE_1(1280)
|
193 |
+
inception.Mixed_7c = FIDInceptionE_2(2048)
|
194 |
+
|
195 |
+
LOGGER.info('fid_inception_v3 patching done')
|
196 |
+
|
197 |
+
state_dict = load_state_dict_from_url(FID_WEIGHTS_URL, progress=True)
|
198 |
+
LOGGER.info('fid_inception_v3 weights downloaded')
|
199 |
+
|
200 |
+
inception.load_state_dict(state_dict)
|
201 |
+
LOGGER.info('fid_inception_v3 weights loaded into model')
|
202 |
+
|
203 |
+
return inception
|
204 |
+
|
205 |
+
|
206 |
+
class FIDInceptionA(models.inception.InceptionA):
|
207 |
+
"""InceptionA block patched for FID computation"""
|
208 |
+
def __init__(self, in_channels, pool_features):
|
209 |
+
super(FIDInceptionA, self).__init__(in_channels, pool_features)
|
210 |
+
|
211 |
+
def forward(self, x):
|
212 |
+
branch1x1 = self.branch1x1(x)
|
213 |
+
|
214 |
+
branch5x5 = self.branch5x5_1(x)
|
215 |
+
branch5x5 = self.branch5x5_2(branch5x5)
|
216 |
+
|
217 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
218 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
219 |
+
branch3x3dbl = self.branch3x3dbl_3(branch3x3dbl)
|
220 |
+
|
221 |
+
# Patch: Tensorflow's average pool does not use the padded zero's in
|
222 |
+
# its average calculation
|
223 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
224 |
+
count_include_pad=False)
|
225 |
+
branch_pool = self.branch_pool(branch_pool)
|
226 |
+
|
227 |
+
outputs = [branch1x1, branch5x5, branch3x3dbl, branch_pool]
|
228 |
+
return torch.cat(outputs, 1)
|
229 |
+
|
230 |
+
|
231 |
+
class FIDInceptionC(models.inception.InceptionC):
|
232 |
+
"""InceptionC block patched for FID computation"""
|
233 |
+
def __init__(self, in_channels, channels_7x7):
|
234 |
+
super(FIDInceptionC, self).__init__(in_channels, channels_7x7)
|
235 |
+
|
236 |
+
def forward(self, x):
|
237 |
+
branch1x1 = self.branch1x1(x)
|
238 |
+
|
239 |
+
branch7x7 = self.branch7x7_1(x)
|
240 |
+
branch7x7 = self.branch7x7_2(branch7x7)
|
241 |
+
branch7x7 = self.branch7x7_3(branch7x7)
|
242 |
+
|
243 |
+
branch7x7dbl = self.branch7x7dbl_1(x)
|
244 |
+
branch7x7dbl = self.branch7x7dbl_2(branch7x7dbl)
|
245 |
+
branch7x7dbl = self.branch7x7dbl_3(branch7x7dbl)
|
246 |
+
branch7x7dbl = self.branch7x7dbl_4(branch7x7dbl)
|
247 |
+
branch7x7dbl = self.branch7x7dbl_5(branch7x7dbl)
|
248 |
+
|
249 |
+
# Patch: Tensorflow's average pool does not use the padded zero's in
|
250 |
+
# its average calculation
|
251 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
252 |
+
count_include_pad=False)
|
253 |
+
branch_pool = self.branch_pool(branch_pool)
|
254 |
+
|
255 |
+
outputs = [branch1x1, branch7x7, branch7x7dbl, branch_pool]
|
256 |
+
return torch.cat(outputs, 1)
|
257 |
+
|
258 |
+
|
259 |
+
class FIDInceptionE_1(models.inception.InceptionE):
|
260 |
+
"""First InceptionE block patched for FID computation"""
|
261 |
+
def __init__(self, in_channels):
|
262 |
+
super(FIDInceptionE_1, self).__init__(in_channels)
|
263 |
+
|
264 |
+
def forward(self, x):
|
265 |
+
branch1x1 = self.branch1x1(x)
|
266 |
+
|
267 |
+
branch3x3 = self.branch3x3_1(x)
|
268 |
+
branch3x3 = [
|
269 |
+
self.branch3x3_2a(branch3x3),
|
270 |
+
self.branch3x3_2b(branch3x3),
|
271 |
+
]
|
272 |
+
branch3x3 = torch.cat(branch3x3, 1)
|
273 |
+
|
274 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
275 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
276 |
+
branch3x3dbl = [
|
277 |
+
self.branch3x3dbl_3a(branch3x3dbl),
|
278 |
+
self.branch3x3dbl_3b(branch3x3dbl),
|
279 |
+
]
|
280 |
+
branch3x3dbl = torch.cat(branch3x3dbl, 1)
|
281 |
+
|
282 |
+
# Patch: Tensorflow's average pool does not use the padded zero's in
|
283 |
+
# its average calculation
|
284 |
+
branch_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1,
|
285 |
+
count_include_pad=False)
|
286 |
+
branch_pool = self.branch_pool(branch_pool)
|
287 |
+
|
288 |
+
outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
|
289 |
+
return torch.cat(outputs, 1)
|
290 |
+
|
291 |
+
|
292 |
+
class FIDInceptionE_2(models.inception.InceptionE):
|
293 |
+
"""Second InceptionE block patched for FID computation"""
|
294 |
+
def __init__(self, in_channels):
|
295 |
+
super(FIDInceptionE_2, self).__init__(in_channels)
|
296 |
+
|
297 |
+
def forward(self, x):
|
298 |
+
branch1x1 = self.branch1x1(x)
|
299 |
+
|
300 |
+
branch3x3 = self.branch3x3_1(x)
|
301 |
+
branch3x3 = [
|
302 |
+
self.branch3x3_2a(branch3x3),
|
303 |
+
self.branch3x3_2b(branch3x3),
|
304 |
+
]
|
305 |
+
branch3x3 = torch.cat(branch3x3, 1)
|
306 |
+
|
307 |
+
branch3x3dbl = self.branch3x3dbl_1(x)
|
308 |
+
branch3x3dbl = self.branch3x3dbl_2(branch3x3dbl)
|
309 |
+
branch3x3dbl = [
|
310 |
+
self.branch3x3dbl_3a(branch3x3dbl),
|
311 |
+
self.branch3x3dbl_3b(branch3x3dbl),
|
312 |
+
]
|
313 |
+
branch3x3dbl = torch.cat(branch3x3dbl, 1)
|
314 |
+
|
315 |
+
# Patch: The FID Inception model uses max pooling instead of average
|
316 |
+
# pooling. This is likely an error in this specific Inception
|
317 |
+
# implementation, as other Inception models use average pooling here
|
318 |
+
# (which matches the description in the paper).
|
319 |
+
branch_pool = F.max_pool2d(x, kernel_size=3, stride=1, padding=1)
|
320 |
+
branch_pool = self.branch_pool(branch_pool)
|
321 |
+
|
322 |
+
outputs = [branch1x1, branch3x3, branch3x3dbl, branch_pool]
|
323 |
+
return torch.cat(outputs, 1)
|
saicinpainting/evaluation/losses/lpips.py
ADDED
@@ -0,0 +1,891 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
############################################################
|
2 |
+
# The contents below have been combined using files in the #
|
3 |
+
# following repository: #
|
4 |
+
# https://github.com/richzhang/PerceptualSimilarity #
|
5 |
+
############################################################
|
6 |
+
|
7 |
+
############################################################
|
8 |
+
# __init__.py #
|
9 |
+
############################################################
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
from skimage.metrics import structural_similarity
|
13 |
+
import torch
|
14 |
+
|
15 |
+
from saicinpainting.utils import get_shape
|
16 |
+
|
17 |
+
|
18 |
+
class PerceptualLoss(torch.nn.Module):
|
19 |
+
def __init__(self, model='net-lin', net='alex', colorspace='rgb', model_path=None, spatial=False, use_gpu=True):
|
20 |
+
# VGG using our perceptually-learned weights (LPIPS metric)
|
21 |
+
# def __init__(self, model='net', net='vgg', use_gpu=True): # "default" way of using VGG as a perceptual loss
|
22 |
+
super(PerceptualLoss, self).__init__()
|
23 |
+
self.use_gpu = use_gpu
|
24 |
+
self.spatial = spatial
|
25 |
+
self.model = DistModel()
|
26 |
+
self.model.initialize(model=model, net=net, use_gpu=use_gpu, colorspace=colorspace,
|
27 |
+
model_path=model_path, spatial=self.spatial)
|
28 |
+
|
29 |
+
def forward(self, pred, target, normalize=True):
|
30 |
+
"""
|
31 |
+
Pred and target are Variables.
|
32 |
+
If normalize is True, assumes the images are between [0,1] and then scales them between [-1,+1]
|
33 |
+
If normalize is False, assumes the images are already between [-1,+1]
|
34 |
+
Inputs pred and target are Nx3xHxW
|
35 |
+
Output pytorch Variable N long
|
36 |
+
"""
|
37 |
+
|
38 |
+
if normalize:
|
39 |
+
target = 2 * target - 1
|
40 |
+
pred = 2 * pred - 1
|
41 |
+
|
42 |
+
return self.model(target, pred)
|
43 |
+
|
44 |
+
|
45 |
+
def normalize_tensor(in_feat, eps=1e-10):
|
46 |
+
norm_factor = torch.sqrt(torch.sum(in_feat ** 2, dim=1, keepdim=True))
|
47 |
+
return in_feat / (norm_factor + eps)
|
48 |
+
|
49 |
+
|
50 |
+
def l2(p0, p1, range=255.):
|
51 |
+
return .5 * np.mean((p0 / range - p1 / range) ** 2)
|
52 |
+
|
53 |
+
|
54 |
+
def psnr(p0, p1, peak=255.):
|
55 |
+
return 10 * np.log10(peak ** 2 / np.mean((1. * p0 - 1. * p1) ** 2))
|
56 |
+
|
57 |
+
|
58 |
+
def dssim(p0, p1, range=255.):
|
59 |
+
return (1 - compare_ssim(p0, p1, data_range=range, multichannel=True)) / 2.
|
60 |
+
|
61 |
+
|
62 |
+
def rgb2lab(in_img, mean_cent=False):
|
63 |
+
from skimage import color
|
64 |
+
img_lab = color.rgb2lab(in_img)
|
65 |
+
if (mean_cent):
|
66 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
67 |
+
return img_lab
|
68 |
+
|
69 |
+
|
70 |
+
def tensor2np(tensor_obj):
|
71 |
+
# change dimension of a tensor object into a numpy array
|
72 |
+
return tensor_obj[0].cpu().float().numpy().transpose((1, 2, 0))
|
73 |
+
|
74 |
+
|
75 |
+
def np2tensor(np_obj):
|
76 |
+
# change dimenion of np array into tensor array
|
77 |
+
return torch.Tensor(np_obj[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
78 |
+
|
79 |
+
|
80 |
+
def tensor2tensorlab(image_tensor, to_norm=True, mc_only=False):
|
81 |
+
# image tensor to lab tensor
|
82 |
+
from skimage import color
|
83 |
+
|
84 |
+
img = tensor2im(image_tensor)
|
85 |
+
img_lab = color.rgb2lab(img)
|
86 |
+
if (mc_only):
|
87 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
88 |
+
if (to_norm and not mc_only):
|
89 |
+
img_lab[:, :, 0] = img_lab[:, :, 0] - 50
|
90 |
+
img_lab = img_lab / 100.
|
91 |
+
|
92 |
+
return np2tensor(img_lab)
|
93 |
+
|
94 |
+
|
95 |
+
def tensorlab2tensor(lab_tensor, return_inbnd=False):
|
96 |
+
from skimage import color
|
97 |
+
import warnings
|
98 |
+
warnings.filterwarnings("ignore")
|
99 |
+
|
100 |
+
lab = tensor2np(lab_tensor) * 100.
|
101 |
+
lab[:, :, 0] = lab[:, :, 0] + 50
|
102 |
+
|
103 |
+
rgb_back = 255. * np.clip(color.lab2rgb(lab.astype('float')), 0, 1)
|
104 |
+
if (return_inbnd):
|
105 |
+
# convert back to lab, see if we match
|
106 |
+
lab_back = color.rgb2lab(rgb_back.astype('uint8'))
|
107 |
+
mask = 1. * np.isclose(lab_back, lab, atol=2.)
|
108 |
+
mask = np2tensor(np.prod(mask, axis=2)[:, :, np.newaxis])
|
109 |
+
return (im2tensor(rgb_back), mask)
|
110 |
+
else:
|
111 |
+
return im2tensor(rgb_back)
|
112 |
+
|
113 |
+
|
114 |
+
def rgb2lab(input):
|
115 |
+
from skimage import color
|
116 |
+
return color.rgb2lab(input / 255.)
|
117 |
+
|
118 |
+
|
119 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
|
120 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
121 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
122 |
+
return image_numpy.astype(imtype)
|
123 |
+
|
124 |
+
|
125 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
|
126 |
+
return torch.Tensor((image / factor - cent)
|
127 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
128 |
+
|
129 |
+
|
130 |
+
def tensor2vec(vector_tensor):
|
131 |
+
return vector_tensor.data.cpu().numpy()[:, :, 0, 0]
|
132 |
+
|
133 |
+
|
134 |
+
def voc_ap(rec, prec, use_07_metric=False):
|
135 |
+
""" ap = voc_ap(rec, prec, [use_07_metric])
|
136 |
+
Compute VOC AP given precision and recall.
|
137 |
+
If use_07_metric is true, uses the
|
138 |
+
VOC 07 11 point method (default:False).
|
139 |
+
"""
|
140 |
+
if use_07_metric:
|
141 |
+
# 11 point metric
|
142 |
+
ap = 0.
|
143 |
+
for t in np.arange(0., 1.1, 0.1):
|
144 |
+
if np.sum(rec >= t) == 0:
|
145 |
+
p = 0
|
146 |
+
else:
|
147 |
+
p = np.max(prec[rec >= t])
|
148 |
+
ap = ap + p / 11.
|
149 |
+
else:
|
150 |
+
# correct AP calculation
|
151 |
+
# first append sentinel values at the end
|
152 |
+
mrec = np.concatenate(([0.], rec, [1.]))
|
153 |
+
mpre = np.concatenate(([0.], prec, [0.]))
|
154 |
+
|
155 |
+
# compute the precision envelope
|
156 |
+
for i in range(mpre.size - 1, 0, -1):
|
157 |
+
mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
|
158 |
+
|
159 |
+
# to calculate area under PR curve, look for points
|
160 |
+
# where X axis (recall) changes value
|
161 |
+
i = np.where(mrec[1:] != mrec[:-1])[0]
|
162 |
+
|
163 |
+
# and sum (\Delta recall) * prec
|
164 |
+
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
|
165 |
+
return ap
|
166 |
+
|
167 |
+
|
168 |
+
def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=255. / 2.):
|
169 |
+
# def tensor2im(image_tensor, imtype=np.uint8, cent=1., factor=1.):
|
170 |
+
image_numpy = image_tensor[0].cpu().float().numpy()
|
171 |
+
image_numpy = (np.transpose(image_numpy, (1, 2, 0)) + cent) * factor
|
172 |
+
return image_numpy.astype(imtype)
|
173 |
+
|
174 |
+
|
175 |
+
def im2tensor(image, imtype=np.uint8, cent=1., factor=255. / 2.):
|
176 |
+
# def im2tensor(image, imtype=np.uint8, cent=1., factor=1.):
|
177 |
+
return torch.Tensor((image / factor - cent)
|
178 |
+
[:, :, :, np.newaxis].transpose((3, 2, 0, 1)))
|
179 |
+
|
180 |
+
|
181 |
+
############################################################
|
182 |
+
# base_model.py #
|
183 |
+
############################################################
|
184 |
+
|
185 |
+
|
186 |
+
class BaseModel(torch.nn.Module):
|
187 |
+
def __init__(self):
|
188 |
+
super().__init__()
|
189 |
+
|
190 |
+
def name(self):
|
191 |
+
return 'BaseModel'
|
192 |
+
|
193 |
+
def initialize(self, use_gpu=True):
|
194 |
+
self.use_gpu = use_gpu
|
195 |
+
|
196 |
+
def forward(self):
|
197 |
+
pass
|
198 |
+
|
199 |
+
def get_image_paths(self):
|
200 |
+
pass
|
201 |
+
|
202 |
+
def optimize_parameters(self):
|
203 |
+
pass
|
204 |
+
|
205 |
+
def get_current_visuals(self):
|
206 |
+
return self.input
|
207 |
+
|
208 |
+
def get_current_errors(self):
|
209 |
+
return {}
|
210 |
+
|
211 |
+
def save(self, label):
|
212 |
+
pass
|
213 |
+
|
214 |
+
# helper saving function that can be used by subclasses
|
215 |
+
def save_network(self, network, path, network_label, epoch_label):
|
216 |
+
save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
|
217 |
+
save_path = os.path.join(path, save_filename)
|
218 |
+
torch.save(network.state_dict(), save_path)
|
219 |
+
|
220 |
+
# helper loading function that can be used by subclasses
|
221 |
+
def load_network(self, network, network_label, epoch_label):
|
222 |
+
save_filename = '%s_net_%s.pth' % (epoch_label, network_label)
|
223 |
+
save_path = os.path.join(self.save_dir, save_filename)
|
224 |
+
print('Loading network from %s' % save_path)
|
225 |
+
network.load_state_dict(torch.load(save_path, map_location='cpu'))
|
226 |
+
|
227 |
+
def update_learning_rate():
|
228 |
+
pass
|
229 |
+
|
230 |
+
def get_image_paths(self):
|
231 |
+
return self.image_paths
|
232 |
+
|
233 |
+
def save_done(self, flag=False):
|
234 |
+
np.save(os.path.join(self.save_dir, 'done_flag'), flag)
|
235 |
+
np.savetxt(os.path.join(self.save_dir, 'done_flag'), [flag, ], fmt='%i')
|
236 |
+
|
237 |
+
|
238 |
+
############################################################
|
239 |
+
# dist_model.py #
|
240 |
+
############################################################
|
241 |
+
|
242 |
+
import os
|
243 |
+
from collections import OrderedDict
|
244 |
+
from scipy.ndimage import zoom
|
245 |
+
from tqdm import tqdm
|
246 |
+
|
247 |
+
|
248 |
+
class DistModel(BaseModel):
|
249 |
+
def name(self):
|
250 |
+
return self.model_name
|
251 |
+
|
252 |
+
def initialize(self, model='net-lin', net='alex', colorspace='Lab', pnet_rand=False, pnet_tune=False,
|
253 |
+
model_path=None,
|
254 |
+
use_gpu=True, printNet=False, spatial=False,
|
255 |
+
is_train=False, lr=.0001, beta1=0.5, version='0.1'):
|
256 |
+
'''
|
257 |
+
INPUTS
|
258 |
+
model - ['net-lin'] for linearly calibrated network
|
259 |
+
['net'] for off-the-shelf network
|
260 |
+
['L2'] for L2 distance in Lab colorspace
|
261 |
+
['SSIM'] for ssim in RGB colorspace
|
262 |
+
net - ['squeeze','alex','vgg']
|
263 |
+
model_path - if None, will look in weights/[NET_NAME].pth
|
264 |
+
colorspace - ['Lab','RGB'] colorspace to use for L2 and SSIM
|
265 |
+
use_gpu - bool - whether or not to use a GPU
|
266 |
+
printNet - bool - whether or not to print network architecture out
|
267 |
+
spatial - bool - whether to output an array containing varying distances across spatial dimensions
|
268 |
+
spatial_shape - if given, output spatial shape. if None then spatial shape is determined automatically via spatial_factor (see below).
|
269 |
+
spatial_factor - if given, specifies upsampling factor relative to the largest spatial extent of a convolutional layer. if None then resized to size of input images.
|
270 |
+
spatial_order - spline order of filter for upsampling in spatial mode, by default 1 (bilinear).
|
271 |
+
is_train - bool - [True] for training mode
|
272 |
+
lr - float - initial learning rate
|
273 |
+
beta1 - float - initial momentum term for adam
|
274 |
+
version - 0.1 for latest, 0.0 was original (with a bug)
|
275 |
+
'''
|
276 |
+
BaseModel.initialize(self, use_gpu=use_gpu)
|
277 |
+
|
278 |
+
self.model = model
|
279 |
+
self.net = net
|
280 |
+
self.is_train = is_train
|
281 |
+
self.spatial = spatial
|
282 |
+
self.model_name = '%s [%s]' % (model, net)
|
283 |
+
|
284 |
+
if (self.model == 'net-lin'): # pretrained net + linear layer
|
285 |
+
self.net = PNetLin(pnet_rand=pnet_rand, pnet_tune=pnet_tune, pnet_type=net,
|
286 |
+
use_dropout=True, spatial=spatial, version=version, lpips=True)
|
287 |
+
kw = dict(map_location='cpu')
|
288 |
+
if (model_path is None):
|
289 |
+
import inspect
|
290 |
+
model_path = os.path.abspath(
|
291 |
+
os.path.join(os.path.dirname(__file__), '..', '..', '..', 'models', 'lpips_models', f'{net}.pth'))
|
292 |
+
|
293 |
+
if (not is_train):
|
294 |
+
self.net.load_state_dict(torch.load(model_path, **kw), strict=False)
|
295 |
+
|
296 |
+
elif (self.model == 'net'): # pretrained network
|
297 |
+
self.net = PNetLin(pnet_rand=pnet_rand, pnet_type=net, lpips=False)
|
298 |
+
elif (self.model in ['L2', 'l2']):
|
299 |
+
self.net = L2(use_gpu=use_gpu, colorspace=colorspace) # not really a network, only for testing
|
300 |
+
self.model_name = 'L2'
|
301 |
+
elif (self.model in ['DSSIM', 'dssim', 'SSIM', 'ssim']):
|
302 |
+
self.net = DSSIM(use_gpu=use_gpu, colorspace=colorspace)
|
303 |
+
self.model_name = 'SSIM'
|
304 |
+
else:
|
305 |
+
raise ValueError("Model [%s] not recognized." % self.model)
|
306 |
+
|
307 |
+
self.trainable_parameters = list(self.net.parameters())
|
308 |
+
|
309 |
+
if self.is_train: # training mode
|
310 |
+
# extra network on top to go from distances (d0,d1) => predicted human judgment (h*)
|
311 |
+
self.rankLoss = BCERankingLoss()
|
312 |
+
self.trainable_parameters += list(self.rankLoss.net.parameters())
|
313 |
+
self.lr = lr
|
314 |
+
self.old_lr = lr
|
315 |
+
self.optimizer_net = torch.optim.Adam(self.trainable_parameters, lr=lr, betas=(beta1, 0.999))
|
316 |
+
else: # test mode
|
317 |
+
self.net.eval()
|
318 |
+
|
319 |
+
# if (use_gpu):
|
320 |
+
# self.net.to(gpu_ids[0])
|
321 |
+
# self.net = torch.nn.DataParallel(self.net, device_ids=gpu_ids)
|
322 |
+
# if (self.is_train):
|
323 |
+
# self.rankLoss = self.rankLoss.to(device=gpu_ids[0]) # just put this on GPU0
|
324 |
+
|
325 |
+
if (printNet):
|
326 |
+
print('---------- Networks initialized -------------')
|
327 |
+
print_network(self.net)
|
328 |
+
print('-----------------------------------------------')
|
329 |
+
|
330 |
+
def forward(self, in0, in1, retPerLayer=False):
|
331 |
+
''' Function computes the distance between image patches in0 and in1
|
332 |
+
INPUTS
|
333 |
+
in0, in1 - torch.Tensor object of shape Nx3xXxY - image patch scaled to [-1,1]
|
334 |
+
OUTPUT
|
335 |
+
computed distances between in0 and in1
|
336 |
+
'''
|
337 |
+
|
338 |
+
return self.net(in0, in1, retPerLayer=retPerLayer)
|
339 |
+
|
340 |
+
# ***** TRAINING FUNCTIONS *****
|
341 |
+
def optimize_parameters(self):
|
342 |
+
self.forward_train()
|
343 |
+
self.optimizer_net.zero_grad()
|
344 |
+
self.backward_train()
|
345 |
+
self.optimizer_net.step()
|
346 |
+
self.clamp_weights()
|
347 |
+
|
348 |
+
def clamp_weights(self):
|
349 |
+
for module in self.net.modules():
|
350 |
+
if (hasattr(module, 'weight') and module.kernel_size == (1, 1)):
|
351 |
+
module.weight.data = torch.clamp(module.weight.data, min=0)
|
352 |
+
|
353 |
+
def set_input(self, data):
|
354 |
+
self.input_ref = data['ref']
|
355 |
+
self.input_p0 = data['p0']
|
356 |
+
self.input_p1 = data['p1']
|
357 |
+
self.input_judge = data['judge']
|
358 |
+
|
359 |
+
# if (self.use_gpu):
|
360 |
+
# self.input_ref = self.input_ref.to(device=self.gpu_ids[0])
|
361 |
+
# self.input_p0 = self.input_p0.to(device=self.gpu_ids[0])
|
362 |
+
# self.input_p1 = self.input_p1.to(device=self.gpu_ids[0])
|
363 |
+
# self.input_judge = self.input_judge.to(device=self.gpu_ids[0])
|
364 |
+
|
365 |
+
# self.var_ref = Variable(self.input_ref, requires_grad=True)
|
366 |
+
# self.var_p0 = Variable(self.input_p0, requires_grad=True)
|
367 |
+
# self.var_p1 = Variable(self.input_p1, requires_grad=True)
|
368 |
+
|
369 |
+
def forward_train(self): # run forward pass
|
370 |
+
# print(self.net.module.scaling_layer.shift)
|
371 |
+
# print(torch.norm(self.net.module.net.slice1[0].weight).item(), torch.norm(self.net.module.lin0.model[1].weight).item())
|
372 |
+
|
373 |
+
assert False, "We shoud've not get here when using LPIPS as a metric"
|
374 |
+
|
375 |
+
self.d0 = self(self.var_ref, self.var_p0)
|
376 |
+
self.d1 = self(self.var_ref, self.var_p1)
|
377 |
+
self.acc_r = self.compute_accuracy(self.d0, self.d1, self.input_judge)
|
378 |
+
|
379 |
+
self.var_judge = Variable(1. * self.input_judge).view(self.d0.size())
|
380 |
+
|
381 |
+
self.loss_total = self.rankLoss(self.d0, self.d1, self.var_judge * 2. - 1.)
|
382 |
+
|
383 |
+
return self.loss_total
|
384 |
+
|
385 |
+
def backward_train(self):
|
386 |
+
torch.mean(self.loss_total).backward()
|
387 |
+
|
388 |
+
def compute_accuracy(self, d0, d1, judge):
|
389 |
+
''' d0, d1 are Variables, judge is a Tensor '''
|
390 |
+
d1_lt_d0 = (d1 < d0).cpu().data.numpy().flatten()
|
391 |
+
judge_per = judge.cpu().numpy().flatten()
|
392 |
+
return d1_lt_d0 * judge_per + (1 - d1_lt_d0) * (1 - judge_per)
|
393 |
+
|
394 |
+
def get_current_errors(self):
|
395 |
+
retDict = OrderedDict([('loss_total', self.loss_total.data.cpu().numpy()),
|
396 |
+
('acc_r', self.acc_r)])
|
397 |
+
|
398 |
+
for key in retDict.keys():
|
399 |
+
retDict[key] = np.mean(retDict[key])
|
400 |
+
|
401 |
+
return retDict
|
402 |
+
|
403 |
+
def get_current_visuals(self):
|
404 |
+
zoom_factor = 256 / self.var_ref.data.size()[2]
|
405 |
+
|
406 |
+
ref_img = tensor2im(self.var_ref.data)
|
407 |
+
p0_img = tensor2im(self.var_p0.data)
|
408 |
+
p1_img = tensor2im(self.var_p1.data)
|
409 |
+
|
410 |
+
ref_img_vis = zoom(ref_img, [zoom_factor, zoom_factor, 1], order=0)
|
411 |
+
p0_img_vis = zoom(p0_img, [zoom_factor, zoom_factor, 1], order=0)
|
412 |
+
p1_img_vis = zoom(p1_img, [zoom_factor, zoom_factor, 1], order=0)
|
413 |
+
|
414 |
+
return OrderedDict([('ref', ref_img_vis),
|
415 |
+
('p0', p0_img_vis),
|
416 |
+
('p1', p1_img_vis)])
|
417 |
+
|
418 |
+
def save(self, path, label):
|
419 |
+
if (self.use_gpu):
|
420 |
+
self.save_network(self.net.module, path, '', label)
|
421 |
+
else:
|
422 |
+
self.save_network(self.net, path, '', label)
|
423 |
+
self.save_network(self.rankLoss.net, path, 'rank', label)
|
424 |
+
|
425 |
+
def update_learning_rate(self, nepoch_decay):
|
426 |
+
lrd = self.lr / nepoch_decay
|
427 |
+
lr = self.old_lr - lrd
|
428 |
+
|
429 |
+
for param_group in self.optimizer_net.param_groups:
|
430 |
+
param_group['lr'] = lr
|
431 |
+
|
432 |
+
print('update lr [%s] decay: %f -> %f' % (type, self.old_lr, lr))
|
433 |
+
self.old_lr = lr
|
434 |
+
|
435 |
+
|
436 |
+
def score_2afc_dataset(data_loader, func, name=''):
|
437 |
+
''' Function computes Two Alternative Forced Choice (2AFC) score using
|
438 |
+
distance function 'func' in dataset 'data_loader'
|
439 |
+
INPUTS
|
440 |
+
data_loader - CustomDatasetDataLoader object - contains a TwoAFCDataset inside
|
441 |
+
func - callable distance function - calling d=func(in0,in1) should take 2
|
442 |
+
pytorch tensors with shape Nx3xXxY, and return numpy array of length N
|
443 |
+
OUTPUTS
|
444 |
+
[0] - 2AFC score in [0,1], fraction of time func agrees with human evaluators
|
445 |
+
[1] - dictionary with following elements
|
446 |
+
d0s,d1s - N arrays containing distances between reference patch to perturbed patches
|
447 |
+
gts - N array in [0,1], preferred patch selected by human evaluators
|
448 |
+
(closer to "0" for left patch p0, "1" for right patch p1,
|
449 |
+
"0.6" means 60pct people preferred right patch, 40pct preferred left)
|
450 |
+
scores - N array in [0,1], corresponding to what percentage function agreed with humans
|
451 |
+
CONSTS
|
452 |
+
N - number of test triplets in data_loader
|
453 |
+
'''
|
454 |
+
|
455 |
+
d0s = []
|
456 |
+
d1s = []
|
457 |
+
gts = []
|
458 |
+
|
459 |
+
for data in tqdm(data_loader.load_data(), desc=name):
|
460 |
+
d0s += func(data['ref'], data['p0']).data.cpu().numpy().flatten().tolist()
|
461 |
+
d1s += func(data['ref'], data['p1']).data.cpu().numpy().flatten().tolist()
|
462 |
+
gts += data['judge'].cpu().numpy().flatten().tolist()
|
463 |
+
|
464 |
+
d0s = np.array(d0s)
|
465 |
+
d1s = np.array(d1s)
|
466 |
+
gts = np.array(gts)
|
467 |
+
scores = (d0s < d1s) * (1. - gts) + (d1s < d0s) * gts + (d1s == d0s) * .5
|
468 |
+
|
469 |
+
return (np.mean(scores), dict(d0s=d0s, d1s=d1s, gts=gts, scores=scores))
|
470 |
+
|
471 |
+
|
472 |
+
def score_jnd_dataset(data_loader, func, name=''):
|
473 |
+
''' Function computes JND score using distance function 'func' in dataset 'data_loader'
|
474 |
+
INPUTS
|
475 |
+
data_loader - CustomDatasetDataLoader object - contains a JNDDataset inside
|
476 |
+
func - callable distance function - calling d=func(in0,in1) should take 2
|
477 |
+
pytorch tensors with shape Nx3xXxY, and return pytorch array of length N
|
478 |
+
OUTPUTS
|
479 |
+
[0] - JND score in [0,1], mAP score (area under precision-recall curve)
|
480 |
+
[1] - dictionary with following elements
|
481 |
+
ds - N array containing distances between two patches shown to human evaluator
|
482 |
+
sames - N array containing fraction of people who thought the two patches were identical
|
483 |
+
CONSTS
|
484 |
+
N - number of test triplets in data_loader
|
485 |
+
'''
|
486 |
+
|
487 |
+
ds = []
|
488 |
+
gts = []
|
489 |
+
|
490 |
+
for data in tqdm(data_loader.load_data(), desc=name):
|
491 |
+
ds += func(data['p0'], data['p1']).data.cpu().numpy().tolist()
|
492 |
+
gts += data['same'].cpu().numpy().flatten().tolist()
|
493 |
+
|
494 |
+
sames = np.array(gts)
|
495 |
+
ds = np.array(ds)
|
496 |
+
|
497 |
+
sorted_inds = np.argsort(ds)
|
498 |
+
ds_sorted = ds[sorted_inds]
|
499 |
+
sames_sorted = sames[sorted_inds]
|
500 |
+
|
501 |
+
TPs = np.cumsum(sames_sorted)
|
502 |
+
FPs = np.cumsum(1 - sames_sorted)
|
503 |
+
FNs = np.sum(sames_sorted) - TPs
|
504 |
+
|
505 |
+
precs = TPs / (TPs + FPs)
|
506 |
+
recs = TPs / (TPs + FNs)
|
507 |
+
score = voc_ap(recs, precs)
|
508 |
+
|
509 |
+
return (score, dict(ds=ds, sames=sames))
|
510 |
+
|
511 |
+
|
512 |
+
############################################################
|
513 |
+
# networks_basic.py #
|
514 |
+
############################################################
|
515 |
+
|
516 |
+
import torch.nn as nn
|
517 |
+
from torch.autograd import Variable
|
518 |
+
import numpy as np
|
519 |
+
|
520 |
+
|
521 |
+
def spatial_average(in_tens, keepdim=True):
|
522 |
+
return in_tens.mean([2, 3], keepdim=keepdim)
|
523 |
+
|
524 |
+
|
525 |
+
def upsample(in_tens, out_H=64): # assumes scale factor is same for H and W
|
526 |
+
in_H = in_tens.shape[2]
|
527 |
+
scale_factor = 1. * out_H / in_H
|
528 |
+
|
529 |
+
return nn.Upsample(scale_factor=scale_factor, mode='bilinear', align_corners=False)(in_tens)
|
530 |
+
|
531 |
+
|
532 |
+
# Learned perceptual metric
|
533 |
+
class PNetLin(nn.Module):
|
534 |
+
def __init__(self, pnet_type='vgg', pnet_rand=False, pnet_tune=False, use_dropout=True, spatial=False,
|
535 |
+
version='0.1', lpips=True):
|
536 |
+
super(PNetLin, self).__init__()
|
537 |
+
|
538 |
+
self.pnet_type = pnet_type
|
539 |
+
self.pnet_tune = pnet_tune
|
540 |
+
self.pnet_rand = pnet_rand
|
541 |
+
self.spatial = spatial
|
542 |
+
self.lpips = lpips
|
543 |
+
self.version = version
|
544 |
+
self.scaling_layer = ScalingLayer()
|
545 |
+
|
546 |
+
if (self.pnet_type in ['vgg', 'vgg16']):
|
547 |
+
net_type = vgg16
|
548 |
+
self.chns = [64, 128, 256, 512, 512]
|
549 |
+
elif (self.pnet_type == 'alex'):
|
550 |
+
net_type = alexnet
|
551 |
+
self.chns = [64, 192, 384, 256, 256]
|
552 |
+
elif (self.pnet_type == 'squeeze'):
|
553 |
+
net_type = squeezenet
|
554 |
+
self.chns = [64, 128, 256, 384, 384, 512, 512]
|
555 |
+
self.L = len(self.chns)
|
556 |
+
|
557 |
+
self.net = net_type(pretrained=not self.pnet_rand, requires_grad=self.pnet_tune)
|
558 |
+
|
559 |
+
if (lpips):
|
560 |
+
self.lin0 = NetLinLayer(self.chns[0], use_dropout=use_dropout)
|
561 |
+
self.lin1 = NetLinLayer(self.chns[1], use_dropout=use_dropout)
|
562 |
+
self.lin2 = NetLinLayer(self.chns[2], use_dropout=use_dropout)
|
563 |
+
self.lin3 = NetLinLayer(self.chns[3], use_dropout=use_dropout)
|
564 |
+
self.lin4 = NetLinLayer(self.chns[4], use_dropout=use_dropout)
|
565 |
+
self.lins = [self.lin0, self.lin1, self.lin2, self.lin3, self.lin4]
|
566 |
+
if (self.pnet_type == 'squeeze'): # 7 layers for squeezenet
|
567 |
+
self.lin5 = NetLinLayer(self.chns[5], use_dropout=use_dropout)
|
568 |
+
self.lin6 = NetLinLayer(self.chns[6], use_dropout=use_dropout)
|
569 |
+
self.lins += [self.lin5, self.lin6]
|
570 |
+
|
571 |
+
def forward(self, in0, in1, retPerLayer=False):
|
572 |
+
# v0.0 - original release had a bug, where input was not scaled
|
573 |
+
in0_input, in1_input = (self.scaling_layer(in0), self.scaling_layer(in1)) if self.version == '0.1' else (
|
574 |
+
in0, in1)
|
575 |
+
outs0, outs1 = self.net(in0_input), self.net(in1_input)
|
576 |
+
feats0, feats1, diffs = {}, {}, {}
|
577 |
+
|
578 |
+
for kk in range(self.L):
|
579 |
+
feats0[kk], feats1[kk] = normalize_tensor(outs0[kk]), normalize_tensor(outs1[kk])
|
580 |
+
diffs[kk] = (feats0[kk] - feats1[kk]) ** 2
|
581 |
+
|
582 |
+
if (self.lpips):
|
583 |
+
if (self.spatial):
|
584 |
+
res = [upsample(self.lins[kk].model(diffs[kk]), out_H=in0.shape[2]) for kk in range(self.L)]
|
585 |
+
else:
|
586 |
+
res = [spatial_average(self.lins[kk].model(diffs[kk]), keepdim=True) for kk in range(self.L)]
|
587 |
+
else:
|
588 |
+
if (self.spatial):
|
589 |
+
res = [upsample(diffs[kk].sum(dim=1, keepdim=True), out_H=in0.shape[2]) for kk in range(self.L)]
|
590 |
+
else:
|
591 |
+
res = [spatial_average(diffs[kk].sum(dim=1, keepdim=True), keepdim=True) for kk in range(self.L)]
|
592 |
+
|
593 |
+
val = res[0]
|
594 |
+
for l in range(1, self.L):
|
595 |
+
val += res[l]
|
596 |
+
|
597 |
+
if (retPerLayer):
|
598 |
+
return (val, res)
|
599 |
+
else:
|
600 |
+
return val
|
601 |
+
|
602 |
+
|
603 |
+
class ScalingLayer(nn.Module):
|
604 |
+
def __init__(self):
|
605 |
+
super(ScalingLayer, self).__init__()
|
606 |
+
self.register_buffer('shift', torch.Tensor([-.030, -.088, -.188])[None, :, None, None])
|
607 |
+
self.register_buffer('scale', torch.Tensor([.458, .448, .450])[None, :, None, None])
|
608 |
+
|
609 |
+
def forward(self, inp):
|
610 |
+
return (inp - self.shift) / self.scale
|
611 |
+
|
612 |
+
|
613 |
+
class NetLinLayer(nn.Module):
|
614 |
+
''' A single linear layer which does a 1x1 conv '''
|
615 |
+
|
616 |
+
def __init__(self, chn_in, chn_out=1, use_dropout=False):
|
617 |
+
super(NetLinLayer, self).__init__()
|
618 |
+
|
619 |
+
layers = [nn.Dropout(), ] if (use_dropout) else []
|
620 |
+
layers += [nn.Conv2d(chn_in, chn_out, 1, stride=1, padding=0, bias=False), ]
|
621 |
+
self.model = nn.Sequential(*layers)
|
622 |
+
|
623 |
+
|
624 |
+
class Dist2LogitLayer(nn.Module):
|
625 |
+
''' takes 2 distances, puts through fc layers, spits out value between [0,1] (if use_sigmoid is True) '''
|
626 |
+
|
627 |
+
def __init__(self, chn_mid=32, use_sigmoid=True):
|
628 |
+
super(Dist2LogitLayer, self).__init__()
|
629 |
+
|
630 |
+
layers = [nn.Conv2d(5, chn_mid, 1, stride=1, padding=0, bias=True), ]
|
631 |
+
layers += [nn.LeakyReLU(0.2, True), ]
|
632 |
+
layers += [nn.Conv2d(chn_mid, chn_mid, 1, stride=1, padding=0, bias=True), ]
|
633 |
+
layers += [nn.LeakyReLU(0.2, True), ]
|
634 |
+
layers += [nn.Conv2d(chn_mid, 1, 1, stride=1, padding=0, bias=True), ]
|
635 |
+
if (use_sigmoid):
|
636 |
+
layers += [nn.Sigmoid(), ]
|
637 |
+
self.model = nn.Sequential(*layers)
|
638 |
+
|
639 |
+
def forward(self, d0, d1, eps=0.1):
|
640 |
+
return self.model(torch.cat((d0, d1, d0 - d1, d0 / (d1 + eps), d1 / (d0 + eps)), dim=1))
|
641 |
+
|
642 |
+
|
643 |
+
class BCERankingLoss(nn.Module):
|
644 |
+
def __init__(self, chn_mid=32):
|
645 |
+
super(BCERankingLoss, self).__init__()
|
646 |
+
self.net = Dist2LogitLayer(chn_mid=chn_mid)
|
647 |
+
# self.parameters = list(self.net.parameters())
|
648 |
+
self.loss = torch.nn.BCELoss()
|
649 |
+
|
650 |
+
def forward(self, d0, d1, judge):
|
651 |
+
per = (judge + 1.) / 2.
|
652 |
+
self.logit = self.net(d0, d1)
|
653 |
+
return self.loss(self.logit, per)
|
654 |
+
|
655 |
+
|
656 |
+
# L2, DSSIM metrics
|
657 |
+
class FakeNet(nn.Module):
|
658 |
+
def __init__(self, use_gpu=True, colorspace='Lab'):
|
659 |
+
super(FakeNet, self).__init__()
|
660 |
+
self.use_gpu = use_gpu
|
661 |
+
self.colorspace = colorspace
|
662 |
+
|
663 |
+
|
664 |
+
class L2(FakeNet):
|
665 |
+
|
666 |
+
def forward(self, in0, in1, retPerLayer=None):
|
667 |
+
assert (in0.size()[0] == 1) # currently only supports batchSize 1
|
668 |
+
|
669 |
+
if (self.colorspace == 'RGB'):
|
670 |
+
(N, C, X, Y) = in0.size()
|
671 |
+
value = torch.mean(torch.mean(torch.mean((in0 - in1) ** 2, dim=1).view(N, 1, X, Y), dim=2).view(N, 1, 1, Y),
|
672 |
+
dim=3).view(N)
|
673 |
+
return value
|
674 |
+
elif (self.colorspace == 'Lab'):
|
675 |
+
value = l2(tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
|
676 |
+
tensor2np(tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
|
677 |
+
ret_var = Variable(torch.Tensor((value,)))
|
678 |
+
# if (self.use_gpu):
|
679 |
+
# ret_var = ret_var.cuda()
|
680 |
+
return ret_var
|
681 |
+
|
682 |
+
|
683 |
+
class DSSIM(FakeNet):
|
684 |
+
|
685 |
+
def forward(self, in0, in1, retPerLayer=None):
|
686 |
+
assert (in0.size()[0] == 1) # currently only supports batchSize 1
|
687 |
+
|
688 |
+
if (self.colorspace == 'RGB'):
|
689 |
+
value = dssim(1. * tensor2im(in0.data), 1. * tensor2im(in1.data), range=255.).astype('float')
|
690 |
+
elif (self.colorspace == 'Lab'):
|
691 |
+
value = dssim(tensor2np(tensor2tensorlab(in0.data, to_norm=False)),
|
692 |
+
tensor2np(tensor2tensorlab(in1.data, to_norm=False)), range=100.).astype('float')
|
693 |
+
ret_var = Variable(torch.Tensor((value,)))
|
694 |
+
# if (self.use_gpu):
|
695 |
+
# ret_var = ret_var.cuda()
|
696 |
+
return ret_var
|
697 |
+
|
698 |
+
|
699 |
+
def print_network(net):
|
700 |
+
num_params = 0
|
701 |
+
for param in net.parameters():
|
702 |
+
num_params += param.numel()
|
703 |
+
print('Network', net)
|
704 |
+
print('Total number of parameters: %d' % num_params)
|
705 |
+
|
706 |
+
|
707 |
+
############################################################
|
708 |
+
# pretrained_networks.py #
|
709 |
+
############################################################
|
710 |
+
|
711 |
+
from collections import namedtuple
|
712 |
+
import torch
|
713 |
+
from torchvision import models as tv
|
714 |
+
|
715 |
+
|
716 |
+
class squeezenet(torch.nn.Module):
|
717 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
718 |
+
super(squeezenet, self).__init__()
|
719 |
+
pretrained_features = tv.squeezenet1_1(pretrained=pretrained).features
|
720 |
+
self.slice1 = torch.nn.Sequential()
|
721 |
+
self.slice2 = torch.nn.Sequential()
|
722 |
+
self.slice3 = torch.nn.Sequential()
|
723 |
+
self.slice4 = torch.nn.Sequential()
|
724 |
+
self.slice5 = torch.nn.Sequential()
|
725 |
+
self.slice6 = torch.nn.Sequential()
|
726 |
+
self.slice7 = torch.nn.Sequential()
|
727 |
+
self.N_slices = 7
|
728 |
+
for x in range(2):
|
729 |
+
self.slice1.add_module(str(x), pretrained_features[x])
|
730 |
+
for x in range(2, 5):
|
731 |
+
self.slice2.add_module(str(x), pretrained_features[x])
|
732 |
+
for x in range(5, 8):
|
733 |
+
self.slice3.add_module(str(x), pretrained_features[x])
|
734 |
+
for x in range(8, 10):
|
735 |
+
self.slice4.add_module(str(x), pretrained_features[x])
|
736 |
+
for x in range(10, 11):
|
737 |
+
self.slice5.add_module(str(x), pretrained_features[x])
|
738 |
+
for x in range(11, 12):
|
739 |
+
self.slice6.add_module(str(x), pretrained_features[x])
|
740 |
+
for x in range(12, 13):
|
741 |
+
self.slice7.add_module(str(x), pretrained_features[x])
|
742 |
+
if not requires_grad:
|
743 |
+
for param in self.parameters():
|
744 |
+
param.requires_grad = False
|
745 |
+
|
746 |
+
def forward(self, X):
|
747 |
+
h = self.slice1(X)
|
748 |
+
h_relu1 = h
|
749 |
+
h = self.slice2(h)
|
750 |
+
h_relu2 = h
|
751 |
+
h = self.slice3(h)
|
752 |
+
h_relu3 = h
|
753 |
+
h = self.slice4(h)
|
754 |
+
h_relu4 = h
|
755 |
+
h = self.slice5(h)
|
756 |
+
h_relu5 = h
|
757 |
+
h = self.slice6(h)
|
758 |
+
h_relu6 = h
|
759 |
+
h = self.slice7(h)
|
760 |
+
h_relu7 = h
|
761 |
+
vgg_outputs = namedtuple("SqueezeOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5', 'relu6', 'relu7'])
|
762 |
+
out = vgg_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5, h_relu6, h_relu7)
|
763 |
+
|
764 |
+
return out
|
765 |
+
|
766 |
+
|
767 |
+
class alexnet(torch.nn.Module):
|
768 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
769 |
+
super(alexnet, self).__init__()
|
770 |
+
alexnet_pretrained_features = tv.alexnet(pretrained=pretrained).features
|
771 |
+
self.slice1 = torch.nn.Sequential()
|
772 |
+
self.slice2 = torch.nn.Sequential()
|
773 |
+
self.slice3 = torch.nn.Sequential()
|
774 |
+
self.slice4 = torch.nn.Sequential()
|
775 |
+
self.slice5 = torch.nn.Sequential()
|
776 |
+
self.N_slices = 5
|
777 |
+
for x in range(2):
|
778 |
+
self.slice1.add_module(str(x), alexnet_pretrained_features[x])
|
779 |
+
for x in range(2, 5):
|
780 |
+
self.slice2.add_module(str(x), alexnet_pretrained_features[x])
|
781 |
+
for x in range(5, 8):
|
782 |
+
self.slice3.add_module(str(x), alexnet_pretrained_features[x])
|
783 |
+
for x in range(8, 10):
|
784 |
+
self.slice4.add_module(str(x), alexnet_pretrained_features[x])
|
785 |
+
for x in range(10, 12):
|
786 |
+
self.slice5.add_module(str(x), alexnet_pretrained_features[x])
|
787 |
+
if not requires_grad:
|
788 |
+
for param in self.parameters():
|
789 |
+
param.requires_grad = False
|
790 |
+
|
791 |
+
def forward(self, X):
|
792 |
+
h = self.slice1(X)
|
793 |
+
h_relu1 = h
|
794 |
+
h = self.slice2(h)
|
795 |
+
h_relu2 = h
|
796 |
+
h = self.slice3(h)
|
797 |
+
h_relu3 = h
|
798 |
+
h = self.slice4(h)
|
799 |
+
h_relu4 = h
|
800 |
+
h = self.slice5(h)
|
801 |
+
h_relu5 = h
|
802 |
+
alexnet_outputs = namedtuple("AlexnetOutputs", ['relu1', 'relu2', 'relu3', 'relu4', 'relu5'])
|
803 |
+
out = alexnet_outputs(h_relu1, h_relu2, h_relu3, h_relu4, h_relu5)
|
804 |
+
|
805 |
+
return out
|
806 |
+
|
807 |
+
|
808 |
+
class vgg16(torch.nn.Module):
|
809 |
+
def __init__(self, requires_grad=False, pretrained=True):
|
810 |
+
super(vgg16, self).__init__()
|
811 |
+
vgg_pretrained_features = tv.vgg16(pretrained=pretrained).features
|
812 |
+
self.slice1 = torch.nn.Sequential()
|
813 |
+
self.slice2 = torch.nn.Sequential()
|
814 |
+
self.slice3 = torch.nn.Sequential()
|
815 |
+
self.slice4 = torch.nn.Sequential()
|
816 |
+
self.slice5 = torch.nn.Sequential()
|
817 |
+
self.N_slices = 5
|
818 |
+
for x in range(4):
|
819 |
+
self.slice1.add_module(str(x), vgg_pretrained_features[x])
|
820 |
+
for x in range(4, 9):
|
821 |
+
self.slice2.add_module(str(x), vgg_pretrained_features[x])
|
822 |
+
for x in range(9, 16):
|
823 |
+
self.slice3.add_module(str(x), vgg_pretrained_features[x])
|
824 |
+
for x in range(16, 23):
|
825 |
+
self.slice4.add_module(str(x), vgg_pretrained_features[x])
|
826 |
+
for x in range(23, 30):
|
827 |
+
self.slice5.add_module(str(x), vgg_pretrained_features[x])
|
828 |
+
if not requires_grad:
|
829 |
+
for param in self.parameters():
|
830 |
+
param.requires_grad = False
|
831 |
+
|
832 |
+
def forward(self, X):
|
833 |
+
h = self.slice1(X)
|
834 |
+
h_relu1_2 = h
|
835 |
+
h = self.slice2(h)
|
836 |
+
h_relu2_2 = h
|
837 |
+
h = self.slice3(h)
|
838 |
+
h_relu3_3 = h
|
839 |
+
h = self.slice4(h)
|
840 |
+
h_relu4_3 = h
|
841 |
+
h = self.slice5(h)
|
842 |
+
h_relu5_3 = h
|
843 |
+
vgg_outputs = namedtuple("VggOutputs", ['relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3'])
|
844 |
+
out = vgg_outputs(h_relu1_2, h_relu2_2, h_relu3_3, h_relu4_3, h_relu5_3)
|
845 |
+
|
846 |
+
return out
|
847 |
+
|
848 |
+
|
849 |
+
class resnet(torch.nn.Module):
|
850 |
+
def __init__(self, requires_grad=False, pretrained=True, num=18):
|
851 |
+
super(resnet, self).__init__()
|
852 |
+
if (num == 18):
|
853 |
+
self.net = tv.resnet18(pretrained=pretrained)
|
854 |
+
elif (num == 34):
|
855 |
+
self.net = tv.resnet34(pretrained=pretrained)
|
856 |
+
elif (num == 50):
|
857 |
+
self.net = tv.resnet50(pretrained=pretrained)
|
858 |
+
elif (num == 101):
|
859 |
+
self.net = tv.resnet101(pretrained=pretrained)
|
860 |
+
elif (num == 152):
|
861 |
+
self.net = tv.resnet152(pretrained=pretrained)
|
862 |
+
self.N_slices = 5
|
863 |
+
|
864 |
+
self.conv1 = self.net.conv1
|
865 |
+
self.bn1 = self.net.bn1
|
866 |
+
self.relu = self.net.relu
|
867 |
+
self.maxpool = self.net.maxpool
|
868 |
+
self.layer1 = self.net.layer1
|
869 |
+
self.layer2 = self.net.layer2
|
870 |
+
self.layer3 = self.net.layer3
|
871 |
+
self.layer4 = self.net.layer4
|
872 |
+
|
873 |
+
def forward(self, X):
|
874 |
+
h = self.conv1(X)
|
875 |
+
h = self.bn1(h)
|
876 |
+
h = self.relu(h)
|
877 |
+
h_relu1 = h
|
878 |
+
h = self.maxpool(h)
|
879 |
+
h = self.layer1(h)
|
880 |
+
h_conv2 = h
|
881 |
+
h = self.layer2(h)
|
882 |
+
h_conv3 = h
|
883 |
+
h = self.layer3(h)
|
884 |
+
h_conv4 = h
|
885 |
+
h = self.layer4(h)
|
886 |
+
h_conv5 = h
|
887 |
+
|
888 |
+
outputs = namedtuple("Outputs", ['relu1', 'conv2', 'conv3', 'conv4', 'conv5'])
|
889 |
+
out = outputs(h_relu1, h_conv2, h_conv3, h_conv4, h_conv5)
|
890 |
+
|
891 |
+
return out
|
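Not part of the original file: a minimal smoke test showing how the feature-extractor wrappers above (vgg16, alexnet, resnet, squeezenet) are typically used. `pretrained=False` is chosen here only so no weights are downloaded; the import path is an assumption.

```python
import torch

# Assumed import path for this sketch:
# from saicinpainting.evaluation.losses.lpips import vgg16

net = vgg16(requires_grad=False, pretrained=False)  # random weights, just to inspect shapes

x = torch.randn(1, 3, 224, 224)
feats = net(x)  # namedtuple of intermediate activations

print(feats._fields)        # ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3', 'relu5_3')
print(feats.relu3_3.shape)  # torch.Size([1, 256, 56, 56])
```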
saicinpainting/evaluation/losses/ssim.py
ADDED
@@ -0,0 +1,74 @@
import numpy as np
import torch
import torch.nn.functional as F


class SSIM(torch.nn.Module):
    """SSIM. Modified from:
    https://github.com/Po-Hsun-Su/pytorch-ssim/blob/master/pytorch_ssim/__init__.py
    """

    def __init__(self, window_size=11, size_average=True):
        super().__init__()
        self.window_size = window_size
        self.size_average = size_average
        self.channel = 1
        self.register_buffer('window', self._create_window(window_size, self.channel))

    def forward(self, img1, img2):
        assert len(img1.shape) == 4

        channel = img1.size()[1]

        if channel == self.channel and self.window.data.type() == img1.data.type():
            window = self.window
        else:
            window = self._create_window(self.window_size, channel)

            # window = window.to(img1.get_device())
            window = window.type_as(img1)

            self.window = window
            self.channel = channel

        return self._ssim(img1, img2, window, self.window_size, channel, self.size_average)

    def _gaussian(self, window_size, sigma):
        gauss = torch.Tensor([
            np.exp(-(x - (window_size // 2)) ** 2 / float(2 * sigma ** 2)) for x in range(window_size)
        ])
        return gauss / gauss.sum()

    def _create_window(self, window_size, channel):
        _1D_window = self._gaussian(window_size, 1.5).unsqueeze(1)
        _2D_window = _1D_window.mm(_1D_window.t()).float().unsqueeze(0).unsqueeze(0)
        return _2D_window.expand(channel, 1, window_size, window_size).contiguous()

    def _ssim(self, img1, img2, window, window_size, channel, size_average=True):
        mu1 = F.conv2d(img1, window, padding=(window_size // 2), groups=channel)
        mu2 = F.conv2d(img2, window, padding=(window_size // 2), groups=channel)

        mu1_sq = mu1.pow(2)
        mu2_sq = mu2.pow(2)
        mu1_mu2 = mu1 * mu2

        sigma1_sq = F.conv2d(
            img1 * img1, window, padding=(window_size // 2), groups=channel) - mu1_sq
        sigma2_sq = F.conv2d(
            img2 * img2, window, padding=(window_size // 2), groups=channel) - mu2_sq
        sigma12 = F.conv2d(
            img1 * img2, window, padding=(window_size // 2), groups=channel) - mu1_mu2

        C1 = 0.01 ** 2
        C2 = 0.03 ** 2

        ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / \
                   ((mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))

        if size_average:
            return ssim_map.mean()

        return ssim_map.mean(1).mean(1).mean(1)

    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs):
        return
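Not part of the original file: a minimal usage sketch of the SSIM module above. The tensor shapes and value range are illustrative assumptions; identical inputs should score (close to) 1.

```python
import torch

# Assumed import path for this sketch:
# from saicinpainting.evaluation.losses.ssim import SSIM

ssim = SSIM(window_size=11, size_average=True)

img1 = torch.rand(2, 3, 64, 64)   # two 3-channel images with values in [0, 1]
img2 = img1.clone()

print(ssim(img1, img2).item())                   # ~1.0 for identical images
print(ssim(img1, torch.rand_like(img1)).item())  # noticeably lower for unrelated images
```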
saicinpainting/evaluation/masks/README.md
ADDED
@@ -0,0 +1,27 @@
# Current algorithm

## Choice of mask objects

To identify objects that are suitable for mask generation, a panoptic segmentation model
from [detectron2](https://github.com/facebookresearch/detectron2) trained on COCO is used. Categories of the
detected instances belong either to the "stuff" or to the "things" type. We require that object instances have a
category belonging to "things". Besides, we set an upper bound on the area taken up by the object: a very large
area indicates that the instance is either the background or a main object that should not be removed
(see the illustrative sketch below).

## Choice of position for mask

We assume the input image has size 2^n x 2^m. We downsample it using the
[COUNTLESS](https://github.com/william-silversmith/countless) algorithm so that the width is equal to
64 = 2^6 = 2^{downsample_levels}.

### Augmentation

There are several parameters for augmentation:
- Scaling factor. We limit scaling to the case when a mask, after scaling with the pivot point in its center, fits inside
  the image completely.
-

### Shift

## Select
saicinpainting/evaluation/masks/__init__.py
ADDED
File without changes
|
saicinpainting/evaluation/masks/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (160 Bytes). View file
|
|
saicinpainting/evaluation/masks/__pycache__/mask.cpython-39.pyc
ADDED
Binary file (13.8 kB). View file
|
|
saicinpainting/evaluation/masks/countless/.gitignore
ADDED
@@ -0,0 +1 @@
results
saicinpainting/evaluation/masks/countless/README.md
ADDED
@@ -0,0 +1,25 @@
[![Build Status](https://travis-ci.org/william-silversmith/countless.svg?branch=master)](https://travis-ci.org/william-silversmith/countless)

Python COUNTLESS Downsampling
=============================

To install:

`pip install -r requirements.txt`

To test:

`python test.py`

To benchmark countless2d:

`python python/countless2d.py python/images/gray_segmentation.png`

To benchmark countless3d:

`python python/countless3d.py`

Adjust N and the list of algorithms inside each script to modify the run parameters.

Python3 is slightly faster than Python2.
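Not part of the original README: a minimal usage sketch of the 2D routines defined in `countless2d.py` below. The array values are arbitrary, and the import assumes the snippet is run from this directory.

```python
import numpy as np

from countless2d import simplest_countless, zero_corrected_countless

# Each 2x2 block is reduced to a value that occurs at least twice in it,
# falling back to the bottom-right pixel when all four values differ.
labels = np.array([
    [1, 1, 2, 3],
    [1, 4, 3, 3],
    [0, 0, 5, 6],
    [0, 7, 8, 9],
], dtype=np.uint8)

print(zero_corrected_countless(labels))  # [[1 3] [0 9]] -- label 0 is handled correctly
print(simplest_countless(labels))        # [[1 3] [7 9]] -- raw variant mishandles 0
```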
saicinpainting/evaluation/masks/countless/__init__.py
ADDED
File without changes
|
saicinpainting/evaluation/masks/countless/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (170 Bytes). View file
|
|
saicinpainting/evaluation/masks/countless/__pycache__/countless2d.cpython-39.pyc
ADDED
Binary file (11.3 kB). View file
|
|
saicinpainting/evaluation/masks/countless/countless2d.py
ADDED
@@ -0,0 +1,529 @@
1 |
+
from __future__ import print_function, division
|
2 |
+
|
3 |
+
"""
|
4 |
+
COUNTLESS performance test in Python.
|
5 |
+
|
6 |
+
python countless2d.py ./images/NAMEOFIMAGE
|
7 |
+
"""
|
8 |
+
|
9 |
+
import six
|
10 |
+
from six.moves import range
|
11 |
+
from collections import defaultdict
|
12 |
+
from functools import reduce
|
13 |
+
import operator
|
14 |
+
import io
|
15 |
+
import os
|
16 |
+
from PIL import Image
|
17 |
+
import math
|
18 |
+
import numpy as np
|
19 |
+
import random
|
20 |
+
import sys
|
21 |
+
import time
|
22 |
+
from tqdm import tqdm
|
23 |
+
from scipy import ndimage
|
24 |
+
|
25 |
+
def simplest_countless(data):
|
26 |
+
"""
|
27 |
+
Vectorized implementation of downsampling a 2D
|
28 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
29 |
+
|
30 |
+
data is a 2D numpy array with even dimensions.
|
31 |
+
"""
|
32 |
+
sections = []
|
33 |
+
|
34 |
+
# This loop splits the 2D array apart into four arrays that are
|
35 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
36 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
37 |
+
factor = (2,2)
|
38 |
+
for offset in np.ndindex(factor):
|
39 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
40 |
+
sections.append(part)
|
41 |
+
|
42 |
+
a, b, c, d = sections
|
43 |
+
|
44 |
+
ab = a * (a == b) # PICK(A,B)
|
45 |
+
ac = a * (a == c) # PICK(A,C)
|
46 |
+
bc = b * (b == c) # PICK(B,C)
|
47 |
+
|
48 |
+
a = ab | ac | bc # Bitwise OR, safe b/c non-matches are zeroed
|
49 |
+
|
50 |
+
return a + (a == 0) * d # AB || AC || BC || D
|
51 |
+
|
52 |
+
def quick_countless(data):
|
53 |
+
"""
|
54 |
+
Vectorized implementation of downsampling a 2D
|
55 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
56 |
+
|
57 |
+
data is a 2D numpy array with even dimensions.
|
58 |
+
"""
|
59 |
+
sections = []
|
60 |
+
|
61 |
+
# This loop splits the 2D array apart into four arrays that are
|
62 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
63 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
64 |
+
factor = (2,2)
|
65 |
+
for offset in np.ndindex(factor):
|
66 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
67 |
+
sections.append(part)
|
68 |
+
|
69 |
+
a, b, c, d = sections
|
70 |
+
|
71 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
72 |
+
bc = b * (b == c) # PICK(B,C)
|
73 |
+
|
74 |
+
a = ab_ac | bc # (PICK(A,B) || PICK(A,C)) or PICK(B,C)
|
75 |
+
return a + (a == 0) * d # AB || AC || BC || D
|
76 |
+
|
77 |
+
def quickest_countless(data):
|
78 |
+
"""
|
79 |
+
Vectorized implementation of downsampling a 2D
|
80 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
81 |
+
|
82 |
+
data is a 2D numpy array with even dimensions.
|
83 |
+
"""
|
84 |
+
sections = []
|
85 |
+
|
86 |
+
# This loop splits the 2D array apart into four arrays that are
|
87 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
88 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
89 |
+
factor = (2,2)
|
90 |
+
for offset in np.ndindex(factor):
|
91 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
92 |
+
sections.append(part)
|
93 |
+
|
94 |
+
a, b, c, d = sections
|
95 |
+
|
96 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
97 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
98 |
+
return ab_ac + (ab_ac == 0) * d # AB || AC || BC || D
|
99 |
+
|
100 |
+
def quick_countless_xor(data):
|
101 |
+
"""
|
102 |
+
Vectorized implementation of downsampling a 2D
|
103 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
104 |
+
|
105 |
+
data is a 2D numpy array with even dimensions.
|
106 |
+
"""
|
107 |
+
sections = []
|
108 |
+
|
109 |
+
# This loop splits the 2D array apart into four arrays that are
|
110 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
111 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
112 |
+
factor = (2,2)
|
113 |
+
for offset in np.ndindex(factor):
|
114 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
115 |
+
sections.append(part)
|
116 |
+
|
117 |
+
a, b, c, d = sections
|
118 |
+
|
119 |
+
ab = a ^ (a ^ b) # a or b
|
120 |
+
ab += (ab != a) * ((ab ^ (ab ^ c)) - b) # b or c
|
121 |
+
ab += (ab == c) * ((ab ^ (ab ^ d)) - c) # c or d
|
122 |
+
return ab
|
123 |
+
|
124 |
+
def stippled_countless(data):
|
125 |
+
"""
|
126 |
+
Vectorized implementation of downsampling a 2D
|
127 |
+
image by 2 on each side using the COUNTLESS algorithm
|
128 |
+
that treats zero as "background" and inflates lone
|
129 |
+
pixels.
|
130 |
+
|
131 |
+
data is a 2D numpy array with even dimensions.
|
132 |
+
"""
|
133 |
+
sections = []
|
134 |
+
|
135 |
+
# This loop splits the 2D array apart into four arrays that are
|
136 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
137 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
138 |
+
factor = (2,2)
|
139 |
+
for offset in np.ndindex(factor):
|
140 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
141 |
+
sections.append(part)
|
142 |
+
|
143 |
+
a, b, c, d = sections
|
144 |
+
|
145 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
146 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
147 |
+
|
148 |
+
nonzero = a + (a == 0) * (b + (b == 0) * c)
|
149 |
+
return ab_ac + (ab_ac == 0) * (d + (d == 0) * nonzero) # AB || AC || BC || D
|
150 |
+
|
151 |
+
def zero_corrected_countless(data):
|
152 |
+
"""
|
153 |
+
Vectorized implementation of downsampling a 2D
|
154 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
155 |
+
|
156 |
+
data is a 2D numpy array with even dimensions.
|
157 |
+
"""
|
158 |
+
# allows us to prevent losing 1/2 a bit of information
|
159 |
+
# at the top end by using a bigger type. Without this 255 is handled incorrectly.
|
160 |
+
data, upgraded = upgrade_type(data)
|
161 |
+
|
162 |
+
# offset from zero, raw countless doesn't handle 0 correctly
|
163 |
+
# we'll remove the extra 1 at the end.
|
164 |
+
data += 1
|
165 |
+
|
166 |
+
sections = []
|
167 |
+
|
168 |
+
# This loop splits the 2D array apart into four arrays that are
|
169 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
170 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
171 |
+
factor = (2,2)
|
172 |
+
for offset in np.ndindex(factor):
|
173 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
174 |
+
sections.append(part)
|
175 |
+
|
176 |
+
a, b, c, d = sections
|
177 |
+
|
178 |
+
ab = a * (a == b) # PICK(A,B)
|
179 |
+
ac = a * (a == c) # PICK(A,C)
|
180 |
+
bc = b * (b == c) # PICK(B,C)
|
181 |
+
|
182 |
+
a = ab | ac | bc # Bitwise OR, safe b/c non-matches are zeroed
|
183 |
+
|
184 |
+
result = a + (a == 0) * d - 1 # a or d - 1
|
185 |
+
|
186 |
+
if upgraded:
|
187 |
+
return downgrade_type(result)
|
188 |
+
|
189 |
+
# only need to reset data if we weren't upgraded
|
190 |
+
# b/c no copy was made in that case
|
191 |
+
data -= 1
|
192 |
+
|
193 |
+
return result
|
194 |
+
|
195 |
+
def countless_extreme(data):
|
196 |
+
nonzeros = np.count_nonzero(data)
|
197 |
+
# print("nonzeros", nonzeros)
|
198 |
+
|
199 |
+
N = reduce(operator.mul, data.shape)
|
200 |
+
|
201 |
+
if nonzeros == N:
|
202 |
+
print("quick")
|
203 |
+
return quick_countless(data)
|
204 |
+
elif np.count_nonzero(data + 1) == N:
|
205 |
+
print("quick")
|
206 |
+
# print("upper", nonzeros)
|
207 |
+
return quick_countless(data)
|
208 |
+
else:
|
209 |
+
return countless(data)
|
210 |
+
|
211 |
+
|
212 |
+
def countless(data):
|
213 |
+
"""
|
214 |
+
Vectorized implementation of downsampling a 2D
|
215 |
+
image by 2 on each side using the COUNTLESS algorithm.
|
216 |
+
|
217 |
+
data is a 2D numpy array with even dimensions.
|
218 |
+
"""
|
219 |
+
# allows us to prevent losing 1/2 a bit of information
|
220 |
+
# at the top end by using a bigger type. Without this 255 is handled incorrectly.
|
221 |
+
data, upgraded = upgrade_type(data)
|
222 |
+
|
223 |
+
# offset from zero, raw countless doesn't handle 0 correctly
|
224 |
+
# we'll remove the extra 1 at the end.
|
225 |
+
data += 1
|
226 |
+
|
227 |
+
sections = []
|
228 |
+
|
229 |
+
# This loop splits the 2D array apart into four arrays that are
|
230 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
231 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
232 |
+
factor = (2,2)
|
233 |
+
for offset in np.ndindex(factor):
|
234 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
235 |
+
sections.append(part)
|
236 |
+
|
237 |
+
a, b, c, d = sections
|
238 |
+
|
239 |
+
ab_ac = a * ((a == b) | (a == c)) # PICK(A,B) || PICK(A,C) w/ optimization
|
240 |
+
ab_ac |= b * (b == c) # PICK(B,C)
|
241 |
+
result = ab_ac + (ab_ac == 0) * d - 1 # (matches or d) - 1
|
242 |
+
|
243 |
+
if upgraded:
|
244 |
+
return downgrade_type(result)
|
245 |
+
|
246 |
+
# only need to reset data if we weren't upgraded
|
247 |
+
# b/c no copy was made in that case
|
248 |
+
data -= 1
|
249 |
+
|
250 |
+
return result
|
251 |
+
|
252 |
+
def upgrade_type(arr):
|
253 |
+
dtype = arr.dtype
|
254 |
+
|
255 |
+
if dtype == np.uint8:
|
256 |
+
return arr.astype(np.uint16), True
|
257 |
+
elif dtype == np.uint16:
|
258 |
+
return arr.astype(np.uint32), True
|
259 |
+
elif dtype == np.uint32:
|
260 |
+
return arr.astype(np.uint64), True
|
261 |
+
|
262 |
+
return arr, False
|
263 |
+
|
264 |
+
def downgrade_type(arr):
|
265 |
+
dtype = arr.dtype
|
266 |
+
|
267 |
+
if dtype == np.uint64:
|
268 |
+
return arr.astype(np.uint32)
|
269 |
+
elif dtype == np.uint32:
|
270 |
+
return arr.astype(np.uint16)
|
271 |
+
elif dtype == np.uint16:
|
272 |
+
return arr.astype(np.uint8)
|
273 |
+
|
274 |
+
return arr
|
275 |
+
|
276 |
+
def odd_to_even(image):
|
277 |
+
"""
|
278 |
+
To facilitate 2x2 downsampling segmentation, change an odd sized image into an even sized one.
|
279 |
+
Works by mirroring the starting 1 pixel edge of the image on odd shaped sides.
|
280 |
+
|
281 |
+
e.g. turn a 3x3x5 image into a 4x4x5 (the x and y are what are getting downsampled)
|
282 |
+
|
283 |
+
For example: [ 3, 2, 4 ] => [ 3, 3, 2, 4 ] which is now easy to downsample.
|
284 |
+
|
285 |
+
"""
|
286 |
+
shape = np.array(image.shape)
|
287 |
+
|
288 |
+
offset = (shape % 2)[:2] # x,y offset
|
289 |
+
|
290 |
+
# detect if we're dealing with an even
|
291 |
+
# image. if so it's fine, just return.
|
292 |
+
if not np.any(offset):
|
293 |
+
return image
|
294 |
+
|
295 |
+
oddshape = image.shape[:2] + offset
|
296 |
+
oddshape = np.append(oddshape, shape[2:])
|
297 |
+
oddshape = oddshape.astype(int)
|
298 |
+
|
299 |
+
newimg = np.empty(shape=oddshape, dtype=image.dtype)
|
300 |
+
|
301 |
+
ox,oy = offset
|
302 |
+
sx,sy = oddshape
|
303 |
+
|
304 |
+
newimg[0,0] = image[0,0] # corner
|
305 |
+
newimg[ox:sx,0] = image[:,0] # x axis line
|
306 |
+
newimg[0,oy:sy] = image[0,:] # y axis line
|
307 |
+
|
308 |
+
return newimg
|
309 |
+
|
310 |
+
def counting(array):
|
311 |
+
factor = (2, 2, 1)
|
312 |
+
shape = array.shape
|
313 |
+
|
314 |
+
while len(shape) < 4:
|
315 |
+
array = np.expand_dims(array, axis=-1)
|
316 |
+
shape = array.shape
|
317 |
+
|
318 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(shape, factor))
|
319 |
+
output = np.zeros(output_shape, dtype=array.dtype)
|
320 |
+
|
321 |
+
for chan in range(0, shape[3]):
|
322 |
+
for z in range(0, shape[2]):
|
323 |
+
for x in range(0, shape[0], 2):
|
324 |
+
for y in range(0, shape[1], 2):
|
325 |
+
block = array[ x:x+2, y:y+2, z, chan ] # 2x2 block
|
326 |
+
|
327 |
+
hashtable = defaultdict(int)
|
328 |
+
for subx, suby in np.ndindex(block.shape[0], block.shape[1]):
|
329 |
+
hashtable[block[subx, suby]] += 1
|
330 |
+
|
331 |
+
best = (0, 0)
|
332 |
+
for segid, val in six.iteritems(hashtable):
|
333 |
+
if best[1] < val:
|
334 |
+
best = (segid, val)
|
335 |
+
|
336 |
+
output[ x // 2, y // 2, chan ] = best[0]
|
337 |
+
|
338 |
+
return output
|
339 |
+
|
340 |
+
def ndzoom(array):
|
341 |
+
if len(array.shape) == 3:
|
342 |
+
ratio = ( 1 / 2.0, 1 / 2.0, 1.0 )
|
343 |
+
else:
|
344 |
+
ratio = ( 1 / 2.0, 1 / 2.0)
|
345 |
+
return ndimage.interpolation.zoom(array, ratio, order=1)
|
346 |
+
|
347 |
+
def countless_if(array):
|
348 |
+
factor = (2, 2, 1)
|
349 |
+
shape = array.shape
|
350 |
+
|
351 |
+
if len(shape) < 3:
|
352 |
+
array = array[ :,:, np.newaxis ]
|
353 |
+
shape = array.shape
|
354 |
+
|
355 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(shape, factor))
|
356 |
+
output = np.zeros(output_shape, dtype=array.dtype)
|
357 |
+
|
358 |
+
for chan in range(0, shape[2]):
|
359 |
+
for x in range(0, shape[0], 2):
|
360 |
+
for y in range(0, shape[1], 2):
|
361 |
+
block = array[ x:x+2, y:y+2, chan ] # 2x2 block
|
362 |
+
|
363 |
+
if block[0,0] == block[1,0]:
|
364 |
+
pick = block[0,0]
|
365 |
+
elif block[0,0] == block[0,1]:
|
366 |
+
pick = block[0,0]
|
367 |
+
elif block[1,0] == block[0,1]:
|
368 |
+
pick = block[1,0]
|
369 |
+
else:
|
370 |
+
pick = block[1,1]
|
371 |
+
|
372 |
+
output[ x // 2, y // 2, chan ] = pick
|
373 |
+
|
374 |
+
return np.squeeze(output)
|
375 |
+
|
376 |
+
def downsample_with_averaging(array):
|
377 |
+
"""
|
378 |
+
Downsample x by factor using averaging.
|
379 |
+
|
380 |
+
@return: The downsampled array, of the same type as x.
|
381 |
+
"""
|
382 |
+
|
383 |
+
if len(array.shape) == 3:
|
384 |
+
factor = (2,2,1)
|
385 |
+
else:
|
386 |
+
factor = (2,2)
|
387 |
+
|
388 |
+
if np.array_equal(factor[:3], np.array([1,1,1])):
|
389 |
+
return array
|
390 |
+
|
391 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(array.shape, factor))
|
392 |
+
temp = np.zeros(output_shape, float)
|
393 |
+
counts = np.zeros(output_shape, np.int)
|
394 |
+
for offset in np.ndindex(factor):
|
395 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
396 |
+
indexing_expr = tuple(np.s_[:s] for s in part.shape)
|
397 |
+
temp[indexing_expr] += part
|
398 |
+
counts[indexing_expr] += 1
|
399 |
+
return np.cast[array.dtype](temp / counts)
|
400 |
+
|
401 |
+
def downsample_with_max_pooling(array):
|
402 |
+
|
403 |
+
factor = (2,2)
|
404 |
+
|
405 |
+
if np.all(np.array(factor, int) == 1):
|
406 |
+
return array
|
407 |
+
|
408 |
+
sections = []
|
409 |
+
|
410 |
+
for offset in np.ndindex(factor):
|
411 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
412 |
+
sections.append(part)
|
413 |
+
|
414 |
+
output = sections[0].copy()
|
415 |
+
|
416 |
+
for section in sections[1:]:
|
417 |
+
np.maximum(output, section, output)
|
418 |
+
|
419 |
+
return output
|
420 |
+
|
421 |
+
def striding(array):
|
422 |
+
"""Downsample x by factor using striding.
|
423 |
+
|
424 |
+
@return: The downsampled array, of the same type as x.
|
425 |
+
"""
|
426 |
+
factor = (2,2)
|
427 |
+
if np.all(np.array(factor, int) == 1):
|
428 |
+
return array
|
429 |
+
return array[tuple(np.s_[::f] for f in factor)]
|
430 |
+
|
431 |
+
def benchmark():
|
432 |
+
filename = sys.argv[1]
|
433 |
+
img = Image.open(filename)
|
434 |
+
data = np.array(img.getdata(), dtype=np.uint8)
|
435 |
+
|
436 |
+
if len(data.shape) == 1:
|
437 |
+
n_channels = 1
|
438 |
+
reshape = (img.height, img.width)
|
439 |
+
else:
|
440 |
+
n_channels = min(data.shape[1], 3)
|
441 |
+
data = data[:, :n_channels]
|
442 |
+
reshape = (img.height, img.width, n_channels)
|
443 |
+
|
444 |
+
data = data.reshape(reshape).astype(np.uint8)
|
445 |
+
|
446 |
+
methods = [
|
447 |
+
simplest_countless,
|
448 |
+
quick_countless,
|
449 |
+
quick_countless_xor,
|
450 |
+
quickest_countless,
|
451 |
+
stippled_countless,
|
452 |
+
zero_corrected_countless,
|
453 |
+
countless,
|
454 |
+
downsample_with_averaging,
|
455 |
+
downsample_with_max_pooling,
|
456 |
+
ndzoom,
|
457 |
+
striding,
|
458 |
+
# countless_if,
|
459 |
+
# counting,
|
460 |
+
]
|
461 |
+
|
462 |
+
formats = {
|
463 |
+
1: 'L',
|
464 |
+
3: 'RGB',
|
465 |
+
4: 'RGBA'
|
466 |
+
}
|
467 |
+
|
468 |
+
if not os.path.exists('./results'):
|
469 |
+
os.mkdir('./results')
|
470 |
+
|
471 |
+
N = 500
|
472 |
+
img_size = float(img.width * img.height) / 1024.0 / 1024.0
|
473 |
+
print("N = %d, %dx%d (%.2f MPx) %d chan, %s" % (N, img.width, img.height, img_size, n_channels, filename))
|
474 |
+
print("Algorithm\tMPx/sec\tMB/sec\tSec")
|
475 |
+
for fn in methods:
|
476 |
+
print(fn.__name__, end='')
|
477 |
+
sys.stdout.flush()
|
478 |
+
|
479 |
+
start = time.time()
|
480 |
+
# tqdm is here to show you what's going on the first time you run it.
|
481 |
+
# Feel free to remove it to get slightly more accurate timing results.
|
482 |
+
for _ in tqdm(range(N), desc=fn.__name__, disable=True):
|
483 |
+
result = fn(data)
|
484 |
+
end = time.time()
|
485 |
+
print("\r", end='')
|
486 |
+
|
487 |
+
total_time = (end - start)
|
488 |
+
mpx = N * img_size / total_time
|
489 |
+
mbytes = N * img_size * n_channels / total_time
|
490 |
+
# Output in tab separated format to enable copy-paste into excel/numbers
|
491 |
+
print("%s\t%.3f\t%.3f\t%.2f" % (fn.__name__, mpx, mbytes, total_time))
|
492 |
+
outimg = Image.fromarray(np.squeeze(result), formats[n_channels])
|
493 |
+
outimg.save('./results/{}.png'.format(fn.__name__, "PNG"))
|
494 |
+
|
495 |
+
if __name__ == '__main__':
|
496 |
+
benchmark()
|
497 |
+
|
498 |
+
|
499 |
+
# Example results:
|
500 |
+
# N = 5, 1024x1024 (1.00 MPx) 1 chan, images/gray_segmentation.png
|
501 |
+
# Function MPx/sec MB/sec Sec
|
502 |
+
# simplest_countless 752.855 752.855 0.01
|
503 |
+
# quick_countless 920.328 920.328 0.01
|
504 |
+
# zero_corrected_countless 534.143 534.143 0.01
|
505 |
+
# countless 644.247 644.247 0.01
|
506 |
+
# downsample_with_averaging 372.575 372.575 0.01
|
507 |
+
# downsample_with_max_pooling 974.060 974.060 0.01
|
508 |
+
# ndzoom 137.517 137.517 0.04
|
509 |
+
# striding 38550.588 38550.588 0.00
|
510 |
+
# countless_if 4.377 4.377 1.14
|
511 |
+
# counting 0.117 0.117 42.85
|
512 |
+
|
513 |
+
# Run without non-numpy implementations:
|
514 |
+
# N = 2000, 1024x1024 (1.00 MPx) 1 chan, images/gray_segmentation.png
|
515 |
+
# Algorithm MPx/sec MB/sec Sec
|
516 |
+
# simplest_countless 800.522 800.522 2.50
|
517 |
+
# quick_countless 945.420 945.420 2.12
|
518 |
+
# quickest_countless 947.256 947.256 2.11
|
519 |
+
# stippled_countless 544.049 544.049 3.68
|
520 |
+
# zero_corrected_countless 575.310 575.310 3.48
|
521 |
+
# countless 646.684 646.684 3.09
|
522 |
+
# downsample_with_averaging 385.132 385.132 5.19
|
523 |
+
# downsample_with_max_poolin 988.361 988.361 2.02
|
524 |
+
# ndzoom 163.104 163.104 12.26
|
525 |
+
# striding 81589.340 81589.340 0.02
|
526 |
+
|
527 |
+
|
528 |
+
|
529 |
+
|
saicinpainting/evaluation/masks/countless/countless3d.py
ADDED
@@ -0,0 +1,356 @@
1 |
+
from six.moves import range
|
2 |
+
from PIL import Image
|
3 |
+
import numpy as np
|
4 |
+
import io
|
5 |
+
import time
|
6 |
+
import math
|
7 |
+
import random
|
8 |
+
import sys
|
9 |
+
from collections import defaultdict
|
10 |
+
from copy import deepcopy
|
11 |
+
from itertools import combinations
|
12 |
+
from functools import reduce
|
13 |
+
from tqdm import tqdm
|
14 |
+
|
15 |
+
from memory_profiler import profile
|
16 |
+
|
17 |
+
def countless5(a,b,c,d,e):
|
18 |
+
"""First stage of generalizing from countless2d.
|
19 |
+
|
20 |
+
You have five slots: A, B, C, D, E
|
21 |
+
|
22 |
+
You can decide if something is the winner by first checking for
|
23 |
+
matches of three, then matches of two, then picking just one if
|
24 |
+
the other two tries fail. In countless2d, you just check for matches
|
25 |
+
of two and then pick one of them otherwise.
|
26 |
+
|
27 |
+
Unfortunately, you need to check ABC, ABD, ABE, BCD, BDE, & CDE.
|
28 |
+
Then you need to check AB, AC, AD, BC, BD
|
29 |
+
We skip checking E because if none of these match, we pick E. We can
|
30 |
+
skip checking AE, BE, CE, DE since if any of those match, E is our boy
|
31 |
+
so it's redundant.
|
32 |
+
|
33 |
+
So countless grows cominatorially in complexity.
|
34 |
+
"""
|
35 |
+
sections = [ a,b,c,d,e ]
|
36 |
+
|
37 |
+
p2 = lambda q,r: q * (q == r) # q if p == q else 0
|
38 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) ) # q if q == r == s else 0
|
39 |
+
|
40 |
+
lor = lambda x,y: x + (x == 0) * y
|
41 |
+
|
42 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
43 |
+
results3 = reduce(lor, results3)
|
44 |
+
|
45 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
46 |
+
results2 = reduce(lor, results2)
|
47 |
+
|
48 |
+
return reduce(lor, (results3, results2, e))
|
49 |
+
|
50 |
+
def countless8(a,b,c,d,e,f,g,h):
|
51 |
+
"""Extend countless5 to countless8. Same deal, except we also
|
52 |
+
need to check for matches of length 4."""
|
53 |
+
sections = [ a, b, c, d, e, f, g, h ]
|
54 |
+
|
55 |
+
p2 = lambda q,r: q * (q == r)
|
56 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) )
|
57 |
+
p4 = lambda p,q,r,s: p * ( (p == q) & (q == r) & (r == s) )
|
58 |
+
|
59 |
+
lor = lambda x,y: x + (x == 0) * y
|
60 |
+
|
61 |
+
results4 = ( p4(x,y,z,w) for x,y,z,w in combinations(sections, 4) )
|
62 |
+
results4 = reduce(lor, results4)
|
63 |
+
|
64 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
65 |
+
results3 = reduce(lor, results3)
|
66 |
+
|
67 |
+
# We can always use our shortcut of omitting the last element
|
68 |
+
# for N choose 2
|
69 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
70 |
+
results2 = reduce(lor, results2)
|
71 |
+
|
72 |
+
return reduce(lor, [ results4, results3, results2, h ])
|
73 |
+
|
74 |
+
def dynamic_countless3d(data):
|
75 |
+
"""countless8 + dynamic programming. ~2x faster"""
|
76 |
+
sections = []
|
77 |
+
|
78 |
+
# shift zeros up one so they don't interfere with bitwise operators
|
79 |
+
# we'll shift down at the end
|
80 |
+
data += 1
|
81 |
+
|
82 |
+
# This loop splits the 2D array apart into four arrays that are
|
83 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
84 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
85 |
+
factor = (2,2,2)
|
86 |
+
for offset in np.ndindex(factor):
|
87 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
88 |
+
sections.append(part)
|
89 |
+
|
90 |
+
pick = lambda a,b: a * (a == b)
|
91 |
+
lor = lambda x,y: x + (x == 0) * y
|
92 |
+
|
93 |
+
subproblems2 = {}
|
94 |
+
|
95 |
+
results2 = None
|
96 |
+
for x,y in combinations(range(7), 2):
|
97 |
+
res = pick(sections[x], sections[y])
|
98 |
+
subproblems2[(x,y)] = res
|
99 |
+
if results2 is not None:
|
100 |
+
results2 += (results2 == 0) * res
|
101 |
+
else:
|
102 |
+
results2 = res
|
103 |
+
|
104 |
+
subproblems3 = {}
|
105 |
+
|
106 |
+
results3 = None
|
107 |
+
for x,y,z in combinations(range(8), 3):
|
108 |
+
res = pick(subproblems2[(x,y)], sections[z])
|
109 |
+
|
110 |
+
if z != 7:
|
111 |
+
subproblems3[(x,y,z)] = res
|
112 |
+
|
113 |
+
if results3 is not None:
|
114 |
+
results3 += (results3 == 0) * res
|
115 |
+
else:
|
116 |
+
results3 = res
|
117 |
+
|
118 |
+
results3 = reduce(lor, (results3, results2, sections[-1]))
|
119 |
+
|
120 |
+
# free memory
|
121 |
+
results2 = None
|
122 |
+
subproblems2 = None
|
123 |
+
res = None
|
124 |
+
|
125 |
+
results4 = ( pick(subproblems3[(x,y,z)], sections[w]) for x,y,z,w in combinations(range(8), 4) )
|
126 |
+
results4 = reduce(lor, results4)
|
127 |
+
subproblems3 = None # free memory
|
128 |
+
|
129 |
+
final_result = lor(results4, results3) - 1
|
130 |
+
data -= 1
|
131 |
+
return final_result
|
132 |
+
|
133 |
+
def countless3d(data):
|
134 |
+
"""Now write countless8 in such a way that it could be used
|
135 |
+
to process an image."""
|
136 |
+
sections = []
|
137 |
+
|
138 |
+
# shift zeros up one so they don't interfere with bitwise operators
|
139 |
+
# we'll shift down at the end
|
140 |
+
data += 1
|
141 |
+
|
142 |
+
# This loop splits the 2D array apart into four arrays that are
|
143 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
144 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
145 |
+
factor = (2,2,2)
|
146 |
+
for offset in np.ndindex(factor):
|
147 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
148 |
+
sections.append(part)
|
149 |
+
|
150 |
+
p2 = lambda q,r: q * (q == r)
|
151 |
+
p3 = lambda q,r,s: q * ( (q == r) & (r == s) )
|
152 |
+
p4 = lambda p,q,r,s: p * ( (p == q) & (q == r) & (r == s) )
|
153 |
+
|
154 |
+
lor = lambda x,y: x + (x == 0) * y
|
155 |
+
|
156 |
+
results4 = ( p4(x,y,z,w) for x,y,z,w in combinations(sections, 4) )
|
157 |
+
results4 = reduce(lor, results4)
|
158 |
+
|
159 |
+
results3 = ( p3(x,y,z) for x,y,z in combinations(sections, 3) )
|
160 |
+
results3 = reduce(lor, results3)
|
161 |
+
|
162 |
+
results2 = ( p2(x,y) for x,y in combinations(sections[:-1], 2) )
|
163 |
+
results2 = reduce(lor, results2)
|
164 |
+
|
165 |
+
final_result = reduce(lor, (results4, results3, results2, sections[-1])) - 1
|
166 |
+
data -= 1
|
167 |
+
return final_result
|
168 |
+
|
169 |
+
def countless_generalized(data, factor):
|
170 |
+
assert len(data.shape) == len(factor)
|
171 |
+
|
172 |
+
sections = []
|
173 |
+
|
174 |
+
mode_of = reduce(lambda x,y: x * y, factor)
|
175 |
+
majority = int(math.ceil(float(mode_of) / 2))
|
176 |
+
|
177 |
+
data += 1
|
178 |
+
|
179 |
+
# This loop splits the 2D array apart into four arrays that are
|
180 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
181 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
182 |
+
for offset in np.ndindex(factor):
|
183 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
184 |
+
sections.append(part)
|
185 |
+
|
186 |
+
def pick(elements):
|
187 |
+
eq = ( elements[i] == elements[i+1] for i in range(len(elements) - 1) )
|
188 |
+
anded = reduce(lambda p,q: p & q, eq)
|
189 |
+
return elements[0] * anded
|
190 |
+
|
191 |
+
def logical_or(x,y):
|
192 |
+
return x + (x == 0) * y
|
193 |
+
|
194 |
+
result = ( pick(combo) for combo in combinations(sections, majority) )
|
195 |
+
result = reduce(logical_or, result)
|
196 |
+
for i in range(majority - 1, 3-1, -1): # 3-1 b/c of exclusive bounds
|
197 |
+
partial_result = ( pick(combo) for combo in combinations(sections, i) )
|
198 |
+
partial_result = reduce(logical_or, partial_result)
|
199 |
+
result = logical_or(result, partial_result)
|
200 |
+
|
201 |
+
partial_result = ( pick(combo) for combo in combinations(sections[:-1], 2) )
|
202 |
+
partial_result = reduce(logical_or, partial_result)
|
203 |
+
result = logical_or(result, partial_result)
|
204 |
+
|
205 |
+
result = logical_or(result, sections[-1]) - 1
|
206 |
+
data -= 1
|
207 |
+
return result
|
208 |
+
|
209 |
+
def dynamic_countless_generalized(data, factor):
|
210 |
+
assert len(data.shape) == len(factor)
|
211 |
+
|
212 |
+
sections = []
|
213 |
+
|
214 |
+
mode_of = reduce(lambda x,y: x * y, factor)
|
215 |
+
majority = int(math.ceil(float(mode_of) / 2))
|
216 |
+
|
217 |
+
data += 1 # offset from zero
|
218 |
+
|
219 |
+
# This loop splits the 2D array apart into four arrays that are
|
220 |
+
# all the result of striding by 2 and offset by (0,0), (0,1), (1,0),
|
221 |
+
# and (1,1) representing the A, B, C, and D positions from Figure 1.
|
222 |
+
for offset in np.ndindex(factor):
|
223 |
+
part = data[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
224 |
+
sections.append(part)
|
225 |
+
|
226 |
+
pick = lambda a,b: a * (a == b)
|
227 |
+
lor = lambda x,y: x + (x == 0) * y # logical or
|
228 |
+
|
229 |
+
subproblems = [ {}, {} ]
|
230 |
+
results2 = None
|
231 |
+
for x,y in combinations(range(len(sections) - 1), 2):
|
232 |
+
res = pick(sections[x], sections[y])
|
233 |
+
subproblems[0][(x,y)] = res
|
234 |
+
if results2 is not None:
|
235 |
+
results2 = lor(results2, res)
|
236 |
+
else:
|
237 |
+
results2 = res
|
238 |
+
|
239 |
+
results = [ results2 ]
|
240 |
+
for r in range(3, majority+1):
|
241 |
+
r_results = None
|
242 |
+
for combo in combinations(range(len(sections)), r):
|
243 |
+
res = pick(subproblems[0][combo[:-1]], sections[combo[-1]])
|
244 |
+
|
245 |
+
if combo[-1] != len(sections) - 1:
|
246 |
+
subproblems[1][combo] = res
|
247 |
+
|
248 |
+
if r_results is not None:
|
249 |
+
r_results = lor(r_results, res)
|
250 |
+
else:
|
251 |
+
r_results = res
|
252 |
+
results.append(r_results)
|
253 |
+
subproblems[0] = subproblems[1]
|
254 |
+
subproblems[1] = {}
|
255 |
+
|
256 |
+
results.reverse()
|
257 |
+
final_result = lor(reduce(lor, results), sections[-1]) - 1
|
258 |
+
data -= 1
|
259 |
+
return final_result
|
260 |
+
|
261 |
+
def downsample_with_averaging(array):
|
262 |
+
"""
|
263 |
+
Downsample x by factor using averaging.
|
264 |
+
|
265 |
+
@return: The downsampled array, of the same type as x.
|
266 |
+
"""
|
267 |
+
factor = (2,2,2)
|
268 |
+
|
269 |
+
if np.array_equal(factor[:3], np.array([1,1,1])):
|
270 |
+
return array
|
271 |
+
|
272 |
+
output_shape = tuple(int(math.ceil(s / f)) for s, f in zip(array.shape, factor))
|
273 |
+
temp = np.zeros(output_shape, float)
|
274 |
+
counts = np.zeros(output_shape, np.int)
|
275 |
+
for offset in np.ndindex(factor):
|
276 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
277 |
+
indexing_expr = tuple(np.s_[:s] for s in part.shape)
|
278 |
+
temp[indexing_expr] += part
|
279 |
+
counts[indexing_expr] += 1
|
280 |
+
return np.cast[array.dtype](temp / counts)
|
281 |
+
|
282 |
+
def downsample_with_max_pooling(array):
|
283 |
+
|
284 |
+
factor = (2,2,2)
|
285 |
+
|
286 |
+
sections = []
|
287 |
+
|
288 |
+
for offset in np.ndindex(factor):
|
289 |
+
part = array[tuple(np.s_[o::f] for o, f in zip(offset, factor))]
|
290 |
+
sections.append(part)
|
291 |
+
|
292 |
+
output = sections[0].copy()
|
293 |
+
|
294 |
+
for section in sections[1:]:
|
295 |
+
np.maximum(output, section, output)
|
296 |
+
|
297 |
+
return output
|
298 |
+
|
299 |
+
def striding(array):
|
300 |
+
"""Downsample x by factor using striding.
|
301 |
+
|
302 |
+
@return: The downsampled array, of the same type as x.
|
303 |
+
"""
|
304 |
+
factor = (2,2,2)
|
305 |
+
if np.all(np.array(factor, int) == 1):
|
306 |
+
return array
|
307 |
+
return array[tuple(np.s_[::f] for f in factor)]
|
308 |
+
|
309 |
+
def benchmark():
|
310 |
+
def countless3d_generalized(img):
|
311 |
+
return countless_generalized(img, (2,8,1))
|
312 |
+
def countless3d_dynamic_generalized(img):
|
313 |
+
return dynamic_countless_generalized(img, (8,8,1))
|
314 |
+
|
315 |
+
methods = [
|
316 |
+
# countless3d,
|
317 |
+
# dynamic_countless3d,
|
318 |
+
countless3d_generalized,
|
319 |
+
# countless3d_dynamic_generalized,
|
320 |
+
# striding,
|
321 |
+
# downsample_with_averaging,
|
322 |
+
# downsample_with_max_pooling
|
323 |
+
]
|
324 |
+
|
325 |
+
data = np.zeros(shape=(16**2, 16**2, 16**2), dtype=np.uint8) + 1
|
326 |
+
|
327 |
+
N = 5
|
328 |
+
|
329 |
+
print('Algorithm\tMPx\tMB/sec\tSec\tN=%d' % N)
|
330 |
+
|
331 |
+
for fn in methods:
|
332 |
+
start = time.time()
|
333 |
+
for _ in range(N):
|
334 |
+
result = fn(data)
|
335 |
+
end = time.time()
|
336 |
+
|
337 |
+
total_time = (end - start)
|
338 |
+
mpx = N * float(data.shape[0] * data.shape[1] * data.shape[2]) / total_time / 1024.0 / 1024.0
|
339 |
+
mbytes = mpx * np.dtype(data.dtype).itemsize
|
340 |
+
# Output in tab separated format to enable copy-paste into excel/numbers
|
341 |
+
print("%s\t%.3f\t%.3f\t%.2f" % (fn.__name__, mpx, mbytes, total_time))
|
342 |
+
|
343 |
+
if __name__ == '__main__':
|
344 |
+
benchmark()
|
345 |
+
|
346 |
+
# Algorithm MPx MB/sec Sec N=5
|
347 |
+
# countless3d 10.564 10.564 60.58
|
348 |
+
# dynamic_countless3d 22.717 22.717 28.17
|
349 |
+
# countless3d_generalized 9.702 9.702 65.96
|
350 |
+
# countless3d_dynamic_generalized 22.720 22.720 28.17
|
351 |
+
# striding 253360.506 253360.506 0.00
|
352 |
+
# downsample_with_averaging 224.098 224.098 2.86
|
353 |
+
# downsample_with_max_pooling 690.474 690.474 0.93
|
354 |
+
|
355 |
+
|
356 |
+
|
saicinpainting/evaluation/masks/countless/requirements.txt
ADDED
@@ -0,0 +1,7 @@
Pillow>=6.2.0
numpy>=1.16
scipy
tqdm
memory_profiler
six
pytest
saicinpainting/evaluation/masks/countless/test.py
ADDED
@@ -0,0 +1,195 @@
1 |
+
from copy import deepcopy
|
2 |
+
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
import countless2d
|
6 |
+
import countless3d
|
7 |
+
|
8 |
+
def test_countless2d():
|
9 |
+
def test_all_cases(fn, test_zero):
|
10 |
+
case1 = np.array([ [ 1, 2 ], [ 3, 4 ] ]).reshape((2,2,1,1)) # all different
|
11 |
+
case2 = np.array([ [ 1, 1 ], [ 2, 3 ] ]).reshape((2,2,1,1)) # two are same
|
12 |
+
case1z = np.array([ [ 0, 1 ], [ 2, 3 ] ]).reshape((2,2,1,1)) # all different
|
13 |
+
case2z = np.array([ [ 0, 0 ], [ 2, 3 ] ]).reshape((2,2,1,1)) # two are same
|
14 |
+
case3 = np.array([ [ 1, 1 ], [ 2, 2 ] ]).reshape((2,2,1,1)) # two groups are same
|
15 |
+
case4 = np.array([ [ 1, 2 ], [ 2, 2 ] ]).reshape((2,2,1,1)) # 3 are the same
|
16 |
+
case5 = np.array([ [ 5, 5 ], [ 5, 5 ] ]).reshape((2,2,1,1)) # all are the same
|
17 |
+
|
18 |
+
is_255_handled = np.array([ [ 255, 255 ], [ 1, 2 ] ], dtype=np.uint8).reshape((2,2,1,1))
|
19 |
+
|
20 |
+
test = lambda case: fn(case)
|
21 |
+
|
22 |
+
if test_zero:
|
23 |
+
assert test(case1z) == [[[[3]]]] # d
|
24 |
+
assert test(case2z) == [[[[0]]]] # a==b
|
25 |
+
else:
|
26 |
+
assert test(case1) == [[[[4]]]] # d
|
27 |
+
assert test(case2) == [[[[1]]]] # a==b
|
28 |
+
|
29 |
+
assert test(case3) == [[[[1]]]] # a==b
|
30 |
+
assert test(case4) == [[[[2]]]] # b==c
|
31 |
+
assert test(case5) == [[[[5]]]] # a==b
|
32 |
+
|
33 |
+
assert test(is_255_handled) == [[[[255]]]]
|
34 |
+
|
35 |
+
assert fn(case1).dtype == case1.dtype
|
36 |
+
|
37 |
+
test_all_cases(countless2d.simplest_countless, False)
|
38 |
+
test_all_cases(countless2d.quick_countless, False)
|
39 |
+
test_all_cases(countless2d.quickest_countless, False)
|
40 |
+
test_all_cases(countless2d.stippled_countless, False)
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
methods = [
|
45 |
+
countless2d.zero_corrected_countless,
|
46 |
+
countless2d.countless,
|
47 |
+
countless2d.countless_if,
|
48 |
+
# countless2d.counting, # counting doesn't respect order so harder to write a test
|
49 |
+
]
|
50 |
+
|
51 |
+
for fn in methods:
|
52 |
+
print(fn.__name__)
|
53 |
+
test_all_cases(fn, True)
|
54 |
+
|
55 |
+
def test_stippled_countless2d():
|
56 |
+
a = np.array([ [ 1, 2 ], [ 3, 4 ] ]).reshape((2,2,1,1))
|
57 |
+
b = np.array([ [ 0, 2 ], [ 3, 4 ] ]).reshape((2,2,1,1))
|
58 |
+
c = np.array([ [ 1, 0 ], [ 3, 4 ] ]).reshape((2,2,1,1))
|
59 |
+
d = np.array([ [ 1, 2 ], [ 0, 4 ] ]).reshape((2,2,1,1))
|
60 |
+
e = np.array([ [ 1, 2 ], [ 3, 0 ] ]).reshape((2,2,1,1))
|
61 |
+
f = np.array([ [ 0, 0 ], [ 3, 4 ] ]).reshape((2,2,1,1))
|
62 |
+
g = np.array([ [ 0, 2 ], [ 0, 4 ] ]).reshape((2,2,1,1))
|
63 |
+
  h = np.array([ [ 0, 2 ], [ 3, 0 ] ]).reshape((2,2,1,1))
  i = np.array([ [ 1, 0 ], [ 0, 4 ] ]).reshape((2,2,1,1))
  j = np.array([ [ 1, 2 ], [ 0, 0 ] ]).reshape((2,2,1,1))
  k = np.array([ [ 1, 0 ], [ 3, 0 ] ]).reshape((2,2,1,1))
  l = np.array([ [ 1, 0 ], [ 0, 0 ] ]).reshape((2,2,1,1))
  m = np.array([ [ 0, 2 ], [ 0, 0 ] ]).reshape((2,2,1,1))
  n = np.array([ [ 0, 0 ], [ 3, 0 ] ]).reshape((2,2,1,1))
  o = np.array([ [ 0, 0 ], [ 0, 4 ] ]).reshape((2,2,1,1))
  z = np.array([ [ 0, 0 ], [ 0, 0 ] ]).reshape((2,2,1,1))

  test = countless2d.stippled_countless

  # Note: We only tested non-matching cases above,
  # cases f,g,h,i,j,k prove their duals work as well
  # b/c if two pixels are black, either one can be chosen
  # if they are different or the same.

  assert test(a) == [[[[4]]]]
  assert test(b) == [[[[4]]]]
  assert test(c) == [[[[4]]]]
  assert test(d) == [[[[4]]]]
  assert test(e) == [[[[1]]]]
  assert test(f) == [[[[4]]]]
  assert test(g) == [[[[4]]]]
  assert test(h) == [[[[2]]]]
  assert test(i) == [[[[4]]]]
  assert test(j) == [[[[1]]]]
  assert test(k) == [[[[1]]]]
  assert test(l) == [[[[1]]]]
  assert test(m) == [[[[2]]]]
  assert test(n) == [[[[3]]]]
  assert test(o) == [[[[4]]]]
  assert test(z) == [[[[0]]]]

  bc = np.array([ [ 0, 2 ], [ 2, 4 ] ]).reshape((2,2,1,1))
  bd = np.array([ [ 0, 2 ], [ 3, 2 ] ]).reshape((2,2,1,1))
  cd = np.array([ [ 0, 2 ], [ 3, 3 ] ]).reshape((2,2,1,1))

  assert test(bc) == [[[[2]]]]
  assert test(bd) == [[[[2]]]]
  assert test(cd) == [[[[3]]]]

  ab = np.array([ [ 1, 1 ], [ 0, 4 ] ]).reshape((2,2,1,1))
  ac = np.array([ [ 1, 2 ], [ 1, 0 ] ]).reshape((2,2,1,1))
  ad = np.array([ [ 1, 0 ], [ 3, 1 ] ]).reshape((2,2,1,1))

  assert test(ab) == [[[[1]]]]
  assert test(ac) == [[[[1]]]]
  assert test(ad) == [[[[1]]]]

def test_countless3d():
  def test_all_cases(fn):
    alldifferent = [
      [
        [1,2],
        [3,4],
      ],
      [
        [5,6],
        [7,8]
      ]
    ]
    allsame = [
      [
        [1,1],
        [1,1],
      ],
      [
        [1,1],
        [1,1]
      ]
    ]

    assert fn(np.array(alldifferent)) == [[[8]]]
    assert fn(np.array(allsame)) == [[[1]]]

    twosame = deepcopy(alldifferent)
    twosame[1][1][0] = 2

    assert fn(np.array(twosame)) == [[[2]]]

    threemixed = [
      [
        [3,3],
        [1,2],
      ],
      [
        [2,4],
        [4,3]
      ]
    ]
    assert fn(np.array(threemixed)) == [[[3]]]

    foursame = [
      [
        [4,4],
        [1,2],
      ],
      [
        [2,4],
        [4,3]
      ]
    ]

    assert fn(np.array(foursame)) == [[[4]]]

    fivesame = [
      [
        [5,4],
        [5,5],
      ],
      [
        [2,4],
        [5,5]
      ]
    ]

    assert fn(np.array(fivesame)) == [[[5]]]

  def countless3d_generalized(img):
    return countless3d.countless_generalized(img, (2,2,2))

  def countless3d_dynamic_generalized(img):
    return countless3d.dynamic_countless_generalized(img, (2,2,2))

  methods = [
    countless3d.countless3d,
    countless3d.dynamic_countless3d,
    countless3d_generalized,
    countless3d_dynamic_generalized,
  ]

  for fn in methods:
    test_all_cases(fn)

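The tests above pin down the COUNTLESS downsampling rule: each 2x2 patch reduces to its most frequent value, falling back to the bottom-right pixel when no two pixels agree, with the "stippled" and "zero corrected" variants handling the label 0 correctly. The sketch below (not part of the diff) shows that behaviour with the `zero_corrected_countless` function that `mask.py` later imports; it assumes the repository root is importable as `saicinpainting`.

# Minimal sketch, not part of the diff: 2x2 COUNTLESS downsampling on a tiny label map.
# Assumes the repo is on PYTHONPATH so saicinpainting.evaluation.masks.countless imports.
import numpy as np
from saicinpainting.evaluation.masks.countless import countless2d

# Two of the four pixels carry label 2, so the patch collapses to 2.
patch = np.array([[0, 2],
                  [3, 2]])
print(countless2d.zero_corrected_countless(patch))  # -> [[2]]
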
saicinpainting/evaluation/masks/mask.py
ADDED
@@ -0,0 +1,429 @@
import enum
from copy import deepcopy

import numpy as np
from skimage import img_as_ubyte
from skimage.transform import rescale, resize
try:
    from detectron2 import model_zoo
    from detectron2.config import get_cfg
    from detectron2.engine import DefaultPredictor
    DETECTRON_INSTALLED = True
except:
    print("Detectron v2 is not installed")
    DETECTRON_INSTALLED = False

from .countless.countless2d import zero_corrected_countless


class ObjectMask():
    def __init__(self, mask):
        self.height, self.width = mask.shape
        (self.up, self.down), (self.left, self.right) = self._get_limits(mask)
        self.mask = mask[self.up:self.down, self.left:self.right].copy()

    @staticmethod
    def _get_limits(mask):
        def indicator_limits(indicator):
            lower = indicator.argmax()
            upper = len(indicator) - indicator[::-1].argmax()
            return lower, upper

        vertical_indicator = mask.any(axis=1)
        vertical_limits = indicator_limits(vertical_indicator)

        horizontal_indicator = mask.any(axis=0)
        horizontal_limits = indicator_limits(horizontal_indicator)

        return vertical_limits, horizontal_limits

    def _clean(self):
        self.up, self.down, self.left, self.right = 0, 0, 0, 0
        self.mask = np.empty((0, 0))

    def horizontal_flip(self, inplace=False):
        if not inplace:
            flipped = deepcopy(self)
            return flipped.horizontal_flip(inplace=True)

        self.mask = self.mask[:, ::-1]
        return self

    def vertical_flip(self, inplace=False):
        if not inplace:
            flipped = deepcopy(self)
            return flipped.vertical_flip(inplace=True)

        self.mask = self.mask[::-1, :]
        return self

    def image_center(self):
        y_center = self.up + (self.down - self.up) / 2
        x_center = self.left + (self.right - self.left) / 2
        return y_center, x_center

    def rescale(self, scaling_factor, inplace=False):
        if not inplace:
            scaled = deepcopy(self)
            return scaled.rescale(scaling_factor, inplace=True)

        scaled_mask = rescale(self.mask.astype(float), scaling_factor, order=0) > 0.5
        (up, down), (left, right) = self._get_limits(scaled_mask)
        self.mask = scaled_mask[up:down, left:right]

        y_center, x_center = self.image_center()
        mask_height, mask_width = self.mask.shape
        self.up = int(round(y_center - mask_height / 2))
        self.down = self.up + mask_height
        self.left = int(round(x_center - mask_width / 2))
        self.right = self.left + mask_width
        return self

    def crop_to_canvas(self, vertical=True, horizontal=True, inplace=False):
        if not inplace:
            cropped = deepcopy(self)
            cropped.crop_to_canvas(vertical=vertical, horizontal=horizontal, inplace=True)
            return cropped

        if vertical:
            if self.up >= self.height or self.down <= 0:
                self._clean()
            else:
                cut_up, cut_down = max(-self.up, 0), max(self.down - self.height, 0)
                if cut_up != 0:
                    self.mask = self.mask[cut_up:]
                    self.up = 0
                if cut_down != 0:
                    self.mask = self.mask[:-cut_down]
                    self.down = self.height

        if horizontal:
            if self.left >= self.width or self.right <= 0:
                self._clean()
            else:
                cut_left, cut_right = max(-self.left, 0), max(self.right - self.width, 0)
                if cut_left != 0:
                    self.mask = self.mask[:, cut_left:]
                    self.left = 0
                if cut_right != 0:
                    self.mask = self.mask[:, :-cut_right]
                    self.right = self.width

        return self

    def restore_full_mask(self, allow_crop=False):
        cropped = self.crop_to_canvas(inplace=allow_crop)
        mask = np.zeros((cropped.height, cropped.width), dtype=bool)
        mask[cropped.up:cropped.down, cropped.left:cropped.right] = cropped.mask
        return mask

    def shift(self, vertical=0, horizontal=0, inplace=False):
        if not inplace:
            shifted = deepcopy(self)
            return shifted.shift(vertical=vertical, horizontal=horizontal, inplace=True)

        self.up += vertical
        self.down += vertical
        self.left += horizontal
        self.right += horizontal
        return self

    def area(self):
        return self.mask.sum()


class RigidnessMode(enum.Enum):
    soft = 0
    rigid = 1


class SegmentationMask:
    def __init__(self, confidence_threshold=0.5, rigidness_mode=RigidnessMode.rigid,
                 max_object_area=0.3, min_mask_area=0.02, downsample_levels=6, num_variants_per_mask=4,
                 max_mask_intersection=0.5, max_foreground_coverage=0.5, max_foreground_intersection=0.5,
                 max_hidden_area=0.2, max_scale_change=0.25, horizontal_flip=True,
                 max_vertical_shift=0.1, position_shuffle=True):
        """
        :param confidence_threshold: float; confidence threshold of the panoptic segmenter for an instance to be kept.
        :param rigidness_mode: RigidnessMode object
            when soft, checks intersection only with the object from which the mask_object was produced
            when rigid, checks intersection with any foreground class object
        :param max_object_area: float; upper bound on the object area for it to be considered a mask_object.
        :param min_mask_area: float; lower bound on mask area for the mask to be considered valid
        :param downsample_levels: int; defines width of the resized segmentation used to obtain shifted masks;
        :param num_variants_per_mask: int; maximal number of masks for the same object;
        :param max_mask_intersection: float; maximum allowed area fraction of intersection for 2 masks
            produced by horizontal shift of the same mask_object; higher value -> more diversity
        :param max_foreground_coverage: float; maximum allowed area fraction of a foreground object that may be
            covered by the mask; lower value -> objects are covered less
        :param max_foreground_intersection: float; maximum allowed area of intersection for the mask with a foreground
            object; lower value -> mask lies more on the background than on the objects
        :param max_hidden_area: upper bound on the part of the object hidden by shifting the object outside the screen area;
        :param max_scale_change: allowed scale change for the mask_object;
        :param horizontal_flip: whether horizontal flips are allowed;
        :param max_vertical_shift: amount of vertical movement allowed;
        :param position_shuffle: whether to shuffle the candidate horizontal positions of the mask
        """

        assert DETECTRON_INSTALLED, 'Cannot use SegmentationMask without detectron2'
        self.cfg = get_cfg()
        self.cfg.merge_from_file(model_zoo.get_config_file("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml"))
        self.cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-PanopticSegmentation/panoptic_fpn_R_101_3x.yaml")
        self.cfg.MODEL.PANOPTIC_FPN.COMBINE.INSTANCES_CONFIDENCE_THRESH = confidence_threshold
        self.predictor = DefaultPredictor(self.cfg)

        self.rigidness_mode = RigidnessMode(rigidness_mode)
        self.max_object_area = max_object_area
        self.min_mask_area = min_mask_area
        self.downsample_levels = downsample_levels
        self.num_variants_per_mask = num_variants_per_mask
        self.max_mask_intersection = max_mask_intersection
        self.max_foreground_coverage = max_foreground_coverage
        self.max_foreground_intersection = max_foreground_intersection
        self.max_hidden_area = max_hidden_area
        self.position_shuffle = position_shuffle

        self.max_scale_change = max_scale_change
        self.horizontal_flip = horizontal_flip
        self.max_vertical_shift = max_vertical_shift

    def get_segmentation(self, img):
        im = img_as_ubyte(img)
        panoptic_seg, segment_info = self.predictor(im)["panoptic_seg"]
        return panoptic_seg, segment_info

    @staticmethod
    def _is_power_of_two(n):
        return (n != 0) and (n & (n-1) == 0)

    def identify_candidates(self, panoptic_seg, segments_info):
        potential_mask_ids = []
        for segment in segments_info:
            if not segment["isthing"]:
                continue
            mask = (panoptic_seg == segment["id"]).int().detach().cpu().numpy()
            area = mask.sum().item() / np.prod(panoptic_seg.shape)
            if area >= self.max_object_area:
                continue
            potential_mask_ids.append(segment["id"])
        return potential_mask_ids

    def downsample_mask(self, mask):
        height, width = mask.shape
        if not (self._is_power_of_two(height) and self._is_power_of_two(width)):
            raise ValueError("Image sides are not power of 2.")

        num_iterations = width.bit_length() - 1 - self.downsample_levels
        if num_iterations < 0:
            raise ValueError(f"Width is lower than 2^{self.downsample_levels}.")

        if height.bit_length() - 1 < num_iterations:
            raise ValueError("Height is too low to perform downsampling")

        downsampled = mask
        for _ in range(num_iterations):
            downsampled = zero_corrected_countless(downsampled)

        return downsampled

    def _augmentation_params(self):
        scaling_factor = np.random.uniform(1 - self.max_scale_change, 1 + self.max_scale_change)
        if self.horizontal_flip:
            horizontal_flip = bool(np.random.choice(2))
        else:
            horizontal_flip = False
        vertical_shift = np.random.uniform(-self.max_vertical_shift, self.max_vertical_shift)

        return {
            "scaling_factor": scaling_factor,
            "horizontal_flip": horizontal_flip,
            "vertical_shift": vertical_shift
        }

    def _get_intersection(self, mask_array, mask_object):
        intersection = mask_array[
            mask_object.up:mask_object.down, mask_object.left:mask_object.right
        ] & mask_object.mask
        return intersection

    def _check_masks_intersection(self, aug_mask, total_mask_area, prev_masks):
        for existing_mask in prev_masks:
            intersection_area = self._get_intersection(existing_mask, aug_mask).sum()
            intersection_existing = intersection_area / existing_mask.sum()
            intersection_current = 1 - (aug_mask.area() - intersection_area) / total_mask_area
            if (intersection_existing > self.max_mask_intersection) or \
                    (intersection_current > self.max_mask_intersection):
                return False
        return True

    def _check_foreground_intersection(self, aug_mask, foreground):
        for existing_mask in foreground:
            intersection_area = self._get_intersection(existing_mask, aug_mask).sum()
            intersection_existing = intersection_area / existing_mask.sum()
            if intersection_existing > self.max_foreground_coverage:
                return False
            intersection_mask = intersection_area / aug_mask.area()
            if intersection_mask > self.max_foreground_intersection:
                return False
        return True

    def _move_mask(self, mask, foreground):
        # Obtaining properties of the original mask_object:
        orig_mask = ObjectMask(mask)

        chosen_masks = []
        chosen_parameters = []
        # to fix the case when resizing gives mask_object consisting only of False
        scaling_factor_lower_bound = 0.

        for var_idx in range(self.num_variants_per_mask):
            # Obtaining augmentation parameters and applying them to the downscaled mask_object
            augmentation_params = self._augmentation_params()
            augmentation_params["scaling_factor"] = min([
                augmentation_params["scaling_factor"],
                2 * min(orig_mask.up, orig_mask.height - orig_mask.down) / orig_mask.height + 1.,
                2 * min(orig_mask.left, orig_mask.width - orig_mask.right) / orig_mask.width + 1.
            ])
            augmentation_params["scaling_factor"] = max([
                augmentation_params["scaling_factor"], scaling_factor_lower_bound
            ])

            aug_mask = deepcopy(orig_mask)
            aug_mask.rescale(augmentation_params["scaling_factor"], inplace=True)
            if augmentation_params["horizontal_flip"]:
                aug_mask.horizontal_flip(inplace=True)
            total_aug_area = aug_mask.area()
            if total_aug_area == 0:
                scaling_factor_lower_bound = 1.
                continue

            # Fix if the element vertical shift is too strong and shown area is too small:
            vertical_area = aug_mask.mask.sum(axis=1) / total_aug_area  # share of area taken by rows
            # number of rows which are allowed to be hidden from upper and lower parts of image respectively
            max_hidden_up = np.searchsorted(vertical_area.cumsum(), self.max_hidden_area)
            max_hidden_down = np.searchsorted(vertical_area[::-1].cumsum(), self.max_hidden_area)
            # correcting vertical shift, so not too much area will be hidden
            augmentation_params["vertical_shift"] = np.clip(
                augmentation_params["vertical_shift"],
                -(aug_mask.up + max_hidden_up) / aug_mask.height,
                (aug_mask.height - aug_mask.down + max_hidden_down) / aug_mask.height
            )
            # Applying vertical shift:
            vertical_shift = int(round(aug_mask.height * augmentation_params["vertical_shift"]))
            aug_mask.shift(vertical=vertical_shift, inplace=True)
            aug_mask.crop_to_canvas(vertical=True, horizontal=False, inplace=True)

            # Choosing horizontal shift:
            max_hidden_area = self.max_hidden_area - (1 - aug_mask.area() / total_aug_area)
            horizontal_area = aug_mask.mask.sum(axis=0) / total_aug_area
            max_hidden_left = np.searchsorted(horizontal_area.cumsum(), max_hidden_area)
            max_hidden_right = np.searchsorted(horizontal_area[::-1].cumsum(), max_hidden_area)
            allowed_shifts = np.arange(-max_hidden_left, aug_mask.width -
                                       (aug_mask.right - aug_mask.left) + max_hidden_right + 1)
            allowed_shifts = - (aug_mask.left - allowed_shifts)

            if self.position_shuffle:
                np.random.shuffle(allowed_shifts)

            mask_is_found = False
            for horizontal_shift in allowed_shifts:
                aug_mask_left = deepcopy(aug_mask)
                aug_mask_left.shift(horizontal=horizontal_shift, inplace=True)
                aug_mask_left.crop_to_canvas(inplace=True)

                prev_masks = [mask] + chosen_masks
                is_mask_suitable = self._check_masks_intersection(aug_mask_left, total_aug_area, prev_masks) & \
                                   self._check_foreground_intersection(aug_mask_left, foreground)
                if is_mask_suitable:
                    aug_draw = aug_mask_left.restore_full_mask()
                    chosen_masks.append(aug_draw)
                    augmentation_params["horizontal_shift"] = horizontal_shift / aug_mask_left.width
                    chosen_parameters.append(augmentation_params)
                    mask_is_found = True
                    break

            if not mask_is_found:
                break

        return chosen_parameters

    def _prepare_mask(self, mask):
        height, width = mask.shape
        target_width = width if self._is_power_of_two(width) else (1 << width.bit_length())
        target_height = height if self._is_power_of_two(height) else (1 << height.bit_length())

        return resize(mask.astype('float32'), (target_height, target_width), order=0, mode='edge').round().astype('int32')

    def get_masks(self, im, return_panoptic=False):
        panoptic_seg, segments_info = self.get_segmentation(im)
        potential_mask_ids = self.identify_candidates(panoptic_seg, segments_info)

        panoptic_seg_scaled = self._prepare_mask(panoptic_seg.detach().cpu().numpy())
        downsampled = self.downsample_mask(panoptic_seg_scaled)
        scene_objects = []
        for segment in segments_info:
            if not segment["isthing"]:
                continue
            mask = downsampled == segment["id"]
            if not np.any(mask):
                continue
            scene_objects.append(mask)

        mask_set = []
        for mask_id in potential_mask_ids:
            mask = downsampled == mask_id
            if not np.any(mask):
                continue

            if self.rigidness_mode is RigidnessMode.soft:
                foreground = [mask]
            elif self.rigidness_mode is RigidnessMode.rigid:
                foreground = scene_objects
            else:
                raise ValueError(f'Unexpected rigidness_mode: {self.rigidness_mode}')

            masks_params = self._move_mask(mask, foreground)

            full_mask = ObjectMask((panoptic_seg == mask_id).detach().cpu().numpy())

            for params in masks_params:
                aug_mask = deepcopy(full_mask)
                aug_mask.rescale(params["scaling_factor"], inplace=True)
                if params["horizontal_flip"]:
                    aug_mask.horizontal_flip(inplace=True)

                vertical_shift = int(round(aug_mask.height * params["vertical_shift"]))
                horizontal_shift = int(round(aug_mask.width * params["horizontal_shift"]))
                aug_mask.shift(vertical=vertical_shift, horizontal=horizontal_shift, inplace=True)
                aug_mask = aug_mask.restore_full_mask().astype('uint8')
                if aug_mask.mean() <= self.min_mask_area:
                    continue
                mask_set.append(aug_mask)

        if return_panoptic:
            return mask_set, panoptic_seg.detach().cpu().numpy()
        else:
            return mask_set


def propose_random_square_crop(mask, min_overlap=0.5):
    height, width = mask.shape
    mask_ys, mask_xs = np.where(mask > 0.5)  # mask==0 is known fragment and mask==1 is missing

    if height < width:
        crop_size = height
        obj_left, obj_right = mask_xs.min(), mask_xs.max()
        obj_width = obj_right - obj_left
        left_border = max(0, min(width - crop_size - 1, obj_left + obj_width * min_overlap - crop_size))
        right_border = max(left_border + 1, min(width - crop_size, obj_left + obj_width * min_overlap))
        start_x = np.random.randint(left_border, right_border)
        return start_x, 0, start_x + crop_size, height
    else:
        crop_size = width
        obj_top, obj_bottom = mask_ys.min(), mask_ys.max()
        obj_height = obj_bottom - obj_top
        top_border = max(0, min(height - crop_size - 1, obj_top + obj_height * min_overlap - crop_size))
        bottom_border = max(top_border + 1, min(height - crop_size, obj_top + obj_height * min_overlap))
        start_y = np.random.randint(top_border, bottom_border)
        return 0, start_y, width, start_y + crop_size

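ObjectMask keeps only a tight bounding box plus the cropped boolean mask, so shift, rescale and flip work on the small crop and restore_full_mask pastes the result back onto the full canvas. The sketch below (not part of the diff) walks through that round trip on a synthetic mask; the array values are made up for illustration, and only SegmentationMask needs detectron2, ObjectMask itself does not.

# Minimal sketch, not part of the diff: ObjectMask crop -> shift -> restore round trip.
# Assumes the repo is importable as `saicinpainting`; detectron2 is optional here.
import numpy as np
from saicinpainting.evaluation.masks.mask import ObjectMask

canvas = np.zeros((8, 8), dtype=bool)
canvas[2:5, 3:6] = True                          # a 3x3 object on an 8x8 canvas

obj = ObjectMask(canvas)
print(obj.up, obj.down, obj.left, obj.right)     # -> 2 5 3 6 (tight bounding box)
print(obj.area())                                # -> 9

shifted = obj.shift(vertical=2, horizontal=-1)   # inplace=False, so this is a copy
restored = shifted.restore_full_mask()           # paste the moved crop back on the canvas
print(restored.shape, restored.sum())            # -> (8, 8) 9  (object moved, area kept)
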
saicinpainting/evaluation/refinement.py
ADDED
@@ -0,0 +1,314 @@
import torch
import torch.nn as nn
from torch.optim import Adam, SGD
from kornia.filters import gaussian_blur2d
from kornia.geometry.transform import resize
from kornia.morphology import erosion
from torch.nn import functional as F
import numpy as np
import cv2

from saicinpainting.evaluation.data import pad_tensor_to_modulo
from saicinpainting.evaluation.utils import move_to_device
from saicinpainting.training.modules.ffc import FFCResnetBlock
from saicinpainting.training.modules.pix2pixhd import ResnetBlock

from tqdm import tqdm


def _pyrdown(im : torch.Tensor, downsize : tuple=None):
    """downscale the image"""
    if downsize is None:
        downsize = (im.shape[2]//2, im.shape[3]//2)
    assert im.shape[1] == 3, "Expected shape for the input to be (n,3,height,width)"
    im = gaussian_blur2d(im, kernel_size=(5,5), sigma=(1.0,1.0))
    im = F.interpolate(im, size=downsize, mode='bilinear', align_corners=False)
    return im


def _pyrdown_mask(mask : torch.Tensor, downsize : tuple=None, eps : float=1e-8, blur_mask : bool=True, round_up : bool=True):
    """downscale the mask tensor

    Parameters
    ----------
    mask : torch.Tensor
        mask of size (B, 1, H, W)
    downsize : tuple, optional
        size to downscale to. If None, image is downscaled to half, by default None
    eps : float, optional
        threshold value for binarizing the mask, by default 1e-8
    blur_mask : bool, optional
        if True, apply gaussian filter before downscaling, by default True
    round_up : bool, optional
        if True, values above eps are marked 1, else, values below 1-eps are marked 0, by default True

    Returns
    -------
    torch.Tensor
        downscaled mask
    """

    if downsize is None:
        downsize = (mask.shape[2]//2, mask.shape[3]//2)
    assert mask.shape[1] == 1, "Expected shape for the input to be (n,1,height,width)"
    if blur_mask is True:
        mask = gaussian_blur2d(mask, kernel_size=(5,5), sigma=(1.0,1.0))
        mask = F.interpolate(mask, size=downsize, mode='bilinear', align_corners=False)
    else:
        mask = F.interpolate(mask, size=downsize, mode='bilinear', align_corners=False)
    if round_up:
        mask[mask>=eps] = 1
        mask[mask<eps] = 0
    else:
        mask[mask>=1.0-eps] = 1
        mask[mask<1.0-eps] = 0
    return mask


def _erode_mask(mask : torch.Tensor, ekernel : torch.Tensor=None, eps : float=1e-8):
    """erode the mask, and set gray pixels to 0"""
    if ekernel is not None:
        mask = erosion(mask, ekernel)
        mask[mask>=1.0-eps] = 1
        mask[mask<1.0-eps] = 0
    return mask


def _l1_loss(
    pred : torch.Tensor, pred_downscaled : torch.Tensor, ref : torch.Tensor,
    mask : torch.Tensor, mask_downscaled : torch.Tensor,
    image : torch.Tensor, on_pred : bool=True
):
    """l1 loss on src pixels, and downscaled predictions if on_pred=True"""
    loss = torch.mean(torch.abs(pred[mask<1e-8] - image[mask<1e-8]))
    if on_pred:
        loss += torch.mean(torch.abs(pred_downscaled[mask_downscaled>=1e-8] - ref[mask_downscaled>=1e-8]))
    return loss


def _infer(
    image : torch.Tensor, mask : torch.Tensor,
    forward_front : nn.Module, forward_rears : nn.Module,
    ref_lower_res : torch.Tensor, orig_shape : tuple, devices : list,
    scale_ind : int, n_iters : int=15, lr : float=0.002):
    """Performs inference with refinement at a given scale.

    Parameters
    ----------
    image : torch.Tensor
        input image to be inpainted, of size (1,3,H,W)
    mask : torch.Tensor
        input inpainting mask, of size (1,1,H,W)
    forward_front : nn.Module
        the front part of the inpainting network
    forward_rears : nn.Module
        the rear part of the inpainting network
    ref_lower_res : torch.Tensor
        the inpainting at previous scale, used as reference image
    orig_shape : tuple
        shape of the original input image before padding
    devices : list
        list of available devices
    scale_ind : int
        the scale index
    n_iters : int, optional
        number of iterations of refinement, by default 15
    lr : float, optional
        learning rate, by default 0.002

    Returns
    -------
    torch.Tensor
        inpainted image
    """
    masked_image = image * (1 - mask)
    masked_image = torch.cat([masked_image, mask], dim=1)

    mask = mask.repeat(1,3,1,1)
    if ref_lower_res is not None:
        ref_lower_res = ref_lower_res.detach()
    with torch.no_grad():
        z1,z2 = forward_front(masked_image)
    # Inference
    mask = mask.to(devices[-1])
    ekernel = torch.from_numpy(cv2.getStructuringElement(cv2.MORPH_ELLIPSE,(15,15)).astype(bool)).float()
    ekernel = ekernel.to(devices[-1])
    image = image.to(devices[-1])
    z1, z2 = z1.detach().to(devices[0]), z2.detach().to(devices[0])
    z1.requires_grad, z2.requires_grad = True, True

    optimizer = Adam([z1,z2], lr=lr)

    pbar = tqdm(range(n_iters), leave=False)
    for idi in pbar:
        optimizer.zero_grad()
        input_feat = (z1,z2)
        for idd, forward_rear in enumerate(forward_rears):
            output_feat = forward_rear(input_feat)
            if idd < len(devices) - 1:
                midz1, midz2 = output_feat
                midz1, midz2 = midz1.to(devices[idd+1]), midz2.to(devices[idd+1])
                input_feat = (midz1, midz2)
            else:
                pred = output_feat

        if ref_lower_res is None:
            break
        losses = {}
        ######################### multi-scale #############################
        # scaled loss with downsampler
        pred_downscaled = _pyrdown(pred[:,:,:orig_shape[0],:orig_shape[1]])
        mask_downscaled = _pyrdown_mask(mask[:,:1,:orig_shape[0],:orig_shape[1]], blur_mask=False, round_up=False)
        mask_downscaled = _erode_mask(mask_downscaled, ekernel=ekernel)
        mask_downscaled = mask_downscaled.repeat(1,3,1,1)
        losses["ms_l1"] = _l1_loss(pred, pred_downscaled, ref_lower_res, mask, mask_downscaled, image, on_pred=True)

        loss = sum(losses.values())
        pbar.set_description("Refining scale {} using scale {} ...current loss: {:.4f}".format(scale_ind+1, scale_ind, loss.item()))
        if idi < n_iters - 1:
            loss.backward()
            optimizer.step()
            del pred_downscaled
            del loss
            del pred
    # "pred" is the prediction after Plug-n-Play module
    inpainted = mask * pred + (1 - mask) * image
    inpainted = inpainted.detach().cpu()
    return inpainted


def _get_image_mask_pyramid(batch : dict, min_side : int, max_scales : int, px_budget : int):
    """Build the image mask pyramid

    Parameters
    ----------
    batch : dict
        batch containing image, mask, etc
    min_side : int
        minimum side length to limit the number of scales of the pyramid
    max_scales : int
        maximum number of scales allowed
    px_budget : int
        the product H*W cannot exceed this budget, because of resource constraints

    Returns
    -------
    tuple
        image-mask pyramid in the form of list of images and list of masks
    """

    assert batch['image'].shape[0] == 1, "refiner works on only batches of size 1!"

    h, w = batch['unpad_to_size']
    h, w = h[0].item(), w[0].item()

    image = batch['image'][...,:h,:w]
    mask = batch['mask'][...,:h,:w]
    if h*w > px_budget:
        # resize
        ratio = np.sqrt(px_budget / float(h*w))
        h_orig, w_orig = h, w
        h, w = int(h*ratio), int(w*ratio)
        print(f"Original image too large for refinement! Resizing {(h_orig,w_orig)} to {(h,w)}...")
        image = resize(image, (h,w), interpolation='bilinear', align_corners=False)
        mask = resize(mask, (h,w), interpolation='bilinear', align_corners=False)
        mask[mask>1e-8] = 1
    breadth = min(h,w)
    n_scales = min(1 + int(round(max(0,np.log2(breadth / min_side)))), max_scales)
    ls_images = []
    ls_masks = []

    ls_images.append(image)
    ls_masks.append(mask)

    for _ in range(n_scales - 1):
        image_p = _pyrdown(ls_images[-1])
        mask_p = _pyrdown_mask(ls_masks[-1])
        ls_images.append(image_p)
        ls_masks.append(mask_p)
    # reverse the lists because we want the lowest resolution image as index 0
    return ls_images[::-1], ls_masks[::-1]


def refine_predict(
    batch : dict, inpainter : nn.Module, gpu_ids : str,
    modulo : int, n_iters : int, lr : float, min_side : int,
    max_scales : int, px_budget : int
):
    """Refines the inpainting of the network

    Parameters
    ----------
    batch : dict
        image-mask batch, currently we assume the batchsize to be 1
    inpainter : nn.Module
        the inpainting neural network
    gpu_ids : str
        the GPU ids of the machine to use. If only single GPU, use: "0,"
    modulo : int
        pad the image to ensure dimension % modulo == 0
    n_iters : int
        number of iterations of refinement for each scale
    lr : float
        learning rate
    min_side : int
        all sides of image on all scales should be >= min_side / sqrt(2)
    max_scales : int
        max number of downscaling scales for the image-mask pyramid
    px_budget : int
        pixels budget. Any image will be resized to satisfy height*width <= px_budget

    Returns
    -------
    torch.Tensor
        inpainted image of size (1,3,H,W)
    """

    assert not inpainter.training
    assert not inpainter.add_noise_kwargs
    assert inpainter.concat_mask

    gpu_ids = [f'cuda:{gpuid}' for gpuid in gpu_ids.replace(" ","").split(",") if gpuid.isdigit()]
    n_resnet_blocks = 0
    first_resblock_ind = 0
    found_first_resblock = False
    for idl in range(len(inpainter.generator.model)):
        if isinstance(inpainter.generator.model[idl], FFCResnetBlock) or isinstance(inpainter.generator.model[idl], ResnetBlock):
            n_resnet_blocks += 1
            found_first_resblock = True
        elif not found_first_resblock:
            first_resblock_ind += 1
    resblocks_per_gpu = n_resnet_blocks // len(gpu_ids)

    devices = [torch.device(gpu_id) for gpu_id in gpu_ids]

    # split the model into front, and rear parts
    forward_front = inpainter.generator.model[0:first_resblock_ind]
    forward_front.to(devices[0])
    forward_rears = []
    for idd in range(len(gpu_ids)):
        if idd < len(gpu_ids) - 1:
            forward_rears.append(inpainter.generator.model[first_resblock_ind + resblocks_per_gpu*(idd):first_resblock_ind+resblocks_per_gpu*(idd+1)])
        else:
            forward_rears.append(inpainter.generator.model[first_resblock_ind + resblocks_per_gpu*(idd):])
        forward_rears[idd].to(devices[idd])

    ls_images, ls_masks = _get_image_mask_pyramid(
        batch,
        min_side,
        max_scales,
        px_budget
    )
    image_inpainted = None

    for ids, (image, mask) in enumerate(zip(ls_images, ls_masks)):
        orig_shape = image.shape[2:]
        image = pad_tensor_to_modulo(image, modulo)
        mask = pad_tensor_to_modulo(mask, modulo)
        mask[mask >= 1e-8] = 1.0
        mask[mask < 1e-8] = 0.0
        image, mask = move_to_device(image, devices[0]), move_to_device(mask, devices[0])
        if image_inpainted is not None:
            image_inpainted = move_to_device(image_inpainted, devices[-1])
        image_inpainted = _infer(image, mask, forward_front, forward_rears, image_inpainted, orig_shape, devices, ids, n_iters, lr)
        image_inpainted = image_inpainted[:,:,:orig_shape[0], :orig_shape[1]]
        # detach everything to save resources
        image = image.detach().cpu()
        mask = mask.detach().cpu()

    return image_inpainted

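The refiner builds a Gaussian image/mask pyramid, runs the network front once, and then optimizes the intermediate features with Adam at each scale so that the downscaled prediction agrees with the inpainting from the previous, coarser scale. The sketch below (not part of the diff) only exercises the pyramid helpers on dummy tensors; `_pyrdown` and `_pyrdown_mask` are module-internal helpers, and the import assumes the full repo plus kornia, cv2 and tqdm are installed.

# Minimal sketch, not part of the diff: the pyramid helpers used by the refiner,
# applied to dummy tensors. Shapes are made up for illustration.
import torch
from saicinpainting.evaluation.refinement import _pyrdown, _pyrdown_mask

image = torch.rand(1, 3, 512, 512)          # (B, 3, H, W) image in [0, 1]
mask = torch.zeros(1, 1, 512, 512)
mask[:, :, 200:300, 200:300] = 1.0          # a rectangular hole

image_half = _pyrdown(image)                # Gaussian blur + bilinear downscale
mask_half = _pyrdown_mask(mask)             # blur, downscale, then re-binarize

print(image_half.shape, mask_half.shape)    # -> [1, 3, 256, 256] for both (1 channel for the mask)
print(torch.unique(mask_half))              # -> tensor([0., 1.]), still a binary mask
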
saicinpainting/evaluation/utils.py
ADDED
@@ -0,0 +1,28 @@
from enum import Enum

import yaml
from easydict import EasyDict as edict
import torch.nn as nn
import torch


def load_yaml(path):
    with open(path, 'r') as f:
        return edict(yaml.safe_load(f))


def move_to_device(obj, device):
    if isinstance(obj, nn.Module):
        return obj.to(device)
    if torch.is_tensor(obj):
        return obj.to(device)
    if isinstance(obj, (tuple, list)):
        return [move_to_device(el, device) for el in obj]
    if isinstance(obj, dict):
        return {name: move_to_device(val, device) for name, val in obj.items()}
    raise ValueError(f'Unexpected type {type(obj)}')


class SmallMode(Enum):
    DROP = "drop"
    UPSCALE = "upscale"

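move_to_device walks modules, tensors, tuples, lists and dicts recursively, so a whole batch dict can be moved in a single call, which is how refine_predict above uses it. A minimal sketch (not part of the diff), using the CPU device so it runs anywhere:

# Minimal sketch, not part of the diff: moving a nested batch with move_to_device.
# 'cpu' keeps the example hardware-independent; 'cuda:0' works the same way on a GPU box.
import torch
from saicinpainting.evaluation.utils import move_to_device

batch = {
    'image': torch.rand(1, 3, 64, 64),
    'mask': torch.rand(1, 1, 64, 64),
    'unpad_to_size': [torch.tensor([64]), torch.tensor([64])],
}
batch = move_to_device(batch, torch.device('cpu'))
print(batch['image'].device)   # -> cpu; every tensor in the nested structure was moved
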
saicinpainting/evaluation/vis.py
ADDED
@@ -0,0 +1,37 @@
import numpy as np
from skimage import io
from skimage.segmentation import mark_boundaries


def save_item_for_vis(item, out_file):
    mask = item['mask'] > 0.5
    if mask.ndim == 3:
        mask = mask[0]
    img = mark_boundaries(np.transpose(item['image'], (1, 2, 0)),
                          mask,
                          color=(1., 0., 0.),
                          outline_color=(1., 1., 1.),
                          mode='thick')

    if 'inpainted' in item:
        inp_img = mark_boundaries(np.transpose(item['inpainted'], (1, 2, 0)),
                                  mask,
                                  color=(1., 0., 0.),
                                  mode='outer')
        img = np.concatenate((img, inp_img), axis=1)

    img = np.clip(img * 255, 0, 255).astype('uint8')
    io.imsave(out_file, img)


def save_mask_for_sidebyside(item, out_file):
    mask = item['mask']  # > 0.5
    if mask.ndim == 3:
        mask = mask[0]
    mask = np.clip(mask * 255, 0, 255).astype('uint8')
    io.imsave(out_file, mask)


def save_img_for_sidebyside(item, out_file):
    img = np.transpose(item['image'], (1, 2, 0))
    img = np.clip(img * 255, 0, 255).astype('uint8')
    io.imsave(out_file, img)

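save_item_for_vis expects CHW float images in [0, 1], draws the mask boundary on the input, and, when an 'inpainted' key is present, writes the input and the result side by side. The sketch below (not part of the diff) feeds it a synthetic item; the output path is a placeholder.

# Minimal sketch, not part of the diff: one visualization written from random data.
import numpy as np
from saicinpainting.evaluation.vis import save_item_for_vis

item = {
    'image': np.random.rand(3, 64, 64).astype('float32'),
    'mask': np.zeros((1, 64, 64), dtype='float32'),
    'inpainted': np.random.rand(3, 64, 64).astype('float32'),
}
item['mask'][:, 16:48, 16:48] = 1.0               # a square hole
save_item_for_vis(item, '/tmp/vis_example.png')   # original | inpainted, with mask boundary
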
saicinpainting/training/__init__.py
ADDED
File without changes

saicinpainting/training/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (152 Bytes). View file

saicinpainting/training/data/__init__.py
ADDED
File without changes

saicinpainting/training/data/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (157 Bytes). View file

saicinpainting/training/data/__pycache__/aug.cpython-39.pyc
ADDED
Binary file (3.16 kB). View file

saicinpainting/training/data/__pycache__/datasets.cpython-39.pyc
ADDED
Binary file (8.94 kB). View file

saicinpainting/training/data/__pycache__/masks.cpython-39.pyc
ADDED
Binary file (11.9 kB). View file