File size: 8,168 Bytes

a3d6c18

"""
Source url: https://github.com/OPHoperHPO/image-background-remove-tool
Author: Nikita Selin (OPHoperHPO)[https://github.com/OPHoperHPO].
License: Apache License 2.0
"""
import pathlib
from typing import Union, List, Tuple

import PIL
import cv2
import numpy as np
import torch
from PIL import Image

from carvekit.ml.arch.fba_matting.models import FBA
from carvekit.ml.arch.fba_matting.transforms import (
    trimap_transform,
    groupnorm_normalise_image,
)
from carvekit.ml.files.models_loc import fba_pretrained
from carvekit.utils.image_utils import convert_image, load_image
from carvekit.utils.models_utils import get_precision_autocast, cast_network
from carvekit.utils.pool_utils import batch_generator, thread_pool_processing

__all__ = ["FBAMatting"]


class FBAMatting(FBA):
    """
    FBA Matting Neural Network to improve edges on image.
    """

    def __init__(
        self,
        device="cpu",
        input_tensor_size: Union[List[int], int] = 2048,
        batch_size: int = 2,
        encoder="resnet50_GN_WS",
        load_pretrained: bool = True,
        fp16: bool = False,
    ):
        """
        Initialize the FBAMatting model

        Args:
            device: processing device
            input_tensor_size: input image size
            batch_size: the number of images that the neural network processes in one run
            encoder: neural network encoder head
            load_pretrained: loading pretrained model
            fp16: use half precision

        """
        super(FBAMatting, self).__init__(encoder=encoder)
        self.fp16 = fp16
        self.device = device
        self.batch_size = batch_size
        if isinstance(input_tensor_size, list):
            self.input_image_size = input_tensor_size[:2]
        else:
            self.input_image_size = (input_tensor_size, input_tensor_size)
        self.to(device)
        if load_pretrained:
            self.load_state_dict(torch.load(fba_pretrained(), map_location=self.device))
        self.eval()

    def data_preprocessing(
        self, data: Union[PIL.Image.Image, np.ndarray]
    ) -> Tuple[torch.FloatTensor, torch.FloatTensor]:
        """
        Transform input image to suitable data format for neural network

        Args:
            data: input image

        Returns:
            input for neural network

        """
        resized = data.copy()
        if self.batch_size == 1:
            resized.thumbnail(self.input_image_size, resample=3)
        else:
            resized = resized.resize(self.input_image_size, resample=3)
        # noinspection PyTypeChecker
        image = np.array(resized, dtype=np.float64)
        image = image / 255.0  # Normalize image to [0, 1] values range
        if resized.mode == "RGB":
            image = image[:, :, ::-1]
        elif resized.mode == "L":
            image2 = np.copy(image)
            h, w = image2.shape
            image = np.zeros((h, w, 2))  # Transform trimap to binary data format
            image[image2 == 1, 1] = 1
            image[image2 == 0, 0] = 1
        else:
            raise ValueError("Incorrect color mode for image")
        h, w = image.shape[:2]  # Scale input mlt to 8
        h1 = int(np.ceil(1.0 * h / 8) * 8)
        w1 = int(np.ceil(1.0 * w / 8) * 8)
        x_scale = cv2.resize(image, (w1, h1), interpolation=cv2.INTER_LANCZOS4)
        image_tensor = torch.from_numpy(x_scale).permute(2, 0, 1)[None, :, :, :].float()
        if resized.mode == "RGB":
            return image_tensor, groupnorm_normalise_image(
                image_tensor.clone(), format="nchw"
            )
        else:
            return (
                image_tensor,
                torch.from_numpy(trimap_transform(x_scale))
                .permute(2, 0, 1)[None, :, :, :]
                .float(),
            )

    @staticmethod
    def data_postprocessing(
        data: torch.tensor, trimap: PIL.Image.Image
    ) -> PIL.Image.Image:
        """
        Transforms output data from neural network to suitable data
        format for using with other components of this framework.

        Args:
            data: output data from neural network
            trimap: Map with the area we need to refine

        Returns:
            Segmentation mask as PIL Image instance

        """
        if trimap.mode != "L":
            raise ValueError("Incorrect color mode for trimap")
        pred = data.numpy().transpose((1, 2, 0))
        pred = cv2.resize(pred, trimap.size, cv2.INTER_LANCZOS4)[:, :, 0]
        # noinspection PyTypeChecker
        # Clean mask by removing all false predictions outside trimap and already known area
        trimap_arr = np.array(trimap.copy())
        pred[trimap_arr[:, :] == 0] = 0
        # pred[trimap_arr[:, :] == 255] = 1
        pred[pred < 0.3] = 0
        return Image.fromarray(pred * 255).convert("L")

    def __call__(
        self,
        images: List[Union[str, pathlib.Path, PIL.Image.Image]],
        trimaps: List[Union[str, pathlib.Path, PIL.Image.Image]],
    ) -> List[PIL.Image.Image]:
        """
        Passes input images though neural network and returns segmentation masks as PIL.Image.Image instances

        Args:
            images: input images
            trimaps: Maps with the areas we need to refine

        Returns:
            segmentation masks as for input images, as PIL.Image.Image instances

        """

        if len(images) != len(trimaps):
            raise ValueError(
                "Len of specified arrays of images and trimaps should be equal!"
            )

        collect_masks = []
        autocast, dtype = get_precision_autocast(device=self.device, fp16=self.fp16)
        with autocast:
            cast_network(self, dtype)
            for idx_batch in batch_generator(range(len(images)), self.batch_size):
                inpt_images = thread_pool_processing(
                    lambda x: convert_image(load_image(images[x])), idx_batch
                )

                inpt_trimaps = thread_pool_processing(
                    lambda x: convert_image(load_image(trimaps[x]), mode="L"), idx_batch
                )

                inpt_img_batches = thread_pool_processing(
                    self.data_preprocessing, inpt_images
                )
                inpt_trimaps_batches = thread_pool_processing(
                    self.data_preprocessing, inpt_trimaps
                )

                inpt_img_batches_transformed = torch.vstack(
                    [i[1] for i in inpt_img_batches]
                )
                inpt_img_batches = torch.vstack([i[0] for i in inpt_img_batches])

                inpt_trimaps_transformed = torch.vstack(
                    [i[1] for i in inpt_trimaps_batches]
                )
                inpt_trimaps_batches = torch.vstack(
                    [i[0] for i in inpt_trimaps_batches]
                )

                with torch.no_grad():
                    inpt_img_batches = inpt_img_batches.to(self.device)
                    inpt_trimaps_batches = inpt_trimaps_batches.to(self.device)
                    inpt_img_batches_transformed = inpt_img_batches_transformed.to(
                        self.device
                    )
                    inpt_trimaps_transformed = inpt_trimaps_transformed.to(self.device)

                    output = super(FBAMatting, self).__call__(
                        inpt_img_batches,
                        inpt_trimaps_batches,
                        inpt_img_batches_transformed,
                        inpt_trimaps_transformed,
                    )
                    output_cpu = output.cpu()
                    del (
                        inpt_img_batches,
                        inpt_trimaps_batches,
                        inpt_img_batches_transformed,
                        inpt_trimaps_transformed,
                        output,
                    )
                masks = thread_pool_processing(
                    lambda x: self.data_postprocessing(output_cpu[x], inpt_trimaps[x]),
                    range(len(inpt_images)),
                )
                collect_masks += masks
            return collect_masks