import io
from pathlib import Path
from typing import Union

import cv2
import huggingface_hub
import numpy as np
import onnxruntime as rt
import torch
import torch.nn.functional as F
from briarmbg import BriaRMBG
from PIL import Image
from rembg import remove
from torchvision.transforms.functional import normalize

import internals.util.image as ImageUtil
from carvekit.api.high import HiInterface
from internals.data.task import ModelType
from internals.util.commons import download_image, read_url

class RemoveBackground:
    """Background removal backed by the `rembg` library."""

    def remove(self, image: Union[str, Image.Image]) -> Image.Image:
        # Accept either an image URL or an already-loaded PIL image.
        if isinstance(image, str):
            image = Image.open(io.BytesIO(read_url(image)))

        return remove(image)


class RemoveBackgroundV2:
    """Background removal that picks a backend based on the content type:
    an anime segmentation ONNX model for anime/comic images and CarveKit's
    HiInterface for photographic images."""

    def __init__(self):
        # Anime/comic segmentation model (skytnt/anime-seg, ONNX).
        model_path = huggingface_hub.hf_hub_download(
            "skytnt/anime-seg", "isnetis.onnx"
        )
        self.anime_rembg = rt.InferenceSession(
            model_path, providers=["CUDAExecutionProvider", "CPUExecutionProvider"]
        )

        # CarveKit pipeline for photographic ("real") images.
        self.interface = HiInterface(
            object_type="object",
            batch_size_seg=5,
            batch_size_matting=1,
            device="cuda" if torch.cuda.is_available() else "cpu",
            seg_mask_size=640,
            matting_mask_size=2048,
            trimap_prob_threshold=231,
            trimap_dilation=30,
            trimap_erosion_iters=5,
            fp16=False,
        )

    def remove(
        self, image: Union[str, Image.Image], model_type: ModelType = ModelType.REAL
    ) -> Image.Image:
        if isinstance(image, str):
            image = download_image(image)

        if model_type in (ModelType.ANIME, ModelType.COMIC):
            print("Using Anime Background remover")
            _, img = self.__rmbg_fn(np.array(image))
            return Image.fromarray(img)
        else:
            print("Using Real Background remover")
            img_path = Path.home() / ".cache" / "rm_bg.png"

            # Downscale very large inputs before running CarveKit.
            w, h = image.size
            if max(w, h) > 1536:
                image = ImageUtil.resize_image(image, dimension=1024)

            image.save(img_path)
            images_without_background = self.interface([img_path])
            return images_without_background[0]

    def __get_mask(self, img, s=1024):
        # Letterbox the image into an s x s square, run the ONNX model,
        # then crop and resize the predicted mask back to the original size.
        img = (img / 255).astype(np.float32)
        h, w = h0, w0 = img.shape[:-1]
        h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s)
        ph, pw = s - h, s - w
        img_input = np.zeros([s, s, 3], dtype=np.float32)
        img_input[ph // 2 : ph // 2 + h, pw // 2 : pw // 2 + w] = cv2.resize(
            img, (w, h)
        )
        img_input = np.transpose(img_input, (2, 0, 1))
        img_input = img_input[np.newaxis, :]
        mask = self.anime_rembg.run(None, {"img": img_input})[0][0]
        mask = np.transpose(mask, (1, 2, 0))
        mask = mask[ph // 2 : ph // 2 + h, pw // 2 : pw // 2 + w]
        mask = cv2.resize(mask, (w0, h0))[:, :, np.newaxis]
        return mask

    def __rmbg_fn(self, img):
        mask = self.__get_mask(img)
        # Composite the subject onto white, then attach the mask as an alpha channel.
        img = (mask * img + 255 * (1 - mask)).astype(np.uint8)
        mask = (mask * 255).astype(np.uint8)
        img = np.concatenate([img, mask], axis=2, dtype=np.uint8)
        mask = mask.repeat(3, axis=2)
        return mask, img


class RemoveBackgroundV3:
    """Background removal using the BRIA RMBG-1.4 model."""

    def __init__(self):
        net = BriaRMBG.from_pretrained("briaai/RMBG-1.4")
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        net.to(device)
        net.eval()  # inference mode
        self.net = net

    def remove(self, image: Union[str, Image.Image]) -> Image.Image:
        if isinstance(image, str):
            image = download_image(image, mode="RGBA")

        orig_image = image
        w, h = orig_image.size
        image = self.__resize_image(orig_image)
        im_np = np.array(image)
        im_tensor = torch.tensor(im_np, dtype=torch.float32).permute(2, 0, 1)
        im_tensor = torch.unsqueeze(im_tensor, 0)
        im_tensor = torch.divide(im_tensor, 255.0)
        im_tensor = normalize(im_tensor, [0.5, 0.5, 0.5], [1.0, 1.0, 1.0])
        if torch.cuda.is_available():
            im_tensor = im_tensor.cuda()

        with torch.no_grad():
            result = self.net(im_tensor)

        # Upscale the predicted mask back to the original resolution and
        # rescale it to the [0, 1] range.
        result = torch.squeeze(
            F.interpolate(result[0][0], size=(h, w), mode="bilinear"), 0
        )
        ma = torch.max(result)
        mi = torch.min(result)
        result = (result - mi) / (ma - mi)

        im_array = (result * 255).cpu().data.numpy().astype(np.uint8)
        pil_im = Image.fromarray(np.squeeze(im_array))

        # Use the mask as the alpha channel of the original image.
        new_im = Image.new("RGBA", pil_im.size, (0, 0, 0, 0))
        new_im.paste(orig_image, mask=pil_im)

        return new_im

    def __resize_image(self, image):
        # Resize to the 1024x1024 RGB input used by the RMBG-1.4 preprocessing.
        image = image.convert("RGB")
        model_input_size = (1024, 1024)
        image = image.resize(model_input_size, Image.BILINEAR)
        return image
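

# A minimal usage sketch, assuming the module's dependencies are installed and
# the model weights can be downloaded. "input.png" and "no_bg.png" are
# hypothetical local paths; a URL string would also be accepted by `remove`.
if __name__ == "__main__":
    remover = RemoveBackgroundV3()
    result = remover.remove(Image.open("input.png").convert("RGBA"))
    result.save("no_bg.png")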