LivePortrait2

Sleeping

File size: 23,495 Bytes

e3af00f

from pathlib import Path

import cv2
import numpy as np
import pandas as pd


def maskblur(mask, kernel_size, sigma=1):
    """Blur ``mask`` with a square Gaussian kernel.

    Args:
        mask: Array accepted by ``cv2.GaussianBlur``.
        kernel_size: Side length used for both kernel width and height.
        sigma: Forwarded as the third positional argument (``sigmaX``) of
            ``cv2.GaussianBlur``.

    Returns:
        The blurred array produced by ``cv2.GaussianBlur``.
    """
    ksize = (kernel_size, kernel_size)
    return cv2.GaussianBlur(mask, ksize, sigma)


def erosion(mask, kernel_size):
    """Erode ``mask`` once with a square all-ones structuring element.

    Args:
        mask: Array accepted by ``cv2.erode``.
        kernel_size: Side length of the square ``uint8`` kernel.

    Returns:
        The eroded array produced by ``cv2.erode``.
    """
    structuring_element = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    return cv2.erode(mask, structuring_element, iterations=1)


def dilate(mask, kernel_size):
    """Dilate ``mask`` once with a square all-ones structuring element.

    Args:
        mask: Array accepted by ``cv2.dilate``.
        kernel_size: Side length of the square ``uint8`` kernel.

    Returns:
        The dilated array produced by ``cv2.dilate``.
    """
    structuring_element = np.ones((kernel_size, kernel_size), dtype=np.uint8)
    return cv2.dilate(mask, structuring_element, iterations=1)


def resize_adapt(model_out, crop_region):
    """Resize a square image so that it fits ``crop_region``.

    The image is resized to a square whose side is the longer edge of the
    region and then centre-cropped to the region's exact height and width.

    Args:
        model_out: Image array; only ``shape[0]`` is used as the side length,
            so height and width are assumed to be equal.
        crop_region: ``(x1, y1, x2, y2)`` box, inclusive on both ends.

    Returns:
        ``model_out`` itself when it already has the region's size, otherwise
        a resized and centre-cropped array of shape ``(h, w, ...)``.
    """
    x1, y1, x2, y2 = crop_region
    # The box is inclusive on both ends, hence the +1.
    h, w = y2 - y1 + 1, x2 - x1 + 1
    sz = model_out.shape[0]  # height and width of model_out are the same
    if h == sz and w == sz:
        return model_out

    max_hw = max(h, w)
    # Shrinking uses INTER_AREA; enlarging (or equal area) uses INTER_CUBIC.
    if max_hw * max_hw < model_out.shape[0] * model_out.shape[1]:
        interpolation = cv2.INTER_AREA
    else:
        interpolation = cv2.INTER_CUBIC

    # The flag must be passed by keyword: the third positional parameter of
    # cv2.resize is ``dst``, so a positional flag never selects the mode.
    resized = cv2.resize(model_out, (max_hw, max_hw), interpolation=interpolation)

    top = (max_hw - h) // 2
    left = (max_hw - w) // 2
    return resized[top : top + h, left : left + w]


def get_face_mask(
    img_size, df_fan_row, blur_ratio=0.3, dilate_ratio=0.2, erosion_ratio=0
):
    """Build a soft face-shaped blending mask from 2D landmarks.

    Args:
        img_size: ``(width, height)`` of the mask to create.
        df_fan_row: Mapping with keys ``"pts2d"`` (landmark array or ``None``),
            ``"cropped_box"`` and ``"cropped_size"``.
        blur_ratio: Gaussian kernel size as a fraction of the landmark
            bounding-box size; ``0`` disables blurring.
        dilate_ratio: Dilation kernel size as a fraction of the landmark
            bounding-box size; ``0`` disables dilation.
        erosion_ratio: Erosion kernel size as a fraction of the landmark
            bounding-box size; ``0`` disables erosion.

    Returns:
        ``{"crop": mask, "origin": 1 - mask}`` where ``mask`` has shape
        ``(height, width, 1)``. With landmarks it is a float array in
        ``[0, 1]``; without landmarks it is an all-zero ``uint8`` array.

    Raises:
        AssertionError: If any ratio is outside ``[0, 1]``.
    """
    assert 0 <= blur_ratio <= 1
    assert 0 <= erosion_ratio <= 1
    assert 0 <= dilate_ratio <= 1

    def _get_face_pts_n_box():
        """Return the face polygon (int32) and the landmark bounding box."""
        box = df_fan_row["cropped_box"]
        # Move the landmarks into the coordinate frame of the cropped box.
        pts2d = df_fan_row["pts2d"] - np.array([box[0], box[1]])

        cropped_size = df_fan_row["cropped_size"]
        if np.ndim(cropped_size) != 0:
            # A sequence is stored; its first entry is used as the size.
            cropped_size = cropped_size[0]
        pts2d = pts2d * (img_size[0] / cropped_size)

        xs, ys = pts2d[:, 0], pts2d[:, 1]
        bbox = (min(xs), min(ys), max(xs), max(ys))
        # Points 0-16 followed by points 17-26 in reverse order close the
        # outline (presumably jawline + eyebrows of a 68-point layout --
        # TODO confirm against the landmark detector).
        polygon = np.concatenate([pts2d[0:17, :], pts2d[17:27, :][::-1]])
        # cv2.fillPoly requires 32-bit signed integer points.
        return polygon.astype(np.int32), bbox

    mask = np.zeros((img_size[1], img_size[0]), dtype=np.uint8)

    if df_fan_row["pts2d"] is None:
        # No landmarks: nothing is taken from the crop.
        mask = np.expand_dims(mask, axis=2)
        return {"crop": mask, "origin": 1 - mask}

    pts, box = _get_face_pts_n_box()
    h = max(box[2] - box[0], box[3] - box[1])
    mask = cv2.fillPoly(mask, [pts], (255))
    # ``// 2 * 2 + 1`` forces every kernel size to be odd.
    if dilate_ratio != 0:
        mask = dilate(mask, int(h * dilate_ratio) // 2 * 2 + 1)
    if erosion_ratio != 0:
        mask = erosion(mask, int(h * erosion_ratio) // 2 * 2 + 1)
    if blur_ratio != 0:
        blur_kernel_size = int(h * blur_ratio) // 2 * 2 + 1
        mask = maskblur(mask, blur_kernel_size, 0)
    mask = mask / 255
    if mask.ndim == 2:
        mask = np.expand_dims(mask, axis=2)
    return {"crop": mask, "origin": 1 - mask}


def cromakey_green(img):
    """Append an alpha channel that keys out green pixels.

    Channels 0, 1 and 2 of ``img`` are treated as red, green and blue. A pixel
    is keyed out when green > 50, green > red and 0.7 * green > blue. The
    binary alpha is blurred (kernel 13), values above 100 are pushed to 255,
    and the result is eroded (kernel 5).

    Args:
        img: ``(H, W, 3)`` image array.

    Returns:
        New array: ``img`` concatenated with the alpha plane on axis 2.
    """
    red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    keyed_out = (green > 50) & ((green * 1.0 > red) & (green * 0.7 > blue))
    alpha = ((1 - keyed_out) * 255).astype(np.uint8)

    alpha = maskblur(alpha, kernel_size=13)
    alpha[alpha > 100] = 255
    alpha = erosion(alpha, kernel_size=5)

    alpha_plane = np.expand_dims(alpha, axis=2) if alpha.ndim == 2 else alpha
    return np.concatenate((img, alpha_plane), axis=2)


def cromakey_green_binary(img):
    """Run ``cromakey_green`` and binarise its alpha channel to 0/1.

    Args:
        img: ``(H, W, 3)`` image array, forwarded to ``cromakey_green``.

    Returns:
        The 4-channel array from ``cromakey_green`` whose channel 3 holds 0
        where alpha was <= 128 and 1 where it was > 128. (Previously the
        function fell off the end and returned ``None``.)
    """
    img = cromakey_green(img)
    alpha = img[:, :, 3]  # view: the writes below update ``img`` in place
    alpha[alpha <= 128] = 0
    alpha[alpha > 128] = 1
    return img


def cromakey_green_hunet_lmy(img):
    """Append an alpha channel that keys out green pixels (hunet_lmy tuning).

    Channels 0, 1 and 2 of ``img`` are treated as red, green and blue. A pixel
    is keyed out when green > 70, green > red and 0.8 * green > blue. The
    binary alpha is blurred (kernel 11), values above 100 are pushed to 255,
    then it is blurred again (kernel 3) and eroded (kernel 3).

    Args:
        img: ``(H, W, 3)`` image array.

    Returns:
        New array: ``img`` concatenated with the alpha plane on axis 2.
    """
    red, green, blue = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    keyed_out = (green > 70) & ((green > red) & (green * 0.8 > blue))
    alpha = ((1 - keyed_out) * 255).astype(np.uint8)

    alpha = maskblur(alpha, kernel_size=11)
    alpha[alpha > 100] = 255
    alpha = maskblur(alpha, kernel_size=3)
    alpha = erosion(alpha, kernel_size=3)

    alpha_plane = np.expand_dims(alpha, axis=2) if alpha.ndim == 2 else alpha
    return np.concatenate((img, alpha_plane), axis=2)


def cromakey_green_ybm_front(img):
    """Chroma-key function for the "ybm" footage (front variant).

    Channels 0, 1 and 2 of ``img`` are treated as red, green and blue. A pixel
    is keyed out when green > 70, green > red and 0.9 * green > blue. The
    binary alpha is blurred (kernel 11), values above 100 are pushed to 255,
    then it is blurred twice more (kernel 3). Wherever the final alpha is
    below 255, the green channel is replaced by 0.8 * red.

    Args:
        img: ``(H, W, 3)`` image array. It is not modified.

    Returns:
        New ``(H, W, 4)`` array: the green-corrected image plus alpha plane.
    """
    # Work on a copy: the green correction below writes through a channel
    # view, which would otherwise alter the caller's array. This matches the
    # other ybm/devin variants in this file.
    img = img.copy()
    r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]

    g_alpha = g > 70
    r_alpha = g > r
    b_alpha = (g * 0.9) > b

    alpha = g_alpha & (r_alpha & b_alpha)
    alpha = (1 - alpha) * 255
    alpha = alpha.astype(np.uint8)

    alpha = maskblur(alpha, kernel_size=11)
    alpha[np.where(alpha > 100)] = 255
    alpha = maskblur(alpha, kernel_size=3)
    alpha = maskblur(alpha, kernel_size=3)

    # Overwrite green in every not-fully-opaque pixel with 0.8 * red.
    grey_alpha = alpha < 255
    g[grey_alpha] = r[grey_alpha] * 0.8

    if len(alpha.shape) == 2:
        alpha2 = np.expand_dims(alpha, axis=2)
    else:
        alpha2 = alpha
    new = np.concatenate((img, alpha2), axis=2)
    return new


def cromakey_green_ybm_side(img):
    """Chroma-key function for the "ybm" footage (side variant).

    Works on a copy of ``img``. Channels 0, 1 and 2 are treated as red, green
    and blue. A pixel is keyed out when green > 50, green > red and
    0.9 * green > blue. The binary alpha is blurred (kernel 11), values above
    100 are pushed to 255, then it is blurred twice more (kernel 3). Wherever
    the final alpha is below 255, the green channel is replaced by 0.8 * red.

    Args:
        img: ``(H, W, 3)`` image array.

    Returns:
        New ``(H, W, 4)`` array: the green-corrected copy plus alpha plane.
    """
    frame = img.copy()
    red, green, blue = frame[:, :, 0], frame[:, :, 1], frame[:, :, 2]
    keyed_out = (green > 50) & ((green > red) & (green * 0.9 > blue))
    alpha = ((1 - keyed_out) * 255).astype(np.uint8)

    alpha = maskblur(alpha, kernel_size=11)
    alpha[alpha > 100] = 255
    for _ in range(2):
        alpha = maskblur(alpha, kernel_size=3)

    # Overwrite green in every not-fully-opaque pixel with 0.8 * red.
    not_opaque = alpha < 255
    green[not_opaque] = red[not_opaque] * 0.8

    alpha_plane = np.expand_dims(alpha, axis=2) if alpha.ndim == 2 else alpha
    return np.concatenate((frame, alpha_plane), axis=2)


def cromakey_green_devin_side(img):
    """Chroma-key function for the "devin" footage.

    Works on a copy of ``img``. Channels 0, 1 and 2 are treated as red, green
    and blue. A pixel is keyed out when green > 70, 0.8 * green > red and
    0.9 * green > blue. The binary alpha is blurred (kernel 7, sigma 3),
    values below 150 are cut to 0, and it is blurred again (kernel 5, sigma 2).

    Args:
        img: ``(H, W, 3)`` image array.

    Returns:
        New ``(H, W, 4)`` array: the copied image plus alpha plane.
    """
    frame = img.copy()
    red, green, blue = frame[:, :, 0], frame[:, :, 1], frame[:, :, 2]
    keyed_out = (green > 70) & ((green * 0.8 > red) & (green * 0.9 > blue))
    alpha = ((1 - keyed_out) * 255).astype(np.uint8)

    alpha = maskblur(alpha, kernel_size=7, sigma=3)
    alpha[alpha < 150] = 0
    alpha = maskblur(alpha, kernel_size=5, sigma=2)

    if alpha.ndim == 2:
        alpha = np.expand_dims(alpha, axis=2)
    return np.concatenate((frame, alpha), axis=2)


def get_cromakey_func(args):
    """Select the chroma-key function named by ``args.cromakey``.

    Args:
        args: Config object exposing ``keys()`` and attribute access.

    Returns:
        ``cromakey_green_hunet_lmy`` when no ``"cromakey"`` key is present,
        otherwise the function whose name equals ``args.cromakey``.

    Raises:
        ValueError: If ``args.cromakey`` names no known function.
    """
    if "cromakey" not in args.keys():
        return cromakey_green_hunet_lmy

    name = args.cromakey
    if name == "cromakey_green_ybm_front":
        return cromakey_green_ybm_front
    if name == "cromakey_green_ybm_side":
        return cromakey_green_ybm_side
    if name == "cromakey_green_devin_side":
        return cromakey_green_devin_side

    # Raising a plain string is itself a TypeError in Python 3; raise a real
    # exception so the caller sees the actual problem.
    raise ValueError(f"cromakey not found: {name!r}")


def compose_default_(model_out, org_image_with_alpha, mask, **kwargs):
    """Blend the original alpha and the keyed alpha using a binary mask.

    Args:
        model_out: 4-channel array; its channel 3 is overwritten in place.
        org_image_with_alpha: 4-channel array supplying the original alpha.
        mask: ``(H, W, C)`` array; channel 0 is used and every positive value
            is set to 1 in place (through a view of the caller's array).
        **kwargs: Ignored.

    Returns:
        ``model_out`` with the blended, lightly blurred alpha channel.
    """
    # 1. Blend: outside the mask keep the original video's alpha, inside the
    #    mask use the alpha computed for the model output.
    mask = mask[:, :, 0]
    mask[mask > 0] = 1  # turn the mask region (e.g. 128) into 1
    original_alpha = org_image_with_alpha[:, :, 3]
    keyed_alpha = model_out[:, :, 3]
    model_out[:, :, 3] = original_alpha * (1 - mask) + keyed_alpha * mask

    # 2. Blur slightly so the blended alpha looks more natural.
    model_out[:, :, 3] = maskblur(model_out[:, :, 3], kernel_size=3, sigma=1)
    return model_out


def compose_devin_(model_out, org_image_with_alpha, mask, debug=False, **kwargs):
    """Blend the original alpha and the keyed alpha using a softened mask.

    Args:
        model_out: 4-channel array; its channel 3 is overwritten in place.
        org_image_with_alpha: 4-channel array supplying the original alpha.
        mask: ``(H, W, C)`` array; channel 0 is used and every positive value
            is set to 1 in place (through a view of the caller's array).
        debug: When true, print the kernel size and the mask height.
        **kwargs: Ignored.

    Returns:
        ``model_out`` with the blended, lightly blurred alpha channel.
    """
    mask = mask[:, :, 0]
    mask[mask > 0] = 1  # turn the mask region (e.g. 128) into 1
    mask = mask.astype(np.float32)

    # 1. Soften the mask so the border with the existing alpha is not visible.
    #    The kernel is about 3% of the image height, forced to be odd.
    kernel_size = int(mask.shape[0] * 0.03) // 2 * 2 + 1
    if debug:
        print(
            f"## compose_devin_: kernel_size:{kernel_size}, mask_height:{mask.shape[0]}"
        )
    if kernel_size >= 3:
        sigma = kernel_size // 2
        mask = dilate(mask, kernel_size=kernel_size)
        for _ in range(2):
            mask = maskblur(mask, kernel_size=kernel_size, sigma=sigma)
        mask = erosion(mask, kernel_size=3)  # shrink by one pixel only

    # 2. Blend: outside the mask keep the original video's alpha, inside the
    #    mask use the alpha computed for the model output.
    original_alpha = org_image_with_alpha[:, :, 3]
    keyed_alpha = model_out[:, :, 3]
    model_out[:, :, 3] = original_alpha * (1 - mask) + keyed_alpha * mask

    # 3. One more light blur on the blended alpha.
    model_out[:, :, 3] = maskblur(model_out[:, :, 3], kernel_size=3, sigma=1)

    return model_out


def get_compose_mask_func(args):
    """Select the alpha-compose function for the given model args.

    Args:
        args: Config object exposing ``keys()`` and attribute access.

    Returns:
        ``compose_devin_`` when ``args.cromakey`` is
        ``"cromakey_green_devin_side"`` or ``args.compose`` is
        ``"compose_smooth"``; ``compose_default_`` otherwise.
    """
    uses_devin_key = (
        "cromakey" in args.keys() and args.cromakey == "cromakey_green_devin_side"
    )
    if uses_devin_key or (
        "compose" in args.keys() and args.compose == "compose_smooth"
    ):
        return compose_devin_
    return compose_default_


def get_keying_func(template):
    """Build a per-frame keying function bound to ``template.model.args``.

    The chroma-key and compose functions are resolved once, here; the returned
    closure applies them to each prediction.

    Args:
        template: Object exposing ``model.args``.

    Returns:
        ``keying_(pred, idx, box=None)``.
    """
    model_args = template.model.args
    cromakey_func = get_cromakey_func(model_args)
    compose_func = get_compose_mask_func(model_args)

    def _height_width(array):
        return array.shape[0], array.shape[1]

    def keying_(pred, idx, box=None):
        """Key and compose one prediction.

        ``pred`` must provide ``"pred"``, ``"mask"``, ``"img_gt_with_alpha"``
        and ``"filename"``. ``idx`` is not used. When ``box`` is given and
        taller than the model output, all three arrays are resized to it.
        """
        model_out = pred["pred"]
        mask = pred["mask"]
        alpha = pred["img_gt_with_alpha"]

        # Files ending in "_no.jpg"/"_no.png" bypass keying: return the
        # ground-truth image with channels 0 and 2 swapped.
        if pred["filename"].endswith(("_no.jpg", "_no.png")):
            return alpha[:, :, [2, 1, 0, 3]]

        alpha_hw = _height_width(alpha)
        if alpha_hw != _height_width(mask) or alpha_hw != _height_width(model_out):
            raise Exception(
                f"not matched keying shape. "
                f"alpha: {alpha.shape[0]}, {alpha.shape[1]}, {alpha.shape[2]}, "
                f"mask: {mask.shape[0]}, {mask.shape[1]}, "
                f"model_out: {model_out.shape[0]}, {model_out.shape[1]}"
            )

        if box is not None and box[3] - box[1] > model_out.shape[0]:
            model_out, mask, alpha = (
                resize_adapt(model_out, box),
                resize_adapt(mask, box),
                resize_adapt(alpha, box),
            )

        keyed = cromakey_func(model_out)
        return compose_func(model_out=keyed, org_image_with_alpha=alpha, mask=mask)

    return keying_


def get_box_mask(width, height, config, verbose=False):
    """Create a box-shaped blending mask that fades out toward its edges.

    The mask starts at 1 everywhere. One-pixel rings are then painted from
    the outside in, with values rising from 0 in steps of
    ``1 / gradation_width`` where ``gradation_width = int(height * 0.1)``.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        config: Mapping-like object (``keys()`` and ``[]``). Optional keys:
            ``"gradation_bottom"``, a fraction of ``height`` used for an extra
            bottom-edge ramp, and ``"box_mask_erosion"``, a fraction of
            ``height`` used to erode the finished mask.
        verbose: Unused.

    Returns:
        ``{"crop": mask, "origin": 1 - mask}`` with ``mask`` of shape
        ``(height, width, 1)``.
    """
    gradation_width = int(height * 0.1)
    gradation_bottom = None
    if "gradation_bottom" in config.keys():
        gradation_bottom = int(height * config["gradation_bottom"])
    box_mask_erosion = None
    if "box_mask_erosion" in config.keys():
        box_mask_erosion = int(height * config["box_mask_erosion"])

    crop_mask = np.ones((height, width, 1))

    # Ring ``outer`` is one pixel thick and sits ``outer`` pixels from every
    # side. The innermost step (gradation_width - 1) is never painted.
    for inner in range(1, gradation_width):
        outer = inner - 1
        level = outer / gradation_width
        crop_mask[outer:inner, outer : width - outer, :] = level
        crop_mask[height - inner : height - outer, outer : width - outer, :] = level
        crop_mask[outer : height - outer, outer:inner, :] = level
        crop_mask[outer : height - outer, width - inner : width - outer, :] = level

    if gradation_bottom is not None:
        # Repaint the bottom rows with their own ramp.
        for inner in range(1, gradation_bottom):
            outer = inner - 1
            crop_mask[height - inner : height - outer, outer : width - outer, :] = (
                outer / gradation_bottom
            )

    if box_mask_erosion is not None:
        crop_mask = erosion(crop_mask, box_mask_erosion * 2 + 1)
        if crop_mask.ndim == 2:
            # Restore the channel axis, e.g. (352, 352) -> (352, 352, 1).
            crop_mask = np.expand_dims(crop_mask, 2)

    return {"crop": crop_mask, "origin": 1 - crop_mask}


def get_compose_func_without_keying_move(template, ratio, verbose=False):
    """Build a compose function for templates whose head box moves per frame.

    Reads ``df_fan.pickle`` from
    ``{template.crop_mp4_dir}/{stem of template.template_video_path}_000/``
    and indexes it by ``frame_idx``.

    Args:
        template: Object exposing ``model.args``, ``config``, ``crop_mp4_dir``
            and ``template_video_path``.
        ratio: Template scale ratio (1.0 keeps the stored boxes unchanged).
        verbose: Forwarded to ``get_box_mask``.

    Returns:
        ``compose_one(model_out, full_img, head_box_idx)`` which pastes the
        model output into a copy of ``full_img`` and returns that copy.
    """
    args = template.model.args
    df = pd.read_pickle(
        f"{template.crop_mp4_dir}/{Path(template.template_video_path).stem}_000/df_fan.pickle"
    )
    df = df.set_index("frame_idx")
    first_box = df.loc[0]["cropped_box"]
    # Side of the pasted box: frame 0's box width minus 20, floored to a
    # multiple of 10.
    move_head_box_size = (first_box[2] - first_box[0] - 20) // 10 * 10

    def resize_and_scale(model_out, head_box_idx):
        """Return ``(image, box)`` snapped to a 10-pixel grid and scaled.

        The output is resized to the ratio-1.0 box, then both the box and the
        image are cropped to a position that is a multiple of 10 in the
        original video, and finally both are scaled by ``ratio``.
        """
        head_box = df["cropped_box"][head_box_idx]
        if ratio == 1.0:
            return model_out, head_box

        # First bring the model output back to the original box size.
        model_out = resize_adapt(model_out, head_box)

        # Round the box's top-left corner up to the next multiple of 10.
        left, top = (np.array(head_box[:2]) + 9) // 10 * 10
        new_head_box = np.array(
            [left, top, left + move_head_box_size - 1, top + move_head_box_size - 1]
        )  # inclusive on both ends, hence the -1

        # Crop the image so that it lines up with the snapped box.
        diff_box = new_head_box - head_box
        new_model_out = model_out[diff_box[1] : diff_box[3], diff_box[0] : diff_box[2]]
        if (
            new_model_out.shape[0] % 10 != 0 or new_model_out.shape[1] % 10 != 0
        ):  # the size must be a multiple of 10
            raise Exception(f"new_model_out.shape % 10 != 0, {new_model_out.shape}")

        # Scale the box by ``ratio``. Use a wide integer type: uint8 would
        # wrap every coordinate above 255.
        x1, y1, _, _ = np.round(new_head_box * ratio).astype(np.int64)
        # Inclusive on both ends, hence the -1.
        new_head_box = (
            x1,
            y1,
            x1 + int(move_head_box_size * ratio) - 1,
            y1 + int(move_head_box_size * ratio) - 1,
        )
        new_model_out = resize_adapt(new_model_out, new_head_box)

        return new_model_out, new_head_box

    def compose_one(model_out, full_img, head_box_idx):
        """Paste ``model_out`` into a copy of ``full_img`` for one frame."""
        model_out, box = resize_and_scale(model_out, head_box_idx)
        x1, y1, x2, y2 = box
        img = resize_adapt(model_out, (x1, y1, x2, y2))
        if (
            "compose" in template.config.keys()
            and template.config.compose == "face_only"
        ):
            row = df.loc[head_box_idx]
            mask_box = get_face_mask(
                (img.shape[1], img.shape[0]), row, **get_compose_option(template.config)
            )
        else:
            mask_box = get_box_mask(
                x2 - x1 + 1, y2 - y1 + 1, config=args, verbose=verbose
            )

        if y2 - y1 + 1 != img.shape[0] or x2 - x1 + 1 != img.shape[1]:
            raise Exception(
                f"not matched compose shape. x2-x1+1: {x2 - x1 + 1}, y2-y1+1:{y2 - y1 + 1}, img: {img.shape[1]}, {img.shape[0]}"
            )

        # Give a 3-channel frame a fully opaque alpha channel.
        if full_img.shape[2] == 3:
            alpha = np.zeros_like(full_img[:, :, :1])
            alpha.fill(255)
            full_img = np.concatenate([full_img, alpha], axis=2)

        out_memory = full_img.copy()

        alpha = img[:, :, 3]
        alpha = cv2.merge([alpha, alpha, alpha])

        back = out_memory[y1 : y2 + 1, x1 : x2 + 1].copy()
        front = img[:, :, 0:3]

        # Where the crop's alpha is below 255 keep the background colour,
        # elsewhere take the model output; the alpha plane is the background's.
        img = np.concatenate(
            [np.where(alpha < (255, 255, 255), back[:, :, :3], front), back[:, :, 3:]],
            axis=2,
        )
        out_memory[y1 : y2 + 1, x1 : x2 + 1] = (
            full_img[y1 : y2 + 1, x1 : x2 + 1] * mask_box["origin"]
            + img * mask_box["crop"]
        )
        return out_memory

    return compose_one


def get_compose_func_without_keying_default(template, ratio, verbose=False):
    """Build a compose function that pastes into one fixed box.

    Reads ``df_fan.pickle`` from
    ``{template.crop_mp4_dir}/{stem of template.template_video_path}_000/``
    and uses the first row's ``cropped_box``, scaled by ``ratio``, for every
    frame.

    Args:
        template: Object exposing ``model.args``, ``crop_mp4_dir`` and
            ``template_video_path``.
        ratio: Template scale ratio applied to the stored box.
        verbose: When true, print the box size and coordinates; also forwarded
            to ``get_box_mask``.

    Returns:
        ``compose_one(model_out, full_img, _)`` which pastes the model output
        into a copy of ``full_img`` and returns that copy.
    """
    args = template.model.args
    df = pd.read_pickle(
        f"{template.crop_mp4_dir}/{Path(template.template_video_path).stem}_000/df_fan.pickle"
    )
    # Shrink cropped_box by the same ratio the template was reduced by.
    # Use a wide integer type: uint8 would wrap every coordinate above 255.
    x1, y1, x2, y2 = np.round(np.array(df["cropped_box"].values[0]) * ratio).astype(
        np.int64
    )
    del df
    mask_box = get_box_mask(x2 - x1 + 1, y2 - y1 + 1, config=args, verbose=verbose)
    if verbose:
        print("croped size: ", x2 - x1 + 1, y2 - y1 + 1)
        print("croped region(x1,y1,x2,y2): ", x1, y1, x2, y2)

    def compose_one(model_out, full_img, _):
        """Paste ``model_out`` into a copy of ``full_img``."""
        img = resize_adapt(model_out, (x1, y1, x2, y2))
        if y2 - y1 + 1 != img.shape[0] or x2 - x1 + 1 != img.shape[1]:
            raise Exception(
                f"not matched compose shape. x2-x1+1: {x2 - x1 + 1}, y2-y1+1:{y2 - y1 + 1}, img: {img.shape[1]}, {img.shape[0]}"
            )

        # Paste. Give a 3-channel frame a fully opaque alpha channel first.
        if full_img.shape[2] == 3:
            alpha = np.zeros_like(full_img[:, :, :1])
            alpha.fill(255)
            full_img = np.concatenate([full_img, alpha], axis=2)

        out_memory = full_img.copy()

        alpha = img[:, :, 3]
        alpha = cv2.merge([alpha, alpha, alpha])

        back = out_memory[y1 : y2 + 1, x1 : x2 + 1].copy()
        front = img[:, :, 0:3]

        # Where the crop's alpha is below 255 keep the background colour,
        # elsewhere take the model output; the alpha plane is the background's.
        img = np.concatenate(
            [np.where(alpha < (255, 255, 255), back[:, :, :3], front), back[:, :, 3:]],
            axis=2,
        )

        out_memory[y1 : y2 + 1, x1 : x2 + 1] = (
            full_img[y1 : y2 + 1, x1 : x2 + 1] * mask_box["origin"]
            + img * mask_box["crop"]
        )
        return out_memory

    return compose_one


def get_compose_option(config):
    """Collect the face-mask ratios, applying overrides from ``config``.

    Args:
        config: Object exposing ``keys()``; when it has ``"compose_args"``,
            ``config.compose_args`` must expose ``keys()`` and attribute
            access for the ratio names.

    Returns:
        Dict with ``"blur_ratio"`` (default 0.3), ``"dilate_ratio"`` (default
        0.2) and ``"erosion_ratio"`` (default 0.0).
    """
    options = {
        "blur_ratio": 0.3,
        "dilate_ratio": 0.2,
        "erosion_ratio": 0.0,
    }
    if "compose_args" not in config.keys():
        return options

    overrides = config.compose_args
    for name in options:
        if name in overrides.keys():
            options[name] = getattr(overrides, name)
    return options


def get_compose_func_without_keying_face_only(template, ratio, verbose=False):
    """Build a compose function that blends only the face region.

    Reads ``df_fan.pickle`` from
    ``{template.crop_mp4_dir}/{stem of template.template_video_path}_000/``.
    The paste box is the first row's ``cropped_box`` scaled by ``ratio``; the
    blending mask is rebuilt for every frame from that frame's row.

    Args:
        template: Object exposing ``config``, ``crop_mp4_dir`` and
            ``template_video_path``.
        ratio: Template scale ratio applied to the stored box.
        verbose: When true, print the resolved compose options.

    Returns:
        ``compose_one(model_out, full_img, head_box_idx)`` which pastes the
        model output into a copy of ``full_img`` and returns that copy.
    """
    df = pd.read_pickle(
        f"{template.crop_mp4_dir}/{Path(template.template_video_path).stem}_000/df_fan.pickle"
    )
    # Use a wide integer type: uint8 would wrap every coordinate above 255.
    x1, y1, x2, y2 = np.round(np.array(df["cropped_box"].values[0]) * ratio).astype(
        np.int64
    )

    df = df.set_index("frame_idx")
    if verbose:
        print("get_compose_option")
        print(get_compose_option(template.config))

    def compose_one(model_out, full_img, head_box_idx):
        """Paste ``model_out`` into a copy of ``full_img`` for one frame."""
        try:
            row = df.loc[head_box_idx]
        except Exception as e:
            print("exception get_compose_func_without_keying_face_only", e)
            raise Exception(
                "exception get_compose_func_without_keying_face_only", e
            ) from e
        img = resize_adapt(model_out, (x1, y1, x2, y2))
        if y2 - y1 + 1 != img.shape[0] or x2 - x1 + 1 != img.shape[1]:
            raise Exception(
                f"not matched compose shape. x2-x1+1: {x2 - x1 + 1}, y2-y1+1:{y2 - y1 + 1}, img: {img.shape[1]}, {img.shape[0]}"
            )

        mask_box = get_face_mask(
            (img.shape[1], img.shape[0]), row, **get_compose_option(template.config)
        )
        # Paste.
        out_memory = full_img.copy()
        out_memory[y1 : y2 + 1, x1 : x2 + 1] = (
            full_img[y1 : y2 + 1, x1 : x2 + 1] * mask_box["origin"]
            + img * mask_box["crop"]
        )
        return out_memory

    return compose_one


def get_compose_func_without_keying(template, ratio, verbose=False):
    """Return a function that composites a template frame with model output.

    Args:
        template: Object whose ``config`` selects the variant: a truthy
            ``move`` picks the moving-box variant, otherwise
            ``compose == "face_only"`` picks the face-only variant, otherwise
            the default variant is used.
        ratio: Template scale ratio. 1.0 keeps the template size as is;
            0.5 halves both width and height.
        verbose: Forwarded to the selected factory.

    Returns:
        The compose function built by the selected factory.
    """
    config = template.config
    if "move" in config.keys() and config.move:
        factory = get_compose_func_without_keying_move
    elif "compose" in config.keys() and config.compose == "face_only":
        factory = get_compose_func_without_keying_face_only
    else:
        factory = get_compose_func_without_keying_default
    return factory(template=template, ratio=ratio, verbose=verbose)


def compose_direct(box, model_args, ratio, model_out, full_img):
    """Paste ``model_out`` into ``full_img`` at ``box`` with a box mask.

    Args:
        box: ``(x1, y1, x2, y2)``, inclusive on both ends.
        model_args: Config passed to ``get_box_mask``; must expose ``img_size``.
        ratio: Unused.
        model_out: Image to paste; resized to the box via ``resize_adapt``.
        full_img: Destination frame; it is copied, not modified.

    Returns:
        Copy of ``full_img`` with the box region blended.

    Raises:
        Exception: If the resized image does not match the box size.
    """
    x1, y1, x2, y2 = box
    box_w, box_h = x2 - x1 + 1, y2 - y1 + 1
    mask_box = get_box_mask(box_w, box_h, config=model_args)
    # Not used below; kept so the attribute is still read at this point.
    img_size = model_args.img_size

    img = resize_adapt(model_out, (x1, y1, x2, y2))
    if box_h != img.shape[0] or box_w != img.shape[1]:
        raise Exception(
            f"not matched compose shape. x2-x1+1: {x2 - x1 + 1}, y2-y1+1:{y2 - y1 + 1}, img: {img.shape[1]}, {img.shape[0]}"
        )

    # Paste.
    rows, cols = slice(y1, y2 + 1), slice(x1, x2 + 1)
    out_memory = full_img.copy()
    out_memory[rows, cols] = (
        full_img[rows, cols] * mask_box["origin"] + img * mask_box["crop"]
    )
    return out_memory


def keying_direct(model_args, pred, box=None):
    """Key and compose one prediction without a prepared closure.

    Args:
        model_args: Config used to pick the chroma-key and compose functions.
        pred: Mapping with ``"pred"``, ``"mask"``, ``"img_gt_with_alpha"`` and
            ``"filename"``.
        box: Optional ``(x1, y1, x2, y2)``; when it is taller than the model
            output, all three arrays are resized to it.

    Returns:
        The composed 4-channel output. For files ending in ``"_no.jpg"`` or
        ``"_no.png"``, the ground-truth image with channels 0 and 2 swapped.

    Raises:
        Exception: If the heights/widths of the three arrays differ or the
            ground-truth image does not have 4 channels.
    """
    cromakey_func = get_cromakey_func(model_args)
    compose_func = get_compose_mask_func(model_args)

    model_out = pred["pred"]
    mask = pred["mask"]
    alpha = pred["img_gt_with_alpha"]

    if pred["filename"].endswith(("_no.jpg", "_no.png")):
        return alpha[:, :, [2, 1, 0, 3]]

    shapes_agree = (
        alpha.shape[0] == mask.shape[0]
        and alpha.shape[1] == mask.shape[1]
        and alpha.shape[0] == model_out.shape[0]
        and alpha.shape[1] == model_out.shape[1]
        and alpha.shape[2] == 4
    )
    if not shapes_agree:
        raise Exception(
            f"not matched keying shape. "
            f"alpha: {alpha.shape[0]}, {alpha.shape[1]}, {alpha.shape[2]}, "
            f"mask: {mask.shape[0]}, {mask.shape[1]}, "
            f"model_out: {model_out.shape[0]}, {model_out.shape[1]}"
        )

    if box is not None and box[3] - box[1] > model_out.shape[0]:
        model_out, mask, alpha = (
            resize_adapt(model_out, box),
            resize_adapt(mask, box),
            resize_adapt(alpha, box),
        )

    keyed = cromakey_func(model_out)
    return compose_func(model_out=keyed, org_image_with_alpha=alpha, mask=mask)