"""Chroma-key, mask and compositing helpers for pasting model output into template frames."""

from pathlib import Path

import cv2
import numpy as np
import pandas as pd


def maskblur(mask, kernel_size, sigma=1):
    """Return ``mask`` Gaussian-blurred with a square ``kernel_size`` kernel."""
    return cv2.GaussianBlur(mask, (kernel_size, kernel_size), sigma)


def erosion(mask, kernel_size):
    """Return ``mask`` eroded once with a square all-ones kernel."""
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.erode(mask, kernel, iterations=1)


def dilate(mask, kernel_size):
    """Return ``mask`` dilated once with a square all-ones kernel."""
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    return cv2.dilate(mask, kernel, iterations=1)


def resize_adapt(model_out, crop_region):
    """Resize a square image so that it fits ``crop_region``.

    The image is resized to a ``max(h, w)`` square and then center-cropped to
    ``(h, w)``, where ``h``/``w`` are the inclusive height/width of the region.

    Args:
        model_out: image array; its height (``shape[0]``) is used as the side
            length, i.e. height and width are assumed to be equal.
        crop_region: ``(x1, y1, x2, y2)`` with both corners inclusive.

    Returns:
        ``model_out`` itself when it already has the target size, otherwise
        the resized (and possibly center-cropped) image.
    """
    x1, y1, x2, y2 = crop_region
    h, w = y2 - y1 + 1, x2 - x1 + 1
    sz = model_out.shape[0]  # height and width are assumed to be equal
    if h == sz and w == sz:
        return model_out

    max_hw = max(h, w)
    # Shrinking -> INTER_AREA, enlarging -> INTER_CUBIC.
    if max_hw * max_hw < model_out.shape[0] * model_out.shape[1]:
        interpolation = cv2.INTER_AREA
    else:
        interpolation = cv2.INTER_CUBIC
    # The flag must be passed by keyword: the third positional parameter of
    # cv2.resize is ``dst``, so a positional flag is not used as interpolation.
    resized = cv2.resize(model_out, (max_hw, max_hw), interpolation=interpolation)

    top = (max_hw - h) // 2
    left = (max_hw - w) // 2
    return resized[top : top + h, left : left + w]


def get_face_mask(
    img_size, df_fan_row, blur_ratio=0.3, dilate_ratio=0.2, erosion_ratio=0
):
    """Build a soft face mask from the landmarks stored in ``df_fan_row``.

    Args:
        img_size: ``(width, height)`` of the mask to create.
        df_fan_row: mapping with ``"pts2d"``, ``"cropped_box"`` and
            ``"cropped_size"`` entries. ``"pts2d"`` may be ``None``.
        blur_ratio: Gaussian blur kernel size as a fraction of the landmark
            extent (0 disables the blur).
        dilate_ratio: dilation kernel size as a fraction of the landmark
            extent (0 disables dilation).
        erosion_ratio: erosion kernel size as a fraction of the landmark
            extent (0 disables erosion).

    Returns:
        ``{"crop": mask, "origin": 1 - mask}`` where ``mask`` has shape
        ``(height, width, 1)``. When ``"pts2d"`` is ``None`` the mask is all
        zeros (uint8); otherwise it holds values in ``[0, 1]``.
    """
    assert blur_ratio >= 0 and blur_ratio <= 1
    assert erosion_ratio >= 0 and erosion_ratio <= 1
    assert dilate_ratio >= 0 and dilate_ratio <= 1

    def _get_face_pts_n_box(img_size, df_fan_row):
        box = df_fan_row["cropped_box"]
        # Move the landmarks into the cropped box's coordinate system.
        pts2d = df_fan_row["pts2d"] - np.array([box[0], box[1]])
        cropped_size = df_fan_row["cropped_size"]
        if np.ndim(cropped_size) != 0:
            # Stored as a sequence: the first element is used.
            cropped_size = cropped_size[0]
        pts2d = pts2d * (img_size[0] / cropped_size)
        xs, ys = pts2d[:, 0], pts2d[:, 1]
        # Polygon: points 0..16 followed by points 17..26 in reverse order.
        # cv2.fillPoly requires int32 points.
        outline = np.concatenate([pts2d[0:17, :], pts2d[17:27, :][::-1]]).astype(
            np.int32
        )
        return outline, (min(xs), min(ys), max(xs), max(ys))

    if df_fan_row["pts2d"] is None:
        mask = np.zeros((img_size[1], img_size[0], 1), dtype=np.uint8)
        return {"crop": mask, "origin": 1 - mask}

    pts, box = _get_face_pts_n_box(img_size, df_fan_row)
    extent = max(box[2] - box[0], box[3] - box[1])

    mask = np.zeros((img_size[1], img_size[0]), dtype=np.uint8)
    mask = cv2.fillPoly(mask, [pts], (255))
    # ``// 2 * 2 + 1`` forces every kernel size to be odd.
    if dilate_ratio != 0:
        mask = dilate(mask, int(extent * dilate_ratio) // 2 * 2 + 1)
    if erosion_ratio != 0:
        mask = erosion(mask, int(extent * erosion_ratio) // 2 * 2 + 1)
    if blur_ratio != 0:
        blur_kernel_size = int(extent * blur_ratio) // 2 * 2 + 1
        mask = maskblur(mask, blur_kernel_size, 0)

    mask = mask / 255
    if len(mask.shape) == 2:
        mask = np.expand_dims(mask, axis=2)
    return {"crop": mask, "origin": 1 - mask}


def _green_key_alpha(img, g_min, r_scale, b_scale):
    """Return a uint8 alpha map: 0 where a pixel is keyed as green, else 255.

    Channels 0, 1, 2 of ``img`` are treated as r, g, b. A pixel is keyed when
    ``g > g_min``, ``g * r_scale > r`` and ``g * b_scale > b``.
    """
    r, g, b = img[:, :, 0], img[:, :, 1], img[:, :, 2]
    is_green = (g > g_min) & ((g * r_scale) > r) & ((g * b_scale) > b)
    return ((1 - is_green) * 255).astype(np.uint8)


def _append_alpha(img, alpha):
    """Return ``img`` with ``alpha`` concatenated as an extra last-axis channel."""
    if len(alpha.shape) == 2:
        alpha = np.expand_dims(alpha, axis=2)
    return np.concatenate((img, alpha), axis=2)


def cromakey_green(img):
    """Key out green pixels and return ``img`` with an alpha channel appended."""
    alpha = _green_key_alpha(img, g_min=50, r_scale=1.0, b_scale=0.7)
    alpha = maskblur(alpha, kernel_size=13)
    alpha[alpha > 100] = 255
    alpha = erosion(alpha, kernel_size=5)
    return _append_alpha(img, alpha)


def cromakey_green_binary(img):
    """Like :func:`cromakey_green`, with the alpha channel binarized to 0/1.

    Returns:
        The keyed image whose channel 3 is 0 where alpha <= 128 and 1 above.
    """
    img = cromakey_green(img)
    alpha = img[:, :, 3]  # view: the assignments below edit ``img`` in place
    alpha[alpha <= 128] = 0
    alpha[alpha > 128] = 1
    # The original computed the result but never returned it.
    return img


def cromakey_green_hunet_lmy(img):
    """Key out green pixels (hunet_lmy thresholds) and append an alpha channel."""
    alpha = _green_key_alpha(img, g_min=70, r_scale=1.0, b_scale=0.8)
    alpha = maskblur(alpha, kernel_size=11)
    alpha[alpha > 100] = 255
    alpha = maskblur(alpha, kernel_size=3)
    alpha = erosion(alpha, kernel_size=3)
    return _append_alpha(img, alpha)


def _cromakey_green_ybm(img, g_min):
    """Shared implementation of the ybm keyers. Modifies ``img`` in place."""
    alpha = _green_key_alpha(img, g_min=g_min, r_scale=1.0, b_scale=0.9)
    alpha = maskblur(alpha, kernel_size=11)
    alpha[alpha > 100] = 255
    alpha = maskblur(alpha, kernel_size=3)
    alpha = maskblur(alpha, kernel_size=3)
    # Wherever alpha is not fully opaque, replace green with 0.8 * red.
    soft = alpha < 255
    r, g = img[:, :, 0], img[:, :, 1]
    g[soft] = r[soft] * 0.8
    return _append_alpha(img, alpha)


def cromakey_green_ybm_front(img):
    """Chroma-key function for ybm videos (front).

    Works on a copy so the caller's image is left untouched, consistent with
    :func:`cromakey_green_ybm_side`.
    """
    return _cromakey_green_ybm(img.copy(), g_min=70)


def cromakey_green_ybm_side(img):
    """Chroma-key function for ybm videos (side)."""
    return _cromakey_green_ybm(img.copy(), g_min=50)


def cromakey_green_devin_side(img):
    """Chroma-key function for devin videos."""
    img = img.copy()
    alpha = _green_key_alpha(img, g_min=70, r_scale=0.8, b_scale=0.9)
    alpha = maskblur(alpha, kernel_size=7, sigma=3)
    alpha[alpha < 150] = 0
    alpha = maskblur(alpha, kernel_size=5, sigma=2)
    return _append_alpha(img, alpha)


def get_cromakey_func(args):
    """Return the chroma-key function selected by ``args.cromakey``.

    Args:
        args: config object supporting ``keys()`` and attribute access.

    Returns:
        :func:`cromakey_green_hunet_lmy` when no ``cromakey`` key exists,
        otherwise the function registered under the configured name.

    Raises:
        ValueError: if ``args.cromakey`` names an unknown function.
    """
    if "cromakey" not in args.keys():
        return cromakey_green_hunet_lmy

    registry = {
        "cromakey_green_ybm_front": cromakey_green_ybm_front,
        "cromakey_green_ybm_side": cromakey_green_ybm_side,
        "cromakey_green_devin_side": cromakey_green_devin_side,
    }
    name = args.cromakey
    for known_name, func in registry.items():
        if known_name == name:
            return func
    # Raising a plain string is itself a TypeError in Python 3.
    raise ValueError(f"cromakey not found: {name!r}")


def compose_default_(model_out, org_image_with_alpha, mask, **kwargs):
    """Blend the original alpha and the keyed alpha using a binary mask.

    Note: ``mask`` (channel 0) and ``model_out`` (channel 3) are modified in
    place; ``model_out`` is also returned.
    """
    # 1. Blend alpha: mix the original video's alpha with the computed one.
    mask = mask[:, :, 0]
    mask[mask > 0] = 1  # turn every non-zero mask value (e.g. 128) into 1
    model_out[:, :, 3] = (
        org_image_with_alpha[:, :, 3] * (1 - mask) + model_out[:, :, 3] * mask
    )
    # 2. Blur slightly so the blended alpha looks natural.
    model_out[:, :, 3] = maskblur(model_out[:, :, 3], kernel_size=3, sigma=1)
    return model_out


def compose_devin_(model_out, org_image_with_alpha, mask, debug=False, **kwargs):
    """Blend the original alpha and the keyed alpha using a softened mask.

    Note: ``mask`` (channel 0) and ``model_out`` (channel 3) are modified in
    place; ``model_out`` is also returned.
    """
    mask = mask[:, :, 0]
    mask[mask > 0] = 1  # turn every non-zero mask value (e.g. 128) into 1
    mask = mask.astype(np.float32)

    # 1. Soften the mask so its border against the existing alpha is hard to
    #    see. The mask is grown by roughly 3% of the image height (odd size).
    kernel_size = int(mask.shape[0] * 0.03) // 2 * 2 + 1
    if debug:
        print(
            f"## compose_devin_: kernel_size:{kernel_size}, mask_height:{mask.shape[0]}"
        )
    if kernel_size >= 3:
        mask = dilate(mask, kernel_size=kernel_size)
        mask = maskblur(mask, kernel_size=kernel_size, sigma=kernel_size // 2)
        mask = maskblur(mask, kernel_size=kernel_size, sigma=kernel_size // 2)
        mask = erosion(mask, kernel_size=3)  # shrink by one pixel only

    # 2. Blend alpha: mix the original video's alpha with the computed one.
    model_out[:, :, 3] = (
        org_image_with_alpha[:, :, 3] * (1 - mask) + model_out[:, :, 3] * mask
    )
    # 3. Blur once more so the blended alpha is smooth.
    model_out[:, :, 3] = maskblur(model_out[:, :, 3], kernel_size=3, sigma=1)
    return model_out


def get_compose_mask_func(args):
    """Return :func:`compose_devin_` or :func:`compose_default_` based on ``args``."""
    keys = args.keys()
    if "cromakey" in keys and "cromakey_green_devin_side" == args.cromakey:
        return compose_devin_
    if "compose" in keys and "compose_smooth" == args.compose:
        return compose_devin_
    return compose_default_


def _run_keying(cromakey_func, compose_func, pred, box, require_rgba):
    """Shared body of ``keying_`` and :func:`keying_direct`.

    ``require_rgba`` additionally demands that the reference image has
    exactly four channels.
    """
    model_out, mask, alpha = pred["pred"], pred["mask"], pred["img_gt_with_alpha"]
    if pred["filename"].endswith(("_no.jpg", "_no.png")):
        # Return the reference image with channels 0 and 2 swapped.
        return alpha[:, :, [2, 1, 0, 3]]

    mismatch = (
        alpha.shape[0] != mask.shape[0]
        or alpha.shape[1] != mask.shape[1]
        or alpha.shape[0] != model_out.shape[0]
        or alpha.shape[1] != model_out.shape[1]
    )
    if mismatch or (require_rgba and alpha.shape[2] != 4):
        raise Exception(
            f"not matched keying shape. "
            f"alpha: {alpha.shape[0]}, {alpha.shape[1]}, {alpha.shape[2]}, "
            f"mask: {mask.shape[0]}, {mask.shape[1]}, "
            f"model_out: {model_out.shape[0]}, {model_out.shape[1]}"
        )

    if box is not None and box[3] - box[1] > model_out.shape[0]:
        # The target box is taller than the model output: resize everything.
        model_out = resize_adapt(model_out, box)
        mask = resize_adapt(mask, box)
        alpha = resize_adapt(alpha, box)

    model_out = cromakey_func(model_out)
    return compose_func(model_out=model_out, org_image_with_alpha=alpha, mask=mask)


def get_keying_func(template):
    """Return a ``keying_(pred, idx, box=None)`` closure for ``template``.

    The chroma-key and compose functions are resolved once from
    ``template.model.args``.
    """
    cromakey_func = get_cromakey_func(template.model.args)
    compose_func = get_compose_mask_func(template.model.args)

    def keying_(pred, idx, box=None):
        return _run_keying(cromakey_func, compose_func, pred, box, require_rgba=False)

    return keying_


def get_box_mask(width, height, config, verbose=False):
    """Build a rectangular blending mask whose border fades towards 0.

    Args:
        width: mask width in pixels.
        height: mask height in pixels.
        config: mapping-like object; optional ``"gradation_bottom"`` and
            ``"box_mask_erosion"`` entries are fractions of ``height``.
        verbose: unused.

    Returns:
        ``{"crop": mask, "origin": 1 - mask}`` with ``mask`` of shape
        ``(height, width, 1)``.
    """

    def get_mask_(
        width, height, gradation_width, gradation_bottom=None, box_mask_erosion=None
    ):
        mask = np.ones((height, width, 1))
        # Concentric one-pixel rings, ring ``s`` gets value s / gradation_width.
        for s in range(gradation_width - 1):
            e = s + 1
            g = s / gradation_width
            mask[s:e, s : width - s, :] = g
            mask[height - e : height - s, s : width - s, :] = g
            mask[s : height - s, s:e, :] = g
            mask[s : height - s, width - e : width - s, :] = g
        if gradation_bottom is not None:
            # Overwrite the bottom rows with their own gradient.
            for s in range(gradation_bottom - 1):
                g = s / gradation_bottom
                mask[height - (s + 1) : height - s, s : width - s, :] = g
        if box_mask_erosion is not None:
            mask = erosion(mask, box_mask_erosion * 2 + 1)
        if len(mask.shape) == 2:
            mask = np.expand_dims(mask, 2)
        return mask  # shape e.g. (352, 352, 1)

    gradation_width = int(height * 0.1)
    gradation_bottom = (
        int(height * config["gradation_bottom"])
        if "gradation_bottom" in config.keys()
        else None
    )
    box_mask_erosion = (
        int(height * config["box_mask_erosion"])
        if "box_mask_erosion" in config.keys()
        else None
    )
    mask = get_mask_(width, height, gradation_width, gradation_bottom, box_mask_erosion)
    return {"crop": mask, "origin": 1 - mask}


def _load_df_fan(template):
    """Load the ``df_fan.pickle`` DataFrame that belongs to ``template``.

    NOTE(review): ``pd.read_pickle`` can execute arbitrary code; the pickle
    must come from a trusted source.
    """
    return pd.read_pickle(
        f"{template.crop_mp4_dir}/{Path(template.template_video_path).stem}_000/df_fan.pickle"
    )


def _scaled_box(box, ratio):
    """Scale ``box`` by ``ratio`` and round to a tuple of Python ints.

    Plain ints are used on purpose: a uint8 cast wraps around for any
    coordinate above 255.
    """
    return tuple(int(v) for v in np.round(np.array(box) * ratio))


def _check_compose_shape(img, box):
    """Raise if ``img`` does not have the inclusive size of ``box``."""
    x1, y1, x2, y2 = box
    if y2 - y1 + 1 != img.shape[0] or x2 - x1 + 1 != img.shape[1]:
        raise Exception(
            f"not matched compose shape. x2-x1+1: {x2 - x1 + 1}, y2-y1+1:{y2 - y1 + 1}, img: {img.shape[1]}, {img.shape[0]}"
        )


def _blend_into(full_img, img, box, mask_box):
    """Return a copy of ``full_img`` with ``img`` mask-blended into ``box``."""
    x1, y1, x2, y2 = box
    out_memory = full_img.copy()
    out_memory[y1 : y2 + 1, x1 : x2 + 1] = (
        full_img[y1 : y2 + 1, x1 : x2 + 1] * mask_box["origin"]
        + img * mask_box["crop"]
    )
    return out_memory


def _blend_rgba_into(full_img, img, box, mask_box):
    """Alpha-aware paste of a 4-channel ``img`` into ``full_img`` at ``box``.

    A 3-channel ``full_img`` first gets an alpha channel filled with 255.
    Where ``img``'s alpha is below 255 the background colour is kept; the
    background's alpha channel is always kept.
    """
    x1, y1, x2, y2 = box
    if full_img.shape[2] == 3:
        opaque = np.zeros_like(full_img[:, :, :1])
        opaque.fill(255)
        full_img = np.concatenate([full_img, opaque], axis=2)

    back = full_img[y1 : y2 + 1, x1 : x2 + 1].copy()
    keep_back = img[:, :, 3:4] < 255  # broadcasts over the colour channels
    colour = np.where(keep_back, back[:, :, :3], img[:, :, 0:3])
    img = np.concatenate([colour, back[:, :, 3:]], axis=2)
    return _blend_into(full_img, img, box, mask_box)


def get_compose_func_without_keying_move(template, ratio, verbose=False):
    """Return a compose function for templates whose head box moves per frame.

    Args:
        template: object exposing ``model.args``, ``config``, ``crop_mp4_dir``
            and ``template_video_path``.
        ratio: template scale ratio (1.0 keeps the template size).
        verbose: forwarded to :func:`get_box_mask`.

    Returns:
        ``compose_one(model_out, full_img, head_box_idx)``.
    """
    args = template.model.args
    df = _load_df_fan(template).set_index("frame_idx")
    first_box = df.loc[0]["cropped_box"]
    move_head_box_size = (first_box[2] - first_box[0] - 20) // 10 * 10

    def resize_and_scale(model_out, head_box_idx):
        # Resize to the size that matches ratio 1.0, then crop both head_box
        # and model_out to a position on a multiple of 10 in the original
        # video.
        head_box = df["cropped_box"][head_box_idx]
        if ratio == 1.0:
            return model_out, head_box

        # First bring it to the original size.
        model_out = resize_adapt(model_out, head_box)

        # Find the multiple-of-10 coordinates inside the original box.
        l, t = (np.array(head_box[:2]) + 9) // 10 * 10
        new_head_box = np.array(
            [l, t, l + move_head_box_size - 1, t + move_head_box_size - 1]
        )  # both ends are inclusive, hence the -1

        # Crop the image so it lines up with the multiple-of-10 grid.
        diff_box = new_head_box - head_box
        new_model_out = model_out[diff_box[1] : diff_box[3], diff_box[0] : diff_box[2]]
        if (
            new_model_out.shape[0] % 10 != 0 or new_model_out.shape[1] % 10 != 0
        ):  # the size must be a multiple of 10
            raise Exception(f"new_model_out.shape % 10 != 0, {new_model_out.shape}")

        # Change to the size that matches ``ratio``.
        x1, y1, _, _ = _scaled_box(new_head_box, ratio)
        scaled_size = int(move_head_box_size * ratio)
        # Both ends are inclusive, hence the -1.
        new_head_box = (x1, y1, x1 + scaled_size - 1, y1 + scaled_size - 1)
        new_model_out = resize_adapt(new_model_out, new_head_box)
        return new_model_out, new_head_box

    def compose_one(model_out, full_img, head_box_idx):
        model_out, box = resize_and_scale(model_out, head_box_idx)
        x1, y1, x2, y2 = box
        img = resize_adapt(model_out, (x1, y1, x2, y2))
        if (
            "compose" in template.config.keys()
            and template.config.compose == "face_only"
        ):
            row = df.loc[head_box_idx]
            mask_box = get_face_mask(
                (img.shape[1], img.shape[0]), row, **get_compose_option(template.config)
            )
        else:
            mask_box = get_box_mask(
                x2 - x1 + 1, y2 - y1 + 1, config=args, verbose=verbose
            )
        _check_compose_shape(img, (x1, y1, x2, y2))
        return _blend_rgba_into(full_img, img, (x1, y1, x2, y2), mask_box)

    return compose_one


def get_compose_func_without_keying_default(template, ratio, verbose=False):
    """Return a compose function that pastes into one fixed box.

    The box is the first row's ``cropped_box`` scaled by ``ratio`` (the
    cropped box shrinks by the same factor the template was reduced by).

    Returns:
        ``compose_one(model_out, full_img, _)``.
    """
    args = template.model.args
    df = _load_df_fan(template)
    box = _scaled_box(df["cropped_box"].values[0], ratio)
    del df
    x1, y1, x2, y2 = box

    mask_box = get_box_mask(x2 - x1 + 1, y2 - y1 + 1, config=args, verbose=verbose)
    if verbose:
        print("croped size: ", x2 - x1 + 1, y2 - y1 + 1)
        print("croped region(x1,y1,x2,y2): ", x1, y1, x2, y2)

    def compose_one(model_out, full_img, _):
        img = resize_adapt(model_out, box)
        _check_compose_shape(img, box)
        # Paste.
        return _blend_rgba_into(full_img, img, box, mask_box)

    return compose_one


def get_compose_option(config):
    """Return the face-mask ratios, with defaults overridden by ``config.compose_args``.

    Returns:
        dict with ``blur_ratio`` (default 0.3), ``dilate_ratio`` (default 0.2)
        and ``erosion_ratio`` (default 0.0).
    """
    options = {"blur_ratio": 0.3, "dilate_ratio": 0.2, "erosion_ratio": 0.0}
    if "compose_args" in config.keys():
        compose_args = config.compose_args
        for name in options:
            if name in compose_args.keys():
                options[name] = getattr(compose_args, name)
    return options


def get_compose_func_without_keying_face_only(template, ratio, verbose=False):
    """Return a compose function that blends only the face region.

    The paste box is the first row's ``cropped_box`` scaled by ``ratio``; the
    face mask is rebuilt per frame from that frame's landmarks.

    Returns:
        ``compose_one(model_out, full_img, head_box_idx)``.
    """
    df = _load_df_fan(template)
    box = _scaled_box(df["cropped_box"].values[0], ratio)
    df = df.set_index("frame_idx")
    if verbose:
        print("get_compose_option")
        print(get_compose_option(template.config))

    def compose_one(model_out, full_img, head_box_idx):
        try:
            row = df.loc[head_box_idx]
        except Exception as e:
            print("exception get_compose_func_without_keying_face_only", e)
            raise Exception(
                "exception get_compose_func_without_keying_face_only", e
            ) from e

        img = resize_adapt(model_out, box)
        _check_compose_shape(img, box)
        mask_box = get_face_mask(
            (img.shape[1], img.shape[0]), row, **get_compose_option(template.config)
        )
        # Paste.
        return _blend_into(full_img, img, box, mask_box)

    return compose_one


def get_compose_func_without_keying(template, ratio, verbose=False):
    """Return a function that composites a template frame with model output.

    Args:
        template: template object; ``template.config`` selects the variant
            (``move`` first, then ``compose == "face_only"``, else default).
        ratio: template scale ratio. 1.0 keeps the template size as is; 0.5
            means width and height are halved.
        verbose: forwarded to the selected factory.
    """
    config = template.config
    if "move" in config.keys() and config.move:
        factory = get_compose_func_without_keying_move
    elif "compose" in config.keys() and config.compose == "face_only":
        factory = get_compose_func_without_keying_face_only
    else:
        factory = get_compose_func_without_keying_default
    return factory(template=template, ratio=ratio, verbose=verbose)


def compose_direct(box, model_args, ratio, model_out, full_img):
    """Paste ``model_out`` into ``full_img`` at ``box`` using a box mask.

    Args:
        box: ``(x1, y1, x2, y2)`` with both corners inclusive.
        model_args: config passed to :func:`get_box_mask`.
        ratio: unused; kept for interface compatibility.
        model_out: image to paste.
        full_img: frame to paste into (not modified).

    Returns:
        A copy of ``full_img`` with the blended region.
    """
    x1, y1, x2, y2 = box
    mask_box = get_box_mask(x2 - x1 + 1, y2 - y1 + 1, config=model_args)
    img = resize_adapt(model_out, (x1, y1, x2, y2))
    _check_compose_shape(img, (x1, y1, x2, y2))
    # Paste.
    return _blend_into(full_img, img, (x1, y1, x2, y2), mask_box)


def keying_direct(model_args, pred, box=None):
    """Chroma-key ``pred["pred"]`` and merge its alpha with the reference image.

    Args:
        model_args: config used to pick the chroma-key and compose functions.
        pred: mapping with ``"pred"``, ``"mask"``, ``"img_gt_with_alpha"`` and
            ``"filename"``.
        box: optional ``(x1, y1, x2, y2)``; when it is taller than the model
            output, all three images are resized to it first.

    Raises:
        Exception: if the image sizes disagree or the reference image does
            not have four channels.
    """
    cromakey_func = get_cromakey_func(model_args)
    compose_func = get_compose_mask_func(model_args)
    return _run_keying(cromakey_func, compose_func, pred, box, require_rgba=True)