"""End-to-end FGT video inpainting: RAFT flow estimation, LAFC flow completion,
flow-guided gradient propagation with Poisson blending, and FGT transformer filling."""

import sys
import os
import warnings

sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..")))
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "FGT")))
sys.path.append(os.path.abspath(os.path.join(__file__, "..", "..", "LAFC")))
warnings.filterwarnings("ignore")

import cvbase
from torchvision.transforms import ToTensor
from tool import get_flowNN_gradient
from tool.utils.Poisson_blend_img import Poisson_blend_img
from tool.utils.region_fill import regionfill
from importlib import import_module
import yaml
from RAFT import RAFT
from RAFT import utils
import torch.nn.functional as F2
import torchvision.transforms.functional as F
from skimage.feature import canny
import scipy.ndimage
from PIL import Image
import imageio
import torch
import numpy as np
import copy
import glob
import cv2
import argparse


def to_tensor(img):
    img = Image.fromarray(img)
    img_t = F.to_tensor(img).float()
    return img_t


def diffusion(flows, masks):
    """Initializes masked flow regions by diffusion (region fill)."""
    flows_filled = []
    for i in range(flows.shape[0]):
        flow, mask = flows[i], masks[i]
        flow_filled = np.zeros(flow.shape)
        flow_filled[:, :, 0] = regionfill(flow[:, :, 0], mask[:, :, 0])
        flow_filled[:, :, 1] = regionfill(flow[:, :, 1], mask[:, :, 0])
        flows_filled.append(flow_filled)
    return flows_filled


def np2tensor(array, near="c"):
    if isinstance(array, list):
        array = np.stack(array, axis=0)  # [t, h, w, c]
    if near == "c":
        array = (
            torch.from_numpy(np.transpose(array, (3, 0, 1, 2))).unsqueeze(0).float()
        )  # [1, c, t, h, w]
    elif near == "t":
        array = (
            torch.from_numpy(np.transpose(array, (0, 3, 1, 2))).unsqueeze(0).float()
        )  # [1, t, c, h, w]
    else:
        raise ValueError(f"Unknown near type: {near}")
    return array


def tensor2np(array):
    array = torch.stack(array, dim=-1).squeeze(0).permute(1, 2, 0, 3).cpu().numpy()
    return array


def gradient_mask(mask):
    """Dilates the mask by one pixel to the bottom and right (gradient support)."""
    gradient_mask = np.logical_or.reduce(
        (
            mask,
            np.concatenate(
                (mask[1:, :], np.zeros((1, mask.shape[1]), dtype=bool)), axis=0
            ),
            np.concatenate(
                (mask[:, 1:], np.zeros((mask.shape[0], 1), dtype=bool)), axis=1
            ),
        )
    )
    return gradient_mask


def indicesGen(pivot, interval, frames, t):
    """Generates frame indices sampled around a pivot, reflected at sequence borders."""
    singleSide = frames // 2
    results = []
    for i in range(-singleSide, singleSide + 1):
        index = pivot + interval * i
        if index < 0:
            index = abs(index)
        if index > t - 1:
            index = 2 * (t - 1) - index
        results.append(index)
    return results


def get_ref_index(f, neighbor_ids, length, ref_length, num_ref):
    """Selects non-local reference frame indices for the FGT transformer."""
    ref_index = []
    if num_ref == -1:
        for i in range(0, length, ref_length):
            if i not in neighbor_ids:
                ref_index.append(i)
    else:
        start_idx = max(0, f - ref_length * (num_ref // 2))
        end_idx = min(length, f + ref_length * (num_ref // 2))
        for i in range(start_idx, end_idx + 1, ref_length):
            if i not in neighbor_ids:
                if len(ref_index) > num_ref:
                    break
                ref_index.append(i)
    return ref_index


def save_flows(output, videoFlowF, videoFlowB):
    create_dir(os.path.join(output, "completed_flow", "forward_flo"))
    create_dir(os.path.join(output, "completed_flow", "backward_flo"))
    create_dir(os.path.join(output, "completed_flow", "forward_png"))
    create_dir(os.path.join(output, "completed_flow", "backward_png"))
    N = videoFlowF.shape[-1]
    for i in range(N):
        forward_flow = videoFlowF[..., i]
        backward_flow = videoFlowB[..., i]
        forward_flow_vis = cvbase.flow2rgb(forward_flow)
        backward_flow_vis = cvbase.flow2rgb(backward_flow)
        cvbase.write_flow(
            forward_flow,
            os.path.join(output, "completed_flow", "forward_flo", "{:05d}.flo".format(i)),
        )
        cvbase.write_flow(
            backward_flow,
            os.path.join(output, "completed_flow", "backward_flo", "{:05d}.flo".format(i)),
        )
"completed_flow", "forward_png", "{:05d}.png".format(i) ), forward_flow_vis, ) imageio.imwrite( os.path.join( output, "completed_flow", "backward_png", "{:05d}.png".format(i) ), backward_flow_vis, ) def save_fgcp(output, frames, masks): create_dir(os.path.join(output, "prop_frames")) create_dir(os.path.join(output, "masks_left")) create_dir(os.path.join(output, "prop_frames_npy")) create_dir(os.path.join(output, "masks_left_npy")) assert len(frames) == masks.shape[2] for i in range(len(frames)): cv2.imwrite( os.path.join(output, "prop_frames", "%05d.png" % i), frames[i] * 255.0 ) cv2.imwrite( os.path.join(output, "masks_left", "%05d.png" % i), masks[:, :, i] * 255.0 ) np.save( os.path.join(output, "prop_frames_npy", "%05d.npy" % i), frames[i] * 255.0 ) np.save( os.path.join(output, "masks_left_npy", "%05d.npy" % i), masks[:, :, i] * 255.0, ) def create_dir(dir): """Creates a directory if not exist.""" if not os.path.exists(dir): os.makedirs(dir) def initialize_RAFT(args, device): """Initializes the RAFT model.""" model = torch.nn.DataParallel(RAFT(args)) model.load_state_dict(torch.load(args.raft_model)) model = model.module model.to(device) model.eval() return model def initialize_LAFC(args, device): print(args.lafc_ckpts) assert len(os.listdir(args.lafc_ckpts)) == 2 checkpoint, config_file = ( glob.glob(os.path.join(args.lafc_ckpts, "*.tar"))[0], glob.glob(os.path.join(args.lafc_ckpts, "*.yaml"))[0], ) with open(config_file, "r") as f: configs = yaml.full_load(f) model = configs["model"] pkg = import_module("LAFC.models.{}".format(model)) model = pkg.Model(configs) state = torch.load( checkpoint, map_location=lambda storage, loc: storage.cuda(device) ) model.load_state_dict(state["model_state_dict"]) model = model.to(device) return model, configs def initialize_FGT(args, device): assert len(os.listdir(args.fgt_ckpts)) == 2 checkpoint, config_file = ( glob.glob(os.path.join(args.fgt_ckpts, "*.tar"))[0], glob.glob(os.path.join(args.fgt_ckpts, "*.yaml"))[0], ) with open(config_file, "r") as f: configs = yaml.full_load(f) model = configs["model"] net = import_module("FGT.models.{}".format(model)) model = net.Model(configs).to(device) state = torch.load( checkpoint, map_location=lambda storage, loc: storage.cuda(device) ) model.load_state_dict(state["model_state_dict"]) return model, configs def calculate_flow(args, model, video, mode): """Calculates optical flow.""" if mode not in ["forward", "backward"]: raise NotImplementedError imgH, imgW = args.imgH, args.imgW Flow = np.empty(((imgH, imgW, 2, 0)), dtype=np.float32) if args.vis_flows: create_dir(os.path.join(args.outroot, "flow", mode + "_flo")) create_dir(os.path.join(args.outroot, "flow", mode + "_png")) with torch.no_grad(): for i in range(video.shape[0] - 1): print( "Calculating {0} flow {1:2d} <---> {2:2d}".format(mode, i, i + 1), "\r", end="", ) if mode == "forward": # Flow i -> i + 1 image1 = video[i, None] image2 = video[i + 1, None] elif mode == "backward": # Flow i + 1 -> i image1 = video[i + 1, None] image2 = video[i, None] else: raise NotImplementedError _, flow = model(image1, image2, iters=20, test_mode=True) flow = flow[0].permute(1, 2, 0).cpu().numpy() # resize optical flows h, w = flow.shape[:2] if h != imgH or w != imgW: flow = cv2.resize(flow, (imgW, imgH), cv2.INTER_LINEAR) flow[:, :, 0] *= float(imgW) / float(w) flow[:, :, 1] *= float(imgH) / float(h) Flow = np.concatenate((Flow, flow[..., None]), axis=-1) if args.vis_flows: # Flow visualization. 
                flow_img = utils.flow_viz.flow_to_image(flow)
                flow_img = Image.fromarray(flow_img)

                # Saves the flow and flow_img.
                flow_img.save(
                    os.path.join(args.outroot, "flow", mode + "_png", "%05d.png" % i)
                )
                utils.frame_utils.writeFlow(
                    os.path.join(args.outroot, "flow", mode + "_flo", "%05d.flo" % i),
                    flow,
                )

    return Flow


def extrapolation(args, video_ori, corrFlowF_ori, corrFlowB_ori):
    """Prepares the data for video extrapolation."""
    imgH, imgW, _, nFrame = video_ori.shape

    # Defines new FOV.
    imgH_extr = int(args.H_scale * imgH)
    imgW_extr = int(args.W_scale * imgW)
    imgH_extr = imgH_extr - imgH_extr % 4
    imgW_extr = imgW_extr - imgW_extr % 4
    H_start = int((imgH_extr - imgH) / 2)
    W_start = int((imgW_extr - imgW) / 2)

    # Generates the mask for the missing region.
    flow_mask = np.ones((imgH_extr, imgW_extr), dtype=bool)
    flow_mask[H_start : H_start + imgH, W_start : W_start + imgW] = 0
    mask_dilated = gradient_mask(flow_mask)

    # Extrapolates the FOV for video.
    video = np.zeros((imgH_extr, imgW_extr, 3, nFrame), dtype=np.float32)
    video[H_start : H_start + imgH, W_start : W_start + imgW, :, :] = video_ori

    for i in range(nFrame):
        print("Preparing frame {0}".format(i), "\r", end="")
        video[:, :, :, i] = (
            cv2.inpaint(
                (video[:, :, :, i] * 255).astype(np.uint8),
                flow_mask.astype(np.uint8),
                3,
                cv2.INPAINT_TELEA,
            ).astype(np.float32)
            / 255.0
        )

    # Extrapolates the FOV for flow.
    corrFlowF = np.zeros((imgH_extr, imgW_extr, 2, nFrame - 1), dtype=np.float32)
    corrFlowB = np.zeros((imgH_extr, imgW_extr, 2, nFrame - 1), dtype=np.float32)
    corrFlowF[H_start : H_start + imgH, W_start : W_start + imgW, :] = corrFlowF_ori
    corrFlowB[H_start : H_start + imgH, W_start : W_start + imgW, :] = corrFlowB_ori

    return (
        video,
        corrFlowF,
        corrFlowB,
        flow_mask,
        mask_dilated,
        (W_start, H_start),
        (W_start + imgW, H_start + imgH),
    )


def complete_flow(config, flow_model, flows, flow_masks, mode, device):
    """Completes corrupted optical flows with the LAFC model."""
    if mode not in ["forward", "backward"]:
        raise NotImplementedError(f"Error flow mode {mode}")
    flow_masks = np.moveaxis(flow_masks, -1, 0)  # [N, H, W]
    flows = np.moveaxis(flows, -1, 0)  # [N, H, W, 2]

    if len(flow_masks.shape) == 3:
        flow_masks = flow_masks[:, :, :, np.newaxis]
    if mode == "forward":
        flow_masks = flow_masks[0:-1]
    else:
        flow_masks = flow_masks[1:]

    num_flows, flow_interval = config["num_flows"], config["flow_interval"]

    # Diffusion-based initialization of the masked flow regions.
    diffused_flows = diffusion(flows, flow_masks)

    flows = np2tensor(flows)
    flow_masks = np2tensor(flow_masks)
    diffused_flows = np2tensor(diffused_flows)

    flows = flows.to(device)
    flow_masks = flow_masks.to(device)
    diffused_flows = diffused_flows.to(device)

    t = diffused_flows.shape[2]
    filled_flows = [None] * t
    pivot = num_flows // 2
    for i in range(t):
        indices = indicesGen(i, flow_interval, num_flows, t)
        print("Indices: ", indices, "\r", end="")
        cand_flows = flows[:, :, indices]
        cand_masks = flow_masks[:, :, indices]
        inputs = diffused_flows[:, :, indices]
        pivot_mask = cand_masks[:, :, pivot]
        pivot_flow = cand_flows[:, :, pivot]
        with torch.no_grad():
            output_flow = flow_model(inputs, cand_masks)
        if isinstance(output_flow, (tuple, list)):
            output_flow = output_flow[0]
        # Keeps the known flow outside the mask and uses the prediction inside it.
        comp = output_flow * pivot_mask + pivot_flow * (1 - pivot_mask)
        if filled_flows[i] is None:
            filled_flows[i] = comp
    assert None not in filled_flows
    return filled_flows


def read_flow(flow_dir, video):
    nFrame, _, imgH, imgW = video.shape
    Flow = np.empty((imgH, imgW, 2, 0), dtype=np.float32)
    flows = sorted(glob.glob(os.path.join(flow_dir, "*.flo")))
    for flow in flows:
        flow_data = cvbase.read_flow(flow)
        h, w = flow_data.shape[:2]
        flow_data = cv2.resize(flow_data, (imgW, imgH), interpolation=cv2.INTER_LINEAR)
        flow_data[:, :, 0] *= float(imgW) / float(w)
        flow_data[:, :, 1] *= float(imgH) / float(h)
        Flow = np.concatenate((Flow, flow_data[..., None]), axis=-1)
    return Flow


def norm_flows(flows):
    """Normalizes each flow map by its maximum value."""
    assert len(flows.shape) == 5, "Flow shape: {}".format(flows.shape)
    flattened_flows = flows.flatten(3)
    flow_max = torch.max(flattened_flows, dim=-1, keepdim=True)[0]
    flows = flows / flow_max.unsqueeze(-1)
    return flows


def save_results(outdir, comp_frames):
    out_dir = os.path.join(outdir, "frames")
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    for i in range(len(comp_frames)):
        out_path = os.path.join(out_dir, "{:05d}.png".format(i))
        cv2.imwrite(out_path, comp_frames[i][:, :, ::-1])


def video_inpainting(args, imgArr, imgMaskArr):
    device = torch.device("cuda:{}".format(args.gpu))
    print(args)

    if args.opt is not None:
        with open(args.opt, "r") as f:
            opts = yaml.full_load(f)
        for k in opts.keys():
            if k in args:
                setattr(args, k, opts[k])
    print(args)

    # Flow model.
    RAFT_model = initialize_RAFT(args, device)
    # LAFC (flow completion).
    LAFC_model, LAFC_config = initialize_LAFC(args, device)
    # FGT (transformer).
    FGT_model, FGT_config = initialize_FGT(args, device)

    # Obtains imgH, imgW and nFrame.
    imgH, imgW = args.imgH, args.imgW
    nFrame = len(imgArr)

    if imgH < 350:
        flowH, flowW = imgH * 2, imgW * 2
    else:
        flowH, flowW = imgH, imgW

    # Loads video.
    video, video_flow = [], []
    if args.mode == "watermark_removal":
        filename_list = glob.glob(os.path.join(args.path, "*.png")) + glob.glob(
            os.path.join(args.path, "*.jpg")
        )
        maskname_list = glob.glob(os.path.join(args.path_mask, "*.png")) + glob.glob(
            os.path.join(args.path_mask, "*.jpg")
        )
        assert len(filename_list) == len(maskname_list)
        for filename, maskname in zip(sorted(filename_list), sorted(maskname_list)):
            frame = (
                torch.from_numpy(np.array(Image.open(filename)).astype(np.uint8))
                .permute(2, 0, 1)
                .float()
                .unsqueeze(0)
            )
            mask = (
                torch.from_numpy(np.array(Image.open(maskname)).astype(np.uint8))
                .permute(2, 0, 1)
                .float()
                .unsqueeze(0)
            )
            mask[mask > 0] = 1
            frame = frame * (1 - mask)
            frame = F2.interpolate(
                frame, size=(imgH, imgW), mode="bilinear", align_corners=False
            )
            frame_flow = F2.interpolate(
                frame, size=(flowH, flowW), mode="bilinear", align_corners=False
            )
            video.append(frame)
            video_flow.append(frame_flow)
    else:
        # Original file-based loading, kept for reference:
        # for filename in sorted(filename_list):
        #     frame = torch.from_numpy(np.array(Image.open(filename)).astype(np.uint8)).permute(2, 0, 1).float().unsqueeze(0)
        #     frame = F2.interpolate(frame, size=(imgH, imgW), mode='bilinear', align_corners=False)
        #     frame_flow = F2.interpolate(frame, size=(flowH, flowW), mode='bilinear', align_corners=False)
        #     video.append(frame)
        #     video_flow.append(frame_flow)
        for im in imgArr:
            frame = (
                torch.from_numpy(np.array(im).astype(np.uint8))
                .permute(2, 0, 1)
                .float()
                .unsqueeze(0)
            )
            frame = F2.interpolate(
                frame, size=(imgH, imgW), mode="bilinear", align_corners=False
            )
            frame_flow = F2.interpolate(
                frame, size=(flowH, flowW), mode="bilinear", align_corners=False
            )
            video.append(frame)
            video_flow.append(frame_flow)

    video = torch.cat(video, dim=0)  # [n, c, h, w]
    video_flow = torch.cat(video_flow, dim=0)
    gts = video.clone()
    video = video.to(device)
    video_flow = video_flow.to(device)

    # Calculates the corrupted flow.
    forward_flows = calculate_flow(args, RAFT_model, video_flow, "forward")  # [H, W, 2, N]
    backward_flows = calculate_flow(args, RAFT_model, video_flow, "backward")

    # Makes sure video is in BGR (opencv) format.
    video = (
        video.permute(2, 3, 1, 0).cpu().numpy()[:, :, ::-1, :] / 255.0
    )  # np array -> [h, w, c, N] (0~1)

    if args.mode == "video_extrapolation":
        # Creates video and flow where the extrapolated region is missing.
        (
            video,
            forward_flows,
            backward_flows,
            flow_mask,
            mask_dilated,
            start_point,
            end_point,
        ) = extrapolation(args, video, forward_flows, backward_flows)
        imgH, imgW = video.shape[:2]

        # Masks indicating the missing region in the video.
        mask = np.tile(flow_mask[..., None], (1, 1, nFrame))
        flow_mask = np.tile(flow_mask[..., None], (1, 1, nFrame))
        mask_dilated = np.tile(mask_dilated[..., None], (1, 1, nFrame))
    else:
        # Loads masks.
        filename_list = glob.glob(os.path.join(args.path_mask, "*.png")) + glob.glob(
            os.path.join(args.path_mask, "*.jpg")
        )
        mask = []
        mask_dilated = []
        flow_mask = []
        # Original file-based mask loading, kept for reference:
        # for filename in sorted(filename_list):
        #     mask_img = np.array(Image.open(filename).convert('L'))
        #     mask_img = cv2.resize(mask_img, dsize=(imgW, imgH), interpolation=cv2.INTER_NEAREST)
        #     if args.flow_mask_dilates > 0:
        #         flow_mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=args.flow_mask_dilates)
        #     else:
        #         flow_mask_img = mask_img
        #     flow_mask.append(flow_mask_img)
        #     if args.frame_dilates > 0:
        #         mask_img = scipy.ndimage.binary_dilation(mask_img, iterations=args.frame_dilates)
        #     mask.append(mask_img)
        #     mask_dilated.append(gradient_mask(mask_img))
        for f_mask in imgMaskArr:
            mask_img = np.array(f_mask)
            mask_img = cv2.resize(
                mask_img, dsize=(imgW, imgH), interpolation=cv2.INTER_NEAREST
            )
            if args.flow_mask_dilates > 0:
                flow_mask_img = scipy.ndimage.binary_dilation(
                    mask_img, iterations=args.flow_mask_dilates
                )
            else:
                flow_mask_img = mask_img
            flow_mask.append(flow_mask_img)
            if args.frame_dilates > 0:
                mask_img = scipy.ndimage.binary_dilation(
                    mask_img, iterations=args.frame_dilates
                )
            mask.append(mask_img)
            mask_dilated.append(gradient_mask(mask_img))

        # Masks indicating the missing region in the video.
        mask = np.stack(mask, -1).astype(bool)  # [H, W, N]
        mask_dilated = np.stack(mask_dilated, -1).astype(bool)
        flow_mask = np.stack(flow_mask, -1).astype(bool)

    # Completes the flow.
    videoFlowF = complete_flow(
        LAFC_config, LAFC_model, forward_flows, flow_mask, "forward", device
    )
    videoFlowB = complete_flow(
        LAFC_config, LAFC_model, backward_flows, flow_mask, "backward", device
    )
    videoFlowF = tensor2np(videoFlowF)
    videoFlowB = tensor2np(videoFlowB)
    print("\nFinish flow completion.")

    if args.vis_completed_flows:
        save_flows(args.outroot, videoFlowF, videoFlowB)

    # Prepares gradients.
    gradient_x = np.empty((imgH, imgW, 3, 0), dtype=np.float32)
    gradient_y = np.empty((imgH, imgW, 3, 0), dtype=np.float32)

    for indFrame in range(nFrame):
        img = video[:, :, :, indFrame]
        img[mask[:, :, indFrame], :] = 0
        img = (
            cv2.inpaint(
                (img * 255).astype(np.uint8),
                mask[:, :, indFrame].astype(np.uint8),
                3,
                cv2.INPAINT_TELEA,
            ).astype(np.float32)
            / 255.0
        )

        gradient_x_ = np.concatenate(
            (np.diff(img, axis=1), np.zeros((imgH, 1, 3), dtype=np.float32)), axis=1
        )
        gradient_y_ = np.concatenate(
            (np.diff(img, axis=0), np.zeros((1, imgW, 3), dtype=np.float32)), axis=0
        )
        gradient_x = np.concatenate(
            (gradient_x, gradient_x_.reshape(imgH, imgW, 3, 1)), axis=-1
        )
        gradient_y = np.concatenate(
            (gradient_y, gradient_y_.reshape(imgH, imgW, 3, 1)), axis=-1
        )

        gradient_x[mask_dilated[:, :, indFrame], :, indFrame] = 0
        gradient_y[mask_dilated[:, :, indFrame], :, indFrame] = 0

    gradient_x_filled = gradient_x
    gradient_y_filled = gradient_y
    mask_gradient = mask_dilated
    video_comp = video

    # Gradient propagation.
    gradient_x_filled, gradient_y_filled, mask_gradient = get_flowNN_gradient(
        args,
        gradient_x_filled,
        gradient_y_filled,
        mask,
        mask_gradient,
        videoFlowF,
        videoFlowB,
        None,
        None,
    )

    # If holes remain in the mask, Poisson blending will fail, so the holes are filled
    # here at the cost of some accuracy. Another solution is to modify Poisson blending.
    for indFrame in range(nFrame):
        mask_gradient[:, :, indFrame] = scipy.ndimage.binary_fill_holes(
            mask_gradient[:, :, indFrame]
        ).astype(bool)

    # After one gradient propagation iteration: gradient --> RGB.
    frameBlends = []
    for indFrame in range(nFrame):
        print("Poisson blending frame {0:3d}".format(indFrame))
        if mask[:, :, indFrame].sum() > 0:
            try:
                frameBlend, UnfilledMask = Poisson_blend_img(
                    video_comp[:, :, :, indFrame],
                    gradient_x_filled[:, 0 : imgW - 1, :, indFrame],
                    gradient_y_filled[0 : imgH - 1, :, :, indFrame],
                    mask[:, :, indFrame],
                    mask_gradient[:, :, indFrame],
                )
            except Exception:
                frameBlend, UnfilledMask = (
                    video_comp[:, :, :, indFrame],
                    mask[:, :, indFrame],
                )
            frameBlend = np.clip(frameBlend, 0, 1.0)
            tmp = (
                cv2.inpaint(
                    (frameBlend * 255).astype(np.uint8),
                    UnfilledMask.astype(np.uint8),
                    3,
                    cv2.INPAINT_TELEA,
                ).astype(np.float32)
                / 255.0
            )
            frameBlend[UnfilledMask, :] = tmp[UnfilledMask, :]

            video_comp[:, :, :, indFrame] = frameBlend
            mask[:, :, indFrame] = UnfilledMask

            frameBlend_ = copy.deepcopy(frameBlend)
            # Green indicates the regions that are not filled yet.
            frameBlend_[mask[:, :, indFrame], :] = [0, 1.0, 0]
        else:
            frameBlend_ = video_comp[:, :, :, indFrame]
        frameBlends.append(frameBlend_)

    if args.vis_prop:
        save_fgcp(args.outroot, frameBlends, mask)

    video_length = len(frameBlends)

    # Converts RGB -> BGR order expected downstream.
    for i in range(len(frameBlends)):
        frameBlends[i] = frameBlends[i][:, :, ::-1]

    frames_first = np2tensor(frameBlends, near="t").to(device)

    mask = np.moveaxis(mask, -1, 0)
    mask = mask[:, :, :, np.newaxis]
    masks = np2tensor(mask, near="t").to(device)

    normed_frames = frames_first * 2 - 1
    comp_frames = [None] * video_length

    ref_length = args.step
    num_ref = args.num_ref
    neighbor_stride = args.neighbor_stride

    videoFlowF = np.moveaxis(videoFlowF, -1, 0)
    # Pads the last flow so the number of flows matches the number of frames.
    videoFlowF = np.concatenate([videoFlowF, videoFlowF[-1:, ...]], axis=0)

    flows = np2tensor(videoFlowF, near="t")
    flows = norm_flows(flows).to(device)

    for f in range(0, video_length, neighbor_stride):
        neighbor_ids = [
            i
            for i in range(
                max(0, f - neighbor_stride), min(video_length, f + neighbor_stride + 1)
            )
        ]
        ref_ids = get_ref_index(f, neighbor_ids, video_length, ref_length, num_ref)
        print(f, len(neighbor_ids), len(ref_ids))
        selected_frames = normed_frames[:, neighbor_ids + ref_ids]
        selected_masks = masks[:, neighbor_ids + ref_ids]
        masked_frames = selected_frames * (1 - selected_masks)
        selected_flows = flows[:, neighbor_ids + ref_ids]
        with torch.no_grad():
            filled_frames = FGT_model(masked_frames, selected_flows, selected_masks)
        filled_frames = (filled_frames + 1) / 2
        filled_frames = filled_frames.cpu().permute(0, 2, 3, 1).numpy() * 255
        for i in range(len(neighbor_ids)):
            idx = neighbor_ids[i]
            valid_frame = frames_first[0, idx].cpu().permute(1, 2, 0).numpy() * 255.0
            valid_mask = masks[0, idx].cpu().permute(1, 2, 0).numpy()
            comp = np.array(filled_frames[i]).astype(np.uint8) * valid_mask + np.array(
                valid_frame
            ).astype(np.uint8) * (1 - valid_mask)
            if comp_frames[idx] is None:
                comp_frames[idx] = comp
            else:
                # Averages overlapping predictions from adjacent temporal windows.
                comp_frames[idx] = (
                    comp_frames[idx].astype(np.float32) * 0.5
                    + comp.astype(np.float32) * 0.5
                )

    if args.vis_frame:
        save_results(args.outroot, comp_frames)

    create_dir(args.outroot)
    for i in range(len(comp_frames)):
        comp_frames[i] = comp_frames[i].astype(np.uint8)
    imageio.mimwrite(
        os.path.join(args.outroot, "result.mp4"), comp_frames, fps=30, quality=8
    )
    print(f"Done, please check your result in {args.outroot} ")


def main(args):
    assert args.mode in (
        "object_removal",
        "video_extrapolation",
        "watermark_removal",
    ), (
        "Accepted modes: 'object_removal', 'video_extrapolation', and 'watermark_removal', "
        "but input is %s"
    ) % args.mode
    # NOTE: video_inpainting expects the frame and mask arrays (imgArr, imgMaskArr)
    # to be supplied by the caller.
    video_inpainting(args)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--opt",
        default="configs/object_removal.yaml",
        help="Please select your config file for inference",
    )

    # video completion
    parser.add_argument(
        "--mode",
        default="object_removal",
        choices=["object_removal", "watermark_removal", "video_extrapolation"],
        help="modes: object_removal / watermark_removal / video_extrapolation",
    )
    parser.add_argument(
        "--path", default="/myData/davis_resized/walking", help="dataset for evaluation"
    )
    parser.add_argument(
        "--path_mask",
        default="/myData/dilateAnnotations_4/walking",
        help="mask for object removal",
    )
    parser.add_argument(
        "--outroot", default="quick_start/walking3", help="output directory"
    )
    parser.add_argument(
        "--consistencyThres",
        dest="consistencyThres",
        default=5,
        type=float,
        help="flow consistency error threshold",
    )
    parser.add_argument("--alpha", dest="alpha", default=0.1, type=float)
    parser.add_argument("--Nonlocal", dest="Nonlocal", default=False, type=bool)

    # RAFT
    parser.add_argument(
        "--raft_model",
        default="../LAFC/flowCheckPoint/raft-things.pth",
        help="restore checkpoint",
    )
    parser.add_argument("--small", action="store_true", help="use small model")
    parser.add_argument(
        "--mixed_precision", action="store_true", help="use mixed precision"
    )
    parser.add_argument(
        "--alternate_corr",
        action="store_true",
        help="use efficient correlation implementation",
    )

    # LAFC
    parser.add_argument("--lafc_ckpts", type=str, default="../LAFC/checkpoint")

    # FGT
    parser.add_argument("--fgt_ckpts", type=str, default="../FGT/checkpoint")

    # extrapolation
    parser.add_argument(
        "--H_scale", dest="H_scale", default=2, type=float, help="H extrapolation scale"
    )
    parser.add_argument(
        "--W_scale", dest="W_scale", default=2, type=float, help="W extrapolation scale"
    )

    # Image basic information
    parser.add_argument("--imgH", type=int, default=256)
    parser.add_argument("--imgW", type=int, default=432)
    parser.add_argument("--flow_mask_dilates", type=int, default=8)
    parser.add_argument("--frame_dilates", type=int, default=0)

    parser.add_argument("--gpu", type=int, default=0)

    # FGT inference parameters
    parser.add_argument("--step", type=int, default=10)
    parser.add_argument("--num_ref", type=int, default=-1)
    parser.add_argument("--neighbor_stride", type=int, default=5)

    # visualization
    parser.add_argument(
        "--vis_flows", action="store_true", help="Visualize the initialized flows"
    )
    parser.add_argument(
        "--vis_completed_flows",
        action="store_true",
        help="Visualize the completed flows",
    )
    parser.add_argument(
        "--vis_prop",
        action="store_true",
        help="Visualize the frames after stage-I filling (flow guided content propagation)",
    )
    parser.add_argument("--vis_frame", action="store_true", help="Visualize frames")

    args = parser.parse_args()
    main(args)