import os
import sys

import gradio as gr

os.makedirs("outputs", exist_ok=True)
# Make the local `mogen` package importable before importing from it.
sys.path.insert(0, '.')

import hashlib

import mmcv
import numpy as np
import torch
from mmcv.runner import load_checkpoint
from scipy.ndimage import gaussian_filter

from mogen.models import build_architecture
from mogen.utils.plot_utils import (
    recover_from_ric,
    plot_3d_motion,
    t2m_kinematic_chain
)


def motion_temporal_filter(motion, sigma=1):
    # Smooth each joint coordinate over time with a Gaussian filter
    # to suppress frame-to-frame jitter.
    motion = motion.reshape(motion.shape[0], -1)
    for i in range(motion.shape[1]):
        motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
    return motion.reshape(motion.shape[0], -1, 3)


def plot_t2m(data, result_path, npy_path, caption):
    # Recover the 22 joint positions from the rotation-invariant coordinate
    # (RIC) representation, smooth them, and render the animation to a video.
    joint = recover_from_ric(torch.from_numpy(data).float(), 22).numpy()
    joint = motion_temporal_filter(joint, sigma=2.5)
    plot_3d_motion(result_path, t2m_kinematic_chain, joint, title=caption, fps=20)
    if npy_path is not None:
        np.save(npy_path, joint)


def create_model(config_path, ckpt_path):
    # Build the architecture from an mmcv config and load its checkpoint on CPU.
    cfg = mmcv.Config.fromfile(config_path)
    model = build_architecture(cfg.model)
    load_checkpoint(model, ckpt_path, map_location='cpu')
    model.cpu()
    model.eval()
    return model


model_remodiffuse = create_model(
    "configs/remodiffuse/remodiffuse_t2m.py",
    "logs/remodiffuse/remodiffuse_t2m/latest.pth")
# model_motiondiffuse = create_model(
#     "configs/motiondiffuse/motiondiffuse_t2m.py",
#     "logs/motiondiffuse/motiondiffuse_t2m/latest.pth")
# model_mdm = create_model(
#     "configs/mdm/mdm_t2m_official.py",
#     "logs/mdm/mdm_t2m/latest.pth")

# Normalization statistics of the HumanML3D dataset.
mean = np.load("data/datasets/human_ml3d/mean.npy")
std = np.load("data/datasets/human_ml3d/std.npy")


def show_generation_result(model, text, motion_length, result_path):
    device = 'cpu'
    # Placeholder motion in the 263-dimensional HumanML3D feature space.
    motion = torch.zeros(1, motion_length, 263).to(device)
    motion_mask = torch.ones(1, motion_length).to(device)
    motion_length = torch.tensor([motion_length], dtype=torch.long, device=device)
    model = model.to(device)
    model_input = {
        'motion': motion,
        'motion_mask': motion_mask,
        'motion_length': motion_length,
        'motion_metas': [{'text': text}],
        'inference_kwargs': {},
    }
    with torch.no_grad():
        output = model(**model_input)[0]['pred_motion']
        pred_motion = output.cpu().detach().numpy()
        # De-normalize with the dataset statistics.
        pred_motion = pred_motion * std + mean
    plot_t2m(pred_motion, result_path, None, text)


def generate(prompt, length):
    os.makedirs("outputs", exist_ok=True)
    # Use a deterministic digest for the filename; the builtin hash() is
    # salted per process, and including the length avoids overwriting results
    # for the same prompt at different lengths.
    name = hashlib.md5(f"{prompt}_{length}".encode()).hexdigest()
    result_path = os.path.join("outputs", name + ".mp4")
    show_generation_result(model_remodiffuse, prompt, length, result_path)
    return result_path


demo = gr.Interface(
    fn=generate,
    inputs=["text", gr.Slider(20, 196, value=60, label="Motion length (frames)")],
    examples=[
        ["a person performs a cartwheel", 57],
        ["a person picks up something from the ground", 79],
        ["a person walks around and then sits down", 190],
        ["a person performs a deep bow", 89],
    ],
    outputs="video",
    title="ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model",
    description="This is an interactive demo for ReMoDiffuse. For more "
                "information, feel free to visit our project page "
                "(https://mingyuan-zhang.github.io/projects/ReMoDiffuse.html).")

demo.queue()
demo.launch()
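
# Usage sketch (assumptions: the config and checkpoint paths above exist, and
# this file is run directly). `demo.launch()` blocks while the server runs, so
# to drive the pipeline without the web UI, comment it out or put it behind an
# `if __name__ == "__main__":` guard, then call `generate` directly:
#
#     video_path = generate("a person waves with both hands", 80)
#     print(video_path)  # e.g. outputs/<md5 digest>.mp4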