import os
import sys

import gradio as gr

os.makedirs("outputs", exist_ok=True)
# Make the local `mogen` package importable before importing from it.
sys.path.insert(0, '.')

import hashlib

import mmcv
import numpy as np
import torch
from mmcv.runner import load_checkpoint
from scipy.ndimage import gaussian_filter

from mogen.models import build_architecture
from mogen.utils.plot_utils import (
    recover_from_ric,
    plot_3d_motion,
    t2m_kinematic_chain
)


def motion_temporal_filter(motion, sigma=1):
    # Smooth each joint coordinate over time with a Gaussian filter
    # to suppress frame-to-frame jitter.
    motion = motion.reshape(motion.shape[0], -1)
    for i in range(motion.shape[1]):
        motion[:, i] = gaussian_filter(motion[:, i], sigma=sigma, mode="nearest")
    return motion.reshape(motion.shape[0], -1, 3)


def plot_t2m(data, result_path, npy_path, caption):
    # Recover the 22 joint positions from the rotation-invariant coordinate
    # (RIC) representation, smooth them, and render the animation to a video.
    joint = recover_from_ric(torch.from_numpy(data).float(), 22).numpy()
    joint = motion_temporal_filter(joint, sigma=2.5)
    plot_3d_motion(result_path, t2m_kinematic_chain, joint, title=caption, fps=20)
    if npy_path is not None:
        np.save(npy_path, joint)


def create_model(config_path, ckpt_path):
    # Build the architecture from an mmcv config and load its checkpoint on CPU.
    cfg = mmcv.Config.fromfile(config_path)
    model = build_architecture(cfg.model)
    load_checkpoint(model, ckpt_path, map_location='cpu')
    model.cpu()
    model.eval()
    return model


model_remodiffuse = create_model(
    "configs/remodiffuse/remodiffuse_t2m.py",
    "logs/remodiffuse/remodiffuse_t2m/latest.pth")
# model_motiondiffuse = create_model(
#     "configs/motiondiffuse/motiondiffuse_t2m.py",
#     "logs/motiondiffuse/motiondiffuse_t2m/latest.pth")
# model_mdm = create_model(
#     "configs/mdm/mdm_t2m_official.py",
#     "logs/mdm/mdm_t2m/latest.pth")

# Normalization statistics of the HumanML3D dataset.
mean = np.load("data/datasets/human_ml3d/mean.npy")
std = np.load("data/datasets/human_ml3d/std.npy")


def show_generation_result(model, text, motion_length, result_path):
    device = 'cpu'
    # Placeholder motion in the 263-dimensional HumanML3D feature space.
    motion = torch.zeros(1, motion_length, 263).to(device)
    motion_mask = torch.ones(1, motion_length).to(device)
    motion_length = torch.tensor([motion_length], dtype=torch.long, device=device)
    model = model.to(device)
    model_input = {
        'motion': motion,
        'motion_mask': motion_mask,
        'motion_length': motion_length,
        'motion_metas': [{'text': text}],
        'inference_kwargs': {},
    }
    with torch.no_grad():
        output = model(**model_input)[0]['pred_motion']
        pred_motion = output.cpu().detach().numpy()
        # De-normalize with the dataset statistics.
        pred_motion = pred_motion * std + mean
    plot_t2m(pred_motion, result_path, None, text)


def generate(prompt, length):
    os.makedirs("outputs", exist_ok=True)
    # Use a deterministic digest for the filename; the builtin hash() is
    # salted per process, and including the length avoids overwriting results
    # for the same prompt at different lengths.
    name = hashlib.md5(f"{prompt}_{length}".encode()).hexdigest()
    result_path = os.path.join("outputs", name + ".mp4")
    show_generation_result(model_remodiffuse, prompt, length, result_path)
    return result_path


demo = gr.Interface(
    fn=generate,
    inputs=["text", gr.Slider(20, 196, value=60, label="Motion length (frames)")],
    examples=[
        ["a person performs a cartwheel", 57],
        ["a person picks up something from the ground", 79],
        ["a person walks around and then sits down", 190],
        ["a person performs a deep bow", 89],
    ],
    outputs="video",
    title="ReMoDiffuse: Retrieval-Augmented Motion Diffusion Model",
    description="This is an interactive demo for ReMoDiffuse. For more "
                "information, feel free to visit our project page "
                "(https://mingyuan-zhang.github.io/projects/ReMoDiffuse.html).")

demo.queue()
demo.launch()
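
# Usage sketch (assumptions: the config and checkpoint paths above exist, and
# this file is run directly). `demo.launch()` blocks while the server runs, so
# to drive the pipeline without the web UI, comment it out or put it behind an
# `if __name__ == "__main__":` guard, then call `generate` directly:
#
#     video_path = generate("a person waves with both hands", 80)
#     print(video_path)  # e.g. outputs/<md5 digest>.mp4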