import os
import time
import pdb

import cuid
import gradio as gr
from huggingface_hub import snapshot_download

ProjectDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")


def download_model():
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not downloaded, start downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
        )
        toc = time.time()
        print(f"download cost {toc - tic} seconds")
    else:
        print("Model already downloaded.")


download_model()  # for huggingface deployment.
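
# NOTE: these modules are imported only after download_model() has run; presumably they
# load model weights from CheckpointsDir at import time, so the import order matters here.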
from gradio_video2video import online_v2v_inference
from gradio_text2video import online_t2v_inference


def update_shape(image):
    if image is not None:
        h, w, _ = image.shape
    else:
        h, w = 768, 512
    return w, h
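

# update_shape is referenced only by the image.change(...) callbacks below, which are
# currently commented out; it is kept so auto-sizing to the uploaded image can be re-enabled.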


class ConcatenateBlock(gr.blocks.Block):
    def __init__(self, options):
        self.options = options
        self.current_string = ""

    def update_string(self, new_choice):
        if new_choice and new_choice not in self.current_string.split(", "):
            if self.current_string == "":
                self.current_string = new_choice
            else:
                self.current_string += ", " + new_choice
        return self.current_string


def process_input(new_choice):
    return concatenate_block.update_string(new_choice), ""
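
# Intended behaviour of ConcatenateBlock.update_string (a sketch based on the code above):
#   block = ConcatenateBlock(control_options)
#   block.update_string("pose")    # -> "pose"
#   block.update_string("canny")   # -> "pose, canny"
#   block.update_string("pose")    # -> "pose, canny"  (duplicate choices are ignored)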


control_options = [
    "pose",
    "pose_body",
    "pose_hand",
    "pose_face",
    "pose_hand_body",
    "pose_hand_face",
    "dwpose",
    "dwpose_face",
    "dwpose_hand",
    "dwpose_body",
    "dwpose_body_hand",
    "canny",
    "tile",
    "hed",
    "hed_scribble",
    "depth",
    "pidi",
    "normal_bae",
    "lineart",
    "lineart_anime",
    "zoe",
    "sam",
    "mobile_sam",
    "leres",
    "content",
    "face_detector",
]
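
# Only "dwpose_body_hand" is wired into the Gradio demo below; the full list is supported
# via the command line (see the Control Condition textbox help text).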

concatenate_block = ConcatenateBlock(control_options)

css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height: 576px}"""


with gr.Blocks(css=css) as demo:
    gr.Markdown(
        "<div align='center'> <h1> MuseV: Infinite-length and High Fidelity Virtual Human Video Generation with Visual Conditioned Parallel Denoising</h1> \
        <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
        </br>\
        Zhiqiang Xia <sup>*</sup>,\
        Zhaokang Chen<sup>*</sup>,\
        Bin Wu<sup>†</sup>,\
        Chao Li,\
        Kwok-Wai Hung,\
        Chao Zhan,\
        Yingjie He,\
        Wenjiang Zhou\
        (<sup>*</sup>Equal Contribution, <sup>†</sup>Corresponding Author, [email protected])\
        </br>\
        Lyra Lab, Tencent Music Entertainment\
        </h2> \
        <a style='font-size:18px;color: #000000' href='https://github.com/TMElyralab/MuseV'>[Github Repo]</a>\
        <a style='font-size:18px;color: #000000'>, which is important to Open-Source projects. Thanks!</a>\
        <a style='font-size:18px;color: #000000' href=''> [ArXiv(Coming Soon)] </a>\
        <a style='font-size:18px;color: #000000' href=''> [Project Page(Coming Soon)] </a> \
        <a style='font-size:18px;color: #000000'>If MuseV is useful, please help star the repo~ </a> </div>"
    )
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                gr.Markdown("seed=-1 means that a different random seed is used for each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the size of the reference image is used. The size of the target video is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality.\n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality."
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn1 = gr.Button("Generate")
            out = gr.outputs.Video()
        # pdb.set_trace()
        with gr.Row():
            board = gr.Dataframe(
                value=[["", "", ""]] * 3,
                interactive=False,
                type="array",
                label="Demo Video",
            )
        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])
        btn1.click(
            fn=online_t2v_inference,
            inputs=[prompt, image, seed, fps, w, h, video_length, img_edge_ratio],
            outputs=out,
        )
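        # Note: the `inputs` list above is assumed to follow the positional signature of
        # online_t2v_inference(prompt, image, seed, fps, w, h, video_length, img_edge_ratio).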
    with gr.Tab("Video to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                gr.Markdown(
                    (
                        "The pose of VisionCondImage should be the same as that of the first frame of the video. "
                        "It is better to generate a target first frame whose pose matches the first frame of the video with a text-to-image tool, such as MJ or SDXL."
                    )
                )
                image = gr.Image(label="VisionCondImage")
                video = gr.Video(label="ReferVideo")
                # radio = gr.inputs.Radio(, label="Select an option")
                # ctr_button = gr.inputs.Button(label="Add ControlNet List")
                # output_text = gr.outputs.Textbox()
                processor = gr.Textbox(
                    label=f"Control Condition. The gradio demo currently only supports dwpose_body_hand; the command line supports any of {control_options}",
                    value="dwpose_body_hand",
                )
                gr.Markdown("seed=-1 means that a different random seed is used for each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the size of the reference image is used. The size of the target video is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality. \n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality. "
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn2 = gr.Button("Generate")
            out1 = gr.outputs.Video()
        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])
        btn2.click(
            fn=online_v2v_inference,
            inputs=[
                prompt,
                image,
                video,
                processor,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=out1,
        )


# Set the IP and port
ip_address = "0.0.0.0"  # Replace with your desired IP address
port_number = 7860  # Replace with your desired port number

demo.queue().launch(
    share=False, debug=True, server_name=ip_address, server_port=port_number
)
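
# A minimal usage sketch (the file name "app.py" is an assumption for this Space):
#   python app.py
# The queue keeps long generations from blocking concurrent users, and 0.0.0.0:7860 is
# the default host/port expected by Hugging Face Spaces.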