import os
import time
import pdb
import cuid
import gradio as gr
from huggingface_hub import snapshot_download
ProjectDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")


def download_model():
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not found, downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
        )
        toc = time.time()
        print(f"download took {toc - tic:.1f} seconds")
    else:
        print("Model already downloaded.")


download_model()  # for huggingface deployment.
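
# Note: the two gradio_* imports below are deliberately placed after
# download_model(); presumably these modules load checkpoint weights at
# import time, so the files must exist first (an assumption based on the
# ordering in this file, not verified).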
from gradio_video2video import online_v2v_inference
from gradio_text2video import online_t2v_inference


def update_shape(image):
    # Derive (width, height) from the uploaded image; fall back to (512, 768).
    if image is not None:
        h, w, _ = image.shape
    else:
        h, w = 768, 512
    return w, h
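
# update_shape is intended to be wired to the image component, e.g.:
#     image.change(fn=update_shape, inputs=[image], outputs=[w, h])
# That hookup is currently commented out in the UI code below.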


class ConcatenateBlock(gr.blocks.Block):
    def __init__(self, options):
        self.options = options
        self.current_string = ""

    def update_string(self, new_choice):
        # Append new_choice to the comma-separated string, skipping duplicates.
        if new_choice and new_choice not in self.current_string.split(", "):
            if self.current_string == "":
                self.current_string = new_choice
            else:
                self.current_string += ", " + new_choice
        return self.current_string
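
# Illustrative usage of ConcatenateBlock (not part of the app flow):
#     block = ConcatenateBlock(control_options)
#     block.update_string("pose")   # -> "pose"
#     block.update_string("canny")  # -> "pose, canny"
#     block.update_string("pose")   # -> "pose, canny"  (duplicate ignored)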


def process_input(new_choice):
    return concatenate_block.update_string(new_choice), ""
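
# process_input appears intended to back the commented-out Radio/Button pair
# in the Video to Video tab below: it appends the selected condition to the
# accumulated string and returns "" to clear the selector (an inference; those
# widgets are currently disabled).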


control_options = [
    "pose",
    "pose_body",
    "pose_hand",
    "pose_face",
    "pose_hand_body",
    "pose_hand_face",
    "dwpose",
    "dwpose_face",
    "dwpose_hand",
    "dwpose_body",
    "dwpose_body_hand",
    "canny",
    "tile",
    "hed",
    "hed_scribble",
    "depth",
    "pidi",
    "normal_bae",
    "lineart",
    "lineart_anime",
    "zoe",
    "sam",
    "mobile_sam",
    "leres",
    "content",
    "face_detector",
]
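
# The names above are the control-condition (processor) identifiers accepted by
# the command-line pipeline; per the Control Condition textbox label below, the
# Gradio demo itself only exercises dwpose_body_hand.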
concatenate_block = ConcatenateBlock(control_options)
css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height: 576px}"""


with gr.Blocks(css=css) as demo:
    gr.Markdown(
        "<div align='center'> <h1> MuseV: Infinite-length and High Fidelity Virtual Human Video Generation with Visual Conditioned Parallel Denoising </h1> \
        <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
        </br>\
        Zhiqiang Xia <sup>*</sup>,\
        Zhaokang Chen<sup>*</sup>,\
        Bin Wu<sup>†</sup>,\
        Chao Li,\
        Kwok-Wai Hung,\
        Chao Zhan,\
        Yingjie He,\
        Wenjiang Zhou\
        (<sup>*</sup>Equal Contribution, <sup>†</sup>Corresponding Author, [email protected])\
        </br>\
        Lyra Lab, Tencent Music Entertainment\
        </h2> \
        <a style='font-size:18px;color: #000000' href='https://github.com/TMElyralab/MuseV'>[Github Repo]</a>\
        <a style='font-size:18px;color: #000000'>, star it to support open-source projects. Thanks!</a>\
        <a style='font-size:18px;color: #000000' href=''> [ArXiv(Coming Soon)] </a>\
        <a style='font-size:18px;color: #000000' href=''> [Project Page(Coming Soon)] </a> \
        <a style='font-size:18px;color: #000000'>If MuseV is useful, please help star the repo~ </a> </div>"
    )
with gr.Tab("Text to Video"):
with gr.Row():
with gr.Column():
prompt = gr.Textbox(label="Prompt")
image = gr.Image(label="VisionCondImage")
gr.Markdown("seed=-1 means that the seeds run each time are different")
seed = gr.Number(label="Seed", value=-1)
video_length = gr.Number(label="Video Length", value=12)
fps = gr.Number(label="Generate Video FPS", value=6)
gr.Markdown(
(
"If W&H is -1, then use the Reference Image's Size. Size of target video is $(W, H)*img\_edge\_ratio$. \n"
"The shorter the image size, the larger the motion amplitude, and the lower video quality.\n"
"The longer the W&H, the smaller the motion amplitude, and the higher video quality"
)
)
with gr.Row():
w = gr.Number(label="Width", value=-1)
h = gr.Number(label="Height", value=-1)
img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
btn1 = gr.Button("Generate")
out = gr.outputs.Video()
# pdb.set_trace()
with gr.Row():
board = gr.Dataframe(
value=[["", "", ""]] * 3,
interactive=False,
type="array",
label="Demo Video",
)
# image.change(fn=update_shape, inputs=[image], outputs=[w, h])
btn1.click(
fn=online_t2v_inference,
inputs=[prompt, image, seed, fps, w, h, video_length, img_edge_ratio],
outputs=out,
)
with gr.Tab("Video to Video"):
with gr.Row():
with gr.Column():
prompt = gr.Textbox(label="Prompt")
gr.Markdown(
(
"pose of VisionCondImage should be same as of the first frame of the video. "
"its better generate target first frame whose pose is same as of first frame of the video with text2image tool, sch as MJ, SDXL."
)
)
image = gr.Image(label="VisionCondImage")
video = gr.Video(label="ReferVideo")
# radio = gr.inputs.Radio(, label="Select an option")
# ctr_button = gr.inputs.Button(label="Add ControlNet List")
# output_text = gr.outputs.Textbox()
processor = gr.Textbox(
label=f"Control Condition. gradio code now only support dwpose_body_hand, use command can support multi of {control_options}",
value="dwpose_body_hand",
)
gr.Markdown("seed=-1 means that seeds are different in every run")
seed = gr.Number(label="Seed", value=-1)
video_length = gr.Number(label="Video Length", value=12)
fps = gr.Number(label="Generate Video FPS", value=6)
gr.Markdown(
(
"If W&H is -1, then use the Reference Image's Size. Size of target video is $(W, H)*img\_edge\_ratio$. \n"
"The shorter the image size, the larger the motion amplitude, and the lower video quality. \n"
"The longer the W&H, the smaller the motion amplitude, and the higher video quality. "
)
)
with gr.Row():
w = gr.Number(label="Width", value=-1)
h = gr.Number(label="Height", value=-1)
img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
btn2 = gr.Button("Generate")
out1 = gr.outputs.Video()
# image.change(fn=update_shape, inputs=[image], outputs=[w, h])
btn2.click(
fn=online_v2v_inference,
inputs=[
prompt,
image,
video,
processor,
seed,
fps,
w,
h,
video_length,
img_edge_ratio,
],
outputs=out1,
)
# Set the IP and port
ip_address = "0.0.0.0" # Replace with your desired IP address
port_number = 7860 # Replace with your desired port number
demo.queue().launch(
share=False, debug=True, server_name=ip_address, server_port=port_number
)
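
# queue() makes long-running generation requests wait in line rather than time
# out under concurrency, and binding to 0.0.0.0 exposes the server on all
# network interfaces, which is typically needed for Docker / HF Spaces deployments.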