import os
import time
import pdb

import cuid
import gradio as gr
from huggingface_hub import snapshot_download

ProjectDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
CheckpointsDir = os.path.join(ProjectDir, "checkpoints")


def download_model():
    if not os.path.exists(CheckpointsDir):
        print("Checkpoints not downloaded, start downloading...")
        tic = time.time()
        snapshot_download(
            repo_id="TMElyralab/MuseV",
            local_dir=CheckpointsDir,
            max_workers=8,
        )
        toc = time.time()
        print(f"download cost {toc - tic} seconds")
    else:
        print("Model already downloaded.")


download_model()  # for huggingface deployment.
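
# NOTE: these modules are imported only after download_model() has run; presumably they
# load model weights from CheckpointsDir at import time, so the import order matters here.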
from gradio_video2video import online_v2v_inference
from gradio_text2video import online_t2v_inference


def update_shape(image):
    if image is not None:
        h, w, _ = image.shape
    else:
        h, w = 768, 512
    return w, h
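

# update_shape is referenced only by the image.change(...) callbacks below, which are
# currently commented out; it is kept so auto-sizing to the uploaded image can be re-enabled.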


class ConcatenateBlock(gr.blocks.Block):
    def __init__(self, options):
        self.options = options
        self.current_string = ""

    def update_string(self, new_choice):
        if new_choice and new_choice not in self.current_string.split(", "):
            if self.current_string == "":
                self.current_string = new_choice
            else:
                self.current_string += ", " + new_choice
        return self.current_string


def process_input(new_choice):
    return concatenate_block.update_string(new_choice), ""
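
# Intended behaviour of ConcatenateBlock.update_string (a sketch based on the code above):
#   block = ConcatenateBlock(control_options)
#   block.update_string("pose")    # -> "pose"
#   block.update_string("canny")   # -> "pose, canny"
#   block.update_string("pose")    # -> "pose, canny"  (duplicate choices are ignored)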


control_options = [
    "pose",
    "pose_body",
    "pose_hand",
    "pose_face",
    "pose_hand_body",
    "pose_hand_face",
    "dwpose",
    "dwpose_face",
    "dwpose_hand",
    "dwpose_body",
    "dwpose_body_hand",
    "canny",
    "tile",
    "hed",
    "hed_scribble",
    "depth",
    "pidi",
    "normal_bae",
    "lineart",
    "lineart_anime",
    "zoe",
    "sam",
    "mobile_sam",
    "leres",
    "content",
    "face_detector",
]
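
# Only "dwpose_body_hand" is wired into the Gradio demo below; the full list is supported
# via the command line (see the Control Condition textbox help text).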

concatenate_block = ConcatenateBlock(control_options)

css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height: 576px}"""


with gr.Blocks(css=css) as demo:
    gr.Markdown(
        "<div align='center'> <h1> MuseV: Infinite-length and High Fidelity Virtual Human Video Generation with Visual Conditioned Parallel Denoising</h1> \
        <h2 style='font-weight: 450; font-size: 1rem; margin: 0rem'>\
        </br>\
        Zhiqiang Xia <sup>*</sup>,\
        Zhaokang Chen<sup>*</sup>,\
        Bin Wu<sup>†</sup>,\
        Chao Li,\
        Kwok-Wai Hung,\
        Chao Zhan,\
        Yingjie He,\
        Wenjiang Zhou\
        (<sup>*</sup>Equal Contribution, <sup>†</sup>Corresponding Author, [email protected])\
        </br>\
        Lyra Lab, Tencent Music Entertainment\
        </h2> \
        <a style='font-size:18px;color: #000000' href='https://github.com/TMElyralab/MuseV'>[Github Repo]</a>\
        <a style='font-size:18px;color: #000000'>, which is important to Open-Source projects. Thanks!</a>\
        <a style='font-size:18px;color: #000000' href=''> [ArXiv(Coming Soon)] </a>\
        <a style='font-size:18px;color: #000000' href=''> [Project Page(Coming Soon)] </a> \
        <a style='font-size:18px;color: #000000'>If MuseV is useful, please help star the repo~ </a> </div>"
    )
    with gr.Tab("Text to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                image = gr.Image(label="VisionCondImage")
                gr.Markdown("seed=-1 means that a different random seed is used for each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the size of the reference image is used. The size of the target video is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality.\n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality."
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn1 = gr.Button("Generate")
            out = gr.outputs.Video()
        # pdb.set_trace()
        with gr.Row():
            board = gr.Dataframe(
                value=[["", "", ""]] * 3,
                interactive=False,
                type="array",
                label="Demo Video",
            )
        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])
        btn1.click(
            fn=online_t2v_inference,
            inputs=[prompt, image, seed, fps, w, h, video_length, img_edge_ratio],
            outputs=out,
        )
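        # Note: the `inputs` list above is assumed to follow the positional signature of
        # online_t2v_inference(prompt, image, seed, fps, w, h, video_length, img_edge_ratio).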
    with gr.Tab("Video to Video"):
        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(label="Prompt")
                gr.Markdown(
                    (
                        "The pose of VisionCondImage should be the same as that of the first frame of the video. "
                        "It is better to generate a target first frame whose pose matches the first frame of the video with a text-to-image tool, such as MJ or SDXL."
                    )
                )
                image = gr.Image(label="VisionCondImage")
                video = gr.Video(label="ReferVideo")
                # radio = gr.inputs.Radio(, label="Select an option")
                # ctr_button = gr.inputs.Button(label="Add ControlNet List")
                # output_text = gr.outputs.Textbox()
                processor = gr.Textbox(
                    label=f"Control Condition. The gradio demo currently only supports dwpose_body_hand; the command line supports any of {control_options}",
                    value="dwpose_body_hand",
                )
                gr.Markdown("seed=-1 means that a different random seed is used for each run")
                seed = gr.Number(label="Seed", value=-1)
                video_length = gr.Number(label="Video Length", value=12)
                fps = gr.Number(label="Generate Video FPS", value=6)
                gr.Markdown(
                    (
                        "If W&H is -1, the size of the reference image is used. The size of the target video is $(W, H)*img\_edge\_ratio$. \n"
                        "The smaller the image size, the larger the motion amplitude and the lower the video quality. \n"
                        "The larger the W&H, the smaller the motion amplitude and the higher the video quality. "
                    )
                )
                with gr.Row():
                    w = gr.Number(label="Width", value=-1)
                    h = gr.Number(label="Height", value=-1)
                    img_edge_ratio = gr.Number(label="img_edge_ratio", value=1.0)
                btn2 = gr.Button("Generate")
            out1 = gr.outputs.Video()
        # image.change(fn=update_shape, inputs=[image], outputs=[w, h])
        btn2.click(
            fn=online_v2v_inference,
            inputs=[
                prompt,
                image,
                video,
                processor,
                seed,
                fps,
                w,
                h,
                video_length,
                img_edge_ratio,
            ],
            outputs=out1,
        )


# Set the IP and port
ip_address = "0.0.0.0"  # Replace with your desired IP address
port_number = 7860  # Replace with your desired port number

demo.queue().launch(
    share=False, debug=True, server_name=ip_address, server_port=port_number
)
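
# A minimal usage sketch (the file name "app.py" is an assumption for this Space):
#   python app.py
# The queue keeps long generations from blocking concurrent users, and 0.0.0.0:7860 is
# the default host/port expected by Hugging Face Spaces.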