test_video / app_i2v.py
zhiweili
add app_i2v
708a6ea
raw
history blame
3.93 kB
import spaces
import gradio as gr
import time
import torch
import gc
import tempfile
from diffusers.utils import export_to_video, load_image
from video_model import video_pipe
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
def create_demo() -> gr.Blocks:
    """Build the image-to-video Gradio demo.

    Defines the generation callback and its timing helper, lays out the
    input and output widgets, and wires the "Generate Video" button to the
    callback.

    Returns:
        The assembled ``gr.Blocks`` app. It is not launched here.
    """

    @spaces.GPU(duration=60)
    def image_to_video(
        image_path: str,
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress=gr.Progress(),
    ):
        """Generate a video from an input image and write it to an MP4 file.

        Only ``image_path``, ``prompt`` and ``negative_prompt`` are wired to
        UI inputs below; the remaining parameters run with their defaults.

        Args:
            image_path: Value delivered by the ``gr.File`` input; handed to
                ``load_image``.
            prompt: Text prompt forwarded to ``video_pipe``.
            negative_prompt: Negative prompt forwarded to ``video_pipe``.
            width: Forwarded to ``video_pipe``.
            height: Forwarded to ``video_pipe``.
            num_frames: Forwarded to ``video_pipe``.
            frame_rate: FPS used when exporting the MP4. It is not forwarded
                to ``video_pipe``.
            num_inference_steps: Forwarded to ``video_pipe``.
            seed: Seed for the ``torch.Generator`` passed to ``video_pipe``.
            progress: Gradio progress tracker; not referenced in the body.

        Returns:
            A ``(output_path, time_cost_str)`` tuple: the path of the written
            MP4 and a string of the form ``"start--><ms>"`` where ``<ms>`` is
            the wall-clock time of the pipeline call plus its cleanup.

        Raises:
            gr.Error: If no input image was supplied.
        """
        # The file input yields None when nothing was uploaded; fail with a
        # readable message instead of an error from deep inside load_image.
        if not image_path:
            raise gr.Error("Please upload an input image.")

        generator = torch.Generator(device=device).manual_seed(seed)
        input_image = load_image(image_path)

        run_task_time = 0
        time_cost_str = ''
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        try:
            with torch.no_grad():
                video = video_pipe(
                    image=input_image,
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
            # Collect garbage first so objects that just became unreachable
            # are dropped before cached CUDA memory is released.
            gc.collect()
            torch.cuda.empty_cache()

        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        # tempfile.mktemp() only returns a name, leaving a window in which
        # another process could create that path. Create the file atomically
        # and keep it (delete=False) so the exporter and Gradio can use it.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp_file:
            output_path = tmp_file.name
        export_to_video(video, output_path, fps=frame_rate)

        del video
        torch.cuda.empty_cache()
        return output_path, time_cost_str

    def get_time_cost(run_task_time, time_cost_str):
        """Append the time elapsed since the previous checkpoint.

        Args:
            run_task_time: Timestamp (ms) of the previous checkpoint, or 0 on
                the first call.
            time_cost_str: Accumulated report string so far.

        Returns:
            A ``(now_ms, report)`` tuple. On the first call the report is
            reset to ``'start'``; afterwards the elapsed milliseconds are
            appended, separated by ``'-->'``.
        """
        now_time = int(time.time() * 1000)
        if run_task_time == 0:
            time_cost_str = 'start'
        else:
            if time_cost_str != '':
                time_cost_str += '-->'
            time_cost_str += f'{now_time - run_task_time}'
        run_task_time = now_time
        return run_task_time, time_cost_str

    with gr.Blocks() as demo:
        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                i2vid_image_path = gr.File(label="Input Image")
                i2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
                    lines=5,
                )
                i2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
                    lines=2,
                )
                i2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )
            # Right column: generated video and the timing report.
            with gr.Column():
                i2vid_output = gr.Video(label="Generated Output")
                i2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)

        i2vid_generate.click(
            fn=image_to_video,
            inputs=[i2vid_image_path, i2vid_prompt, i2vid_negative_prompt],
            outputs=[i2vid_output, i2vid_generated_cost],
        )

    return demo