File size: 3,934 Bytes
f011eee
 
 
 
 
 
 
708a6ea
f011eee
708a6ea
f011eee
 
 
 
 
 
708a6ea
 
f011eee
 
 
 
 
 
 
 
708a6ea
f011eee
 
708a6ea
f011eee
 
 
 
 
708a6ea
 
f011eee
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
708a6ea
f011eee
 
 
 
 
 
 
 
 
 
 
 
 
 
708a6ea
 
f011eee
 
 
 
 
 
708a6ea
f011eee
 
 
 
 
 
708a6ea
f011eee
 
 
 
 
 
708a6ea
 
f011eee
708a6ea
 
 
 
f011eee
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import spaces
import gradio as gr
import time
import torch
import gc
import tempfile

from diffusers.utils import export_to_video, load_image

from video_model import video_pipe

# Pick the compute device once at import time; the seeded torch.Generator
# created per request below is placed on this same device.
device = "cuda" if torch.cuda.is_available() else "cpu"

def create_demo() -> gr.Blocks:
    """Build the image-to-video Gradio demo.

    Returns:
        gr.Blocks: the assembled UI, ready for ``.launch()``.
    """

    @spaces.GPU(duration=60)
    def image_to_video(
        image_path: str,
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress=gr.Progress(),  # gradio injects progress via this default — keep as-is
    ):
        """Generate a video from one input image using ``video_pipe``.

        Args:
            image_path: Path to the conditioning image (from the gr.File input).
            prompt: Positive text prompt.
            negative_prompt: Negative text prompt.
            width: Output frame width in pixels.
            height: Output frame height in pixels.
            num_frames: Number of frames to generate.
            frame_rate: FPS used when encoding the output mp4.
            num_inference_steps: Diffusion denoising steps.
            seed: Seed for the torch generator (deterministic output).
            progress: Gradio progress tracker (injected by the framework).

        Returns:
            tuple[str, str]: path to the generated .mp4 and a timing summary
            string produced by ``get_time_cost``.
        """
        generator = torch.Generator(device=device).manual_seed(seed)
        input_image = load_image(image_path)
        run_task_time = 0
        time_cost_str = ''
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
        try:
            with torch.no_grad():
                video = video_pipe(
                    image=input_image,
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
            # Release GPU memory even if the pipeline raises.
            torch.cuda.empty_cache()
            gc.collect()
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

        # NamedTemporaryFile(delete=False) replaces the deprecated, race-prone
        # tempfile.mktemp: the file is created atomically, then handed to the encoder.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
            output_path = tmp.name
        export_to_video(video, output_path, fps=frame_rate)

        # Drop the frame tensors before returning to keep GPU/host memory low.
        del video
        torch.cuda.empty_cache()
        return output_path, time_cost_str


    def get_time_cost(run_task_time, time_cost_str):
        """Accumulate per-step timing (ms) into a '-->'-joined summary string.

        Args:
            run_task_time: Timestamp (ms) of the previous call, or 0 on the first.
            time_cost_str: Summary built so far ('start' after the first call).

        Returns:
            tuple[int, str]: current timestamp (ms) and the updated summary.
        """
        now_time = int(time.time() * 1000)
        if run_task_time == 0:
            time_cost_str = 'start'
        else:
            if time_cost_str != '':
                time_cost_str += '-->'  # plain string: the old f'-->' had no placeholder
            time_cost_str += f'{now_time - run_task_time}'
        run_task_time = now_time
        return run_task_time, time_cost_str

    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                # Left column: inputs and the generate button.
                i2vid_image_path = gr.File(label="Input Image")
                i2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
                    lines=5,
                )

                i2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
                    lines=2,
                )

                i2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column():
                # Right column: rendered video plus the timing readout.
                i2vid_output = gr.Video(label="Generated Output")
                i2vid_generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)

        # Only image/prompt/negative-prompt come from the UI; the remaining
        # pipeline parameters use image_to_video's defaults.
        i2vid_generate.click(
            fn=image_to_video,
            inputs=[i2vid_image_path, i2vid_prompt, i2vid_negative_prompt],
            outputs=[i2vid_output, i2vid_generated_cost],
        )

    return demo