import spaces
import gradio as gr
import time
import torch
import gc
import tempfile

from diffusers import LTXPipeline
from diffusers.utils import export_to_video


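# Prefer the GPU when CUDA is available, otherwise fall back to CPU.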
device = "cuda" if torch.cuda.is_available() else "cpu"

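# Load the LTX-Video text-to-video pipeline in bfloat16 and move it to the selected device.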
pipe = LTXPipeline.from_pretrained("Lightricks/LTX-Video", torch_dtype=torch.bfloat16)
pipe.to(device)

def create_demo() -> gr.Blocks:
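    """Build and return the Gradio Blocks UI for LTX-Video text-to-video generation."""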

    @spaces.GPU(duration=60)
    def text_to_video(
        prompt: str,
        negative_prompt: str,
        width: int = 768,
        height: int = 512,
        num_frames: int = 121,
        frame_rate: int = 25,
        num_inference_steps: int = 30,
        seed: int = 8,
        progress: gr.Progress = gr.Progress(),
    ):
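        """Generate a video from the prompt with LTX-Video.

        Returns the path of the exported MP4 and a '-->'-separated string of
        per-step timings in milliseconds.
        """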
        generator = torch.Generator(device=device).manual_seed(seed)
        run_task_time = 0
        time_cost_str = ''
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
        try:
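            # Run the diffusion pipeline; frames[0] is the frame sequence of the
            # first (and only) generated video.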
            with torch.no_grad():
                video = pipe(
                    prompt=prompt,
                    negative_prompt=negative_prompt,
                    generator=generator,
                    width=width,
                    height=height,
                    num_frames=num_frames,
                    num_inference_steps=num_inference_steps,
                ).frames[0]
        finally:
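            # Release cached GPU memory even if generation fails.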
            torch.cuda.empty_cache()
            gc.collect()
        run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)

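        # Export the generated frames to a temporary MP4 that Gradio can serve.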
        output_path = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False).name
        export_to_video(video, output_path, fps=frame_rate)
        
        del video
        torch.cuda.empty_cache()
        return output_path, time_cost_str

    def get_time_cost(run_task_time, time_cost_str):
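        """Append the milliseconds elapsed since the previous call to time_cost_str."""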
        now_time = int(time.time()*1000)
        if run_task_time == 0:
            time_cost_str = 'start'
        else:
            if time_cost_str != '':
                time_cost_str += '-->'
            time_cost_str += f'{now_time - run_task_time}'
        run_task_time = now_time
        return run_task_time, time_cost_str

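    # Assemble the UI: prompt inputs and the generate button on the left,
    # the rendered video and step timings on the right.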
    with gr.Blocks() as demo:
        with gr.Row():
            with gr.Column():
                txt2vid_prompt = gr.Textbox(
                    label="Enter Your Prompt",
                    placeholder="Describe the video you want to generate (minimum 50 characters)...",
                    value="A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage.",
                    lines=5,
                )

                txt2vid_negative_prompt = gr.Textbox(
                    label="Enter Negative Prompt",
                    placeholder="Describe what you don't want in the video...",
                    value="low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly",
                    lines=2,
                )

                txt2vid_generate = gr.Button(
                    "Generate Video",
                    variant="primary",
                    size="lg",
                )

            with gr.Column():
                txt2vid_output = gr.Video(label="Generated Output")
                txt2vid_generated_cost = gr.Textbox(label="Time cost by step (ms)", visible=True, interactive=False)
                
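        # Only the prompt fields are wired as inputs; width, height, frame count,
        # inference steps, and seed use the defaults declared on text_to_video.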
        txt2vid_generate.click(
            fn=text_to_video,
            inputs=[txt2vid_prompt, txt2vid_negative_prompt],
            outputs=[txt2vid_output, txt2vid_generated_cost],
        )

    return demo
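

# Standalone launch guard, assuming this module is also meant to run directly;
# the original file only defines create_demo(), which the Space may import elsewhere.
if __name__ == "__main__":
    create_demo().launch()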