Spaces:
Running
on
Zero
Running
on
Zero
import spaces | |
from datetime import datetime | |
import gc | |
import gradio as gr | |
import numpy as np | |
import random | |
from pathlib import Path | |
import os | |
from diffusers import AutoencoderKLLTXVideo, LTXPipeline, LTXVideoTransformer3DModel | |
from diffusers.utils import export_to_video | |
from transformers import T5EncoderModel, T5Tokenizer | |
import torch | |
from utils import install_packages | |
torch.backends.cuda.matmul.allow_tf32 = True | |
torch.backends.cudnn.allow_tf32 = True | |
torch.jit._state.disable() | |
torch.set_grad_enabled(False) | |
gc.collect() | |
torch.cuda.empty_cache() | |
ckpt_path = Path("a-r-r-o-w/LTX-Video-0.9.1-diffusers") | |
single_file_url = "https://huggingface.co/Lightricks/LTX-Video/ltx-video-2b-v0.9.1.safetensors" | |
transformer = LTXVideoTransformer3DModel.from_single_file( | |
single_file_url, torch_dtype=torch.bfloat16 | |
) | |
vae = AutoencoderKLLTXVideo.from_single_file( | |
single_file_url, torch_dtype=torch.bfloat16) | |
vae.eval() | |
vae = vae.to("cuda") | |
text_encoder = T5EncoderModel.from_pretrained( | |
ckpt_path, | |
subfolder="text_encoder", | |
torch_dtype=torch.bfloat16 | |
) | |
text_encoder.eval() | |
text_encoder = text_encoder.to("cuda") | |
tokenizer = T5Tokenizer.from_pretrained( | |
ckpt_path, | |
subfolder="tokenizer" | |
) | |
pipeline = LTXPipeline.from_single_file( | |
single_file_url, | |
transformer=transformer, | |
text_encoder=text_encoder, | |
tokenizer=tokenizer, | |
vae=vae, | |
torch_dtype=torch.bfloat16 | |
) | |
# pipeline.enable_model_cpu_offload() | |
pipeline.vae.enable_tiling() | |
pipeline.vae.enable_slicing() | |
pipeline = pipeline.to("cuda") | |
MAX_SEED = np.iinfo(np.int32).max | |
MAX_IMAGE_SIZE = 1280 | |
def infer( | |
prompt, | |
negative_prompt, | |
seed, | |
randomize_seed, | |
width=704, | |
height=448, | |
num_frames=129, | |
fps=24, | |
num_inference_steps=30, | |
progress=gr.Progress(track_tqdm=True), | |
): | |
if randomize_seed: | |
seed = random.randint(0, MAX_SEED) | |
generator = torch.Generator(device='cuda').manual_seed(seed) | |
with torch.amp.autocast_mode.autocast('cuda', torch.bfloat16), torch.no_grad(), torch.inference_mode(): | |
video = pipeline( | |
prompt=prompt, | |
negative_prompt=negative_prompt, | |
width=width, | |
height=height, | |
num_frames=num_frames, | |
# guidance_scale=guidance_scale, | |
num_inference_steps=num_inference_steps, | |
# decode_timestep=decode_timestep, | |
# decode_noise_scale=decode_noise_scale, | |
generator=generator, | |
# max_sequence_length=512, | |
).frames[0] | |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | |
filename = f"output_{timestamp}.mp4" | |
os.makedirs("output", exist_ok=True) | |
output_path = f"./output/{filename}" | |
export_to_video(video, output_path, fps=fps) | |
gc.collect | |
torch.cuda.empty_cache() | |
return output_path | |
css = """ | |
#col-container { | |
margin: 0 auto; | |
max-width: 640px; | |
} | |
""" | |
with gr.Blocks(css=css) as demo: | |
with gr.Column(elem_id="col-container"): | |
gr.Markdown(" # Text-to-Image Gradio Template") | |
with gr.Row(): | |
prompt = gr.Textbox( | |
label="Prompt", | |
lines=3, | |
value=str("A woman with long brown hair and light skin smiles at another woman with long blonde hair. The woman with brown hair wears a black jacket and has a small, barely noticeable mole on her right cheek. The camera angle is a close-up, focused on the woman with brown hair's face. The lighting is warm and natural, likely from the setting sun, casting a soft glow on the scene. The scene appears to be real-life footage"), | |
) | |
negative_prompt = gr.Textbox( | |
label="Negative prompt", | |
lines=3, | |
value=str("worst quality, blurry, distorted"), | |
) | |
with gr.Row(): | |
run_button = gr.Button("Run", scale=0, variant="huggingface") | |
with gr.Row(): | |
result = gr.Video(label="Result", show_label=False) | |
with gr.Accordion("Advanced Settings", open=False): | |
seed = gr.Slider( | |
label="Seed", | |
minimum=0, | |
maximum=MAX_SEED, | |
step=1, | |
value=0, | |
) | |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True) | |
with gr.Row(): | |
width = gr.Slider( | |
label="Width", | |
minimum=256, | |
maximum=MAX_IMAGE_SIZE, | |
step=32, | |
value=704, # Replace with defaults that work for your model | |
) | |
height = gr.Slider( | |
label="Height", | |
minimum=256, | |
maximum=MAX_IMAGE_SIZE, | |
step=32, | |
value=448, # Replace with defaults that work for your model | |
) | |
with gr.Row(): | |
num_frames = gr.Slider( | |
label="Number of frames", | |
minimum=1, | |
maximum=257, | |
step=32, | |
value=129, # Replace with defaults that work for your model | |
) | |
fps = gr.Slider( | |
label="Number of frames per second", | |
minimum=1, | |
maximum=30, | |
step=1, | |
value=24, # Replace with defaults that work for your model | |
) | |
with gr.Row(): | |
num_inference_steps = gr.Slider( | |
label="Number of inference steps", | |
minimum=1, | |
maximum=50, | |
step=1, | |
value=30, # Replace with defaults that work for your model | |
) | |
gr.on( | |
triggers=[run_button.click, prompt.submit], | |
fn=infer, | |
inputs=[ | |
prompt, | |
negative_prompt, | |
seed, | |
randomize_seed, | |
width, | |
height, | |
num_frames, | |
fps, | |
num_inference_steps, | |
], | |
outputs=[result], | |
) | |
if __name__ == "__main__": | |
install_packages() | |
demo.launch() |