# TEXT_TO_VIDEO / ttv.py
# akthangdz's picture
# main
# a2495b3
# raw
# history blame contribute delete
# 958 Bytes
import torch
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video, load_image
import os
# Image-to-video generation with the CogVideoX-5b-I2V pipeline: takes a still
# image plus a text prompt and writes the generated frames to an MP4 file.
#
# Optional environment variables (when unset, the defaults below are used):
#   PROMPT_TEXT   text prompt passed to the pipeline
#   INPUT_IMAGE   image handed to diffusers' load_image (default "input.jpg")
#   OUTPUT_VIDEO  path of the MP4 file to write (default "output.mp4")
print("Starting...")

# The random generator used for sampling is created on the "cuda" device, so
# the script cannot complete without one. Check now rather than after the
# model checkpoint has already been loaded.
if not torch.cuda.is_available():
    raise RuntimeError(
        "A CUDA device is required: the random generator is created on 'cuda'."
    )

# Take the prompt from the environment, falling back to a default value.
prompt = os.getenv(
    "PROMPT_TEXT",
    "A little girl is riding a bicycle at high speed. Focused, detailed, realistic.",
)
input_image_path = os.getenv("INPUT_IMAGE", "input.jpg")
output_video_path = os.getenv("OUTPUT_VIDEO", "output.mp4")

print("Loading model...")
# The image is loaded before the model so a bad image path fails early.
image = load_image(image=input_image_path)
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-5b-I2V",
    torch_dtype=torch.bfloat16,
)

# NOTE(review): per the diffusers documentation these three switches reduce
# peak GPU memory at the cost of speed — confirm they suit the target GPU.
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()
pipe.vae.enable_slicing()

print("Generating...")
# Fixed seed (42) for the generator handed to the pipeline.
generator = torch.Generator(device="cuda").manual_seed(42)
result = pipe(
    prompt=prompt,
    image=image,
    num_videos_per_prompt=1,
    num_inference_steps=50,
    num_frames=49,
    guidance_scale=6,
    generator=generator,
)
# Only one video is requested (num_videos_per_prompt=1), so take entry 0.
video = result.frames[0]

print("Saving video...")
# 49 frames written at 8 fps gives a clip of roughly six seconds.
export_to_video(video, output_video_path, fps=8)
print("Done!")