Spaces:

akthangdz
/

TEXT_TO_VIDEO

Running

TEXT_TO_VIDEO / ttv.py

main

a2495b3 26 days ago

958 Bytes

	import torch
	from diffusers import CogVideoXImageToVideoPipeline
	from diffusers.utils import export_to_video, load_image
	import os

	# Script flow: read the prompt, load the input image and the CogVideoX
	# image-to-video pipeline, generate one clip, and write it to output.mp4.
	print("Starting...")

	# Take the prompt from the PROMPT_TEXT environment variable, or fall back
	# to the built-in default when the variable is not set.
	default_prompt = "A little girl is riding a bicycle at high speed. Focused, detailed, realistic."
	prompt = os.environ.get("PROMPT_TEXT", default_prompt)

	print("Loading model...")
	# The input image is read from input.jpg, resolved relative to the
	# current working directory.
	source_image = load_image(image="input.jpg")

	model_id = "THUDM/CogVideoX-5b-I2V"
	pipeline = CogVideoXImageToVideoPipeline.from_pretrained(
	    model_id,
	    torch_dtype=torch.bfloat16,
	)

	# Offloading plus VAE tiling/slicing are the diffusers switches meant to
	# lower peak GPU memory use; they are enabled before any inference runs.
	pipeline.enable_sequential_cpu_offload()
	pipeline.vae.enable_tiling()
	pipeline.vae.enable_slicing()

	print("Generating...")
	# A generator seeded with a fixed value (42) is passed to the pipeline so
	# the random draws start from the same state on every run.
	seeded_generator = torch.Generator(device="cuda").manual_seed(42)
	result = pipeline(
	    prompt=prompt,
	    image=source_image,
	    num_videos_per_prompt=1,
	    num_inference_steps=50,
	    num_frames=49,
	    guidance_scale=6,
	    generator=seeded_generator,
	)
	# Only one video is requested, so take the first entry of `frames`.
	frames = result.frames[0]

	print("Saving video...")
	export_to_video(frames, "output.mp4", fps=8)
	print("Done!")