# Model checkpoint locations, given as relative paths.
# NOTE(review): presumably resolved against the process working directory —
# confirm with the loader that reads this file.
pretrained_model_name_or_path: 'checkpoints/stable-video-diffusion-img2vid-xt'
unet_checkpoint_path: 'checkpoints/Sonic/unet.pth'
audio2token_checkpoint_path: 'checkpoints/Sonic/audio2token.pth'
audio2bucket_checkpoint_path: 'checkpoints/Sonic/audio2bucket.pth'

# Weight precision selector. Allowed values: 'fp16' or 'fp32'.
weight_dtype: 'fp16'

# Sampling / output settings.
# NOTE(review): the groupings and descriptions in this section are inferred
# from key names only — confirm against the code that consumes this config.
num_inference_steps: 25
n_sample_frames: 25
fps: 12.5
decode_chunk_size: 8
motion_bucket_scale: 1.0

# Input image settings (presumably resize target and crop-area factor —
# TODO confirm).
image_size: 512
area: 1.1

# Frame windowing (presumably controls how frames are chunked and advanced —
# TODO confirm).
frame_num: 50
step: 2
overlap: 0
shift_offset: 7

# Guidance scales. The min and max appearance scales are set to the same
# value here.
min_appearance_guidance_scale: 2.0
max_appearance_guidance_scale: 2.0
audio_guidance_scale: 7.5

# Noise / conditioning strength factors.
i2i_noise_strength: 1.0
ip_audio_scale: 1.0
noise_aug_strength: 0.0

# Boolean feature toggle. Written in the canonical lowercase form so that
# every YAML 1.1 / 1.2 loader parses it as a boolean rather than a string.
# NOTE(review): presumably enables inter-frame processing — confirm with the
# consuming code.
use_interframe: true

# Integer seed value.
# NOTE(review): presumably seeds the random generator for reproducible
# output — confirm.
seed: 72589