---
# Inference configuration: a flat mapping of checkpoint locations and
# sampling/guidance settings. One key per line so that every setting is
# actually parsed (an inline comment otherwise swallows the rest of a line).

# Checkpoint locations.
# NOTE(review): paths are relative; presumably resolved against the working
# directory of the consuming script -- confirm.
pretrained_model_name_or_path: "checkpoints/stable-video-diffusion-img2vid-xt"
unet_checkpoint_path: "checkpoints/Sonic/unet.pth"
audio2token_checkpoint_path: "checkpoints/Sonic/audio2token.pth"
audio2bucket_checkpoint_path: "checkpoints/Sonic/audio2bucket.pth"

# Weight precision. Accepted values: fp16, fp32.
weight_dtype: 'fp16'

# Sampling settings.
# NOTE(review): units and exact meaning are defined by the consumer -- confirm
# before changing (e.g. fps is fractional, so it must stay a float).
num_inference_steps: 25
n_sample_frames: 25
fps: 12.5
decode_chunk_size: 8
motion_bucket_scale: 1.0

# Input sizing and frame windowing.
# NOTE(review): presumably image_size is in pixels and area is a crop scale
# factor; frame_num/step/overlap/shift_offset look like sliding-window
# parameters -- verify against the consumer.
image_size: 512
area: 1.1
frame_num: 50
step: 2
overlap: 0
shift_offset: 7

# Guidance and conditioning strengths.
# min and max appearance guidance are equal here, i.e. a constant value.
min_appearance_guidance_scale: 2.0
max_appearance_guidance_scale: 2.0
audio_guidance_scale: 7.5
i2i_noise_strength: 1.0
ip_audio_scale: 1.0
noise_aug_strength: 0.00

# Boolean flag, written in canonical lowercase form.
use_interframe: true

# Fixed integer seed.
# NOTE(review): presumably for reproducible sampling -- confirm.
seed: 72589