# MM-StoryAgent / configs / mm_story_agent.yaml
# Xu Xuenan
# Transformers MusicGen
# 676ec69
# raw
# history blame
# 1.59 kB
# Anchored so that the `sample_rate` entries of sound_generation and
# speech_generation can alias this single value.
audio_sample_rate: &audio_sample_rate 16000
# Codec choices noted by the author: [mp3, aac, ...]
audio_codec: mp3
# Story premise. The parenthesised strings are literal values; presumably
# they act as "unset" placeholders -- confirm against the consumer.
story_setting:
  story_topic: "learn to use computer"
  main_role: "(no main role specified)"
  scene: "(no scene specified)"
# Settings for the story-generation step.
story_gen_config:
  max_conv_turns: 3
  num_outline: 4
  temperature: 0.5
# Caption rendering settings. Commented-out keys are optional overrides
# left disabled by the author.
caption_config:
  font: resources/font/msyh.ttf
  # bg_color: LightGrey
  fontsize: 32
  color: white
  # stroke_color: white
  # stroke_width: 0.5
  max_single_caption_length: 50
# Sound-effect generation settings, split into per-call options (call_cfg)
# and revision options (revise_cfg).
sound_generation:
  call_cfg:
    guidance_scale: 3.5
    seed: 0
    ddim_steps: 100
    n_candidate_per_text: 3
  revise_cfg:
    num_turns: 3
  # Alias of the top-level audio_sample_rate anchor.
  # NOTE(review): nesting level reconstructed; placed as a sibling of
  # call_cfg/revise_cfg -- confirm it does not belong inside revise_cfg.
  sample_rate: *audio_sample_rate
# Speech generation settings.
speech_generation:
  call_cfg:
    voice: longyuan
  # Alias of the top-level audio_sample_rate anchor.
  # NOTE(review): nesting level reconstructed; placed as a sibling of
  # call_cfg -- confirm it does not belong inside call_cfg.
  sample_rate: *audio_sample_rate
# Image generation settings: revision options (revise_cfg), model/object
# options (obj_cfg) and per-call options (call_cfg).
image_generation:
  revise_cfg:
    num_turns: 3
  obj_cfg:
    model_name: stabilityai/stable-diffusion-xl-base-1.0
    id_length: 1
    height: 512
    width: 1024
  call_cfg:
    seed: 112536
    guidance_scale: 10.0
    # Styles listed by the author:
    # ['(No style)', 'Japanese Anime', 'Digital/Oil Painting', 'Pixar/Disney Character',
    # 'Photographic', 'Comic book', 'Line art', 'Black and White Film Noir', 'Isometric Rooms']
    # NOTE(review): "Storybook" is not in the list above -- confirm the list
    # is current.
    style_name: "Storybook"
# Music generation settings. obj_cfg and call_cfg are explicitly empty
# mappings (no overrides supplied here).
music_generation:
  revise_cfg:
    num_turns: 3
  obj_cfg: {}
  call_cfg: {}
# Slideshow / video composition settings.
slideshow_effect:
  fade_duration: 0.8
  slide_duration: 0.4
  zoom_speed: 0.5
  move_ratio: 0.9
  # NOTE(review): nesting reconstructed. The four keys below are kept inside
  # slideshow_effect because nothing in the file separates them from it;
  # confirm the consumer does not expect any of them (fps in particular)
  # at the top level.
  sound_volume: 0.6
  music_volume: 0.5
  bg_speech_ratio: 0.6
  fps: 8