---
# Pipeline configuration: story settings, caption rendering, per-modality
# generation sections (sound, speech, image, music) and slideshow options.
#
# NOTE(review): this file was reconstructed from a copy whose line breaks and
# indentation had been lost. Keys, values, anchors and aliases are unchanged,
# but nesting depth was inferred. Confirm against the consuming code before
# relying on the spots marked NOTE(review) below.

# Anchored so the generation sections below can alias the same value.
audio_sample_rate: &audio_sample_rate 16000
audio_codec: mp3  # [mp3, aac, ...]

story_setting:
  story_topic: "learn to use computer"
  main_role: "(no main role specified)"
  scene: "(no scene specified)"

story_gen_config:
  max_conv_turns: 3
  num_outline: 4
  temperature: 0.5

caption_config:
  font: resources/font/msyh.ttf
  # bg_color: LightGrey
  fontsize: 32
  color: white
  # stroke_color: white
  # stroke_width: 0.5
  max_single_caption_length: 50

sound_generation:
  call_cfg:
    guidance_scale: 3.5
    seed: 0
    ddim_steps: 100
    n_candidate_per_text: 3
  revise_cfg:
    num_turns: 3
  # NOTE(review): placed as a sibling of call_cfg/revise_cfg; it may instead
  # belong inside revise_cfg - confirm.
  sample_rate: *audio_sample_rate

speech_generation:
  call_cfg:
    voice: longyuan
  # NOTE(review): placed as a sibling of call_cfg; it may instead belong
  # inside call_cfg - confirm.
  sample_rate: *audio_sample_rate

image_generation:
  revise_cfg:
    num_turns: 3
  obj_cfg:
    model_name: stabilityai/stable-diffusion-xl-base-1.0
    id_length: 1
    height: 512
    width: 1024
  call_cfg:
    seed: 112536
    guidance_scale: 10.0
    style_name: "Storybook"
    # ['(No style)', 'Japanese Anime', 'Digital/Oil Painting', 'Pixar/Disney Character',
    # 'Photographic', 'Comic book', 'Line art', 'Black and White Film Noir', 'Isometric Rooms']

music_generation:
  revise_cfg:
    num_turns: 3
  obj_cfg: {}
  call_cfg: {}

slideshow_effect:
  fade_duration: 0.8
  slide_duration: 0.4
  zoom_speed: 0.5
  move_ratio: 0.9
  # NOTE(review): the three audio-mix keys below are assumed to belong to
  # slideshow_effect; they may instead be top-level keys - confirm.
  sound_volume: 0.6
  music_volume: 0.5
  bg_speech_ratio: 0.6

# NOTE(review): assumed top-level, alongside audio_sample_rate/audio_codec.
fps: 8