# Shared audio settings.
# The sample rate is anchored as `audio_sample_rate` and reused through the
# `*audio_sample_rate` alias by sound_generation.sample_rate and
# speech_generation.call_cfg.sample_rate, so changing it here changes all three.
audio_sample_rate: &audio_sample_rate 16000  # presumably Hz — TODO confirm
audio_codec: mp3 # [mp3, aac, ...]


# Story setting values.
# main_role and scene are set to placeholder text here.
# NOTE(review): how the consumer treats these placeholder strings is not
# visible in this file — confirm before relying on them as "unset" markers.
story_setting:
    story_topic: "learn to use computer"
    main_role: "(no main role specified)"
    scene: "(no scene specified)"

# Story generation parameters.
# NOTE(review): the per-key meanings below are inferred from the key names
# only — confirm against the code that reads this section.
story_gen_config:
    max_conv_turns: 3  # presumably an upper bound on conversation turns
    num_outline: 4  # presumably the number of outline entries to produce
    temperature: 0.5  # presumably the text-model sampling temperature

# Caption appearance settings.
# The commented-out keys (bg_color, stroke_color, stroke_width) are disabled
# alternatives kept for reference; uncomment to enable them.
caption_config:
    # Relative path to the font file.
    font: resources/font/msyh.ttf
    # bg_color: LightGrey
    fontsize: 32
    color: white
    # stroke_color: white
    # stroke_width: 0.5
# NOTE(review): max_single_caption_length is a TOP-LEVEL key, not a member of
# caption_config, even though it directly follows that section. Confirm the
# consumer reads it from the top level before moving it.
# The unit (presumably characters) is not shown here — TODO confirm.
max_single_caption_length: 50

# Sound generation settings.
sound_generation:
    # NOTE(review): the key names suggest arguments for the generation call
    # (guidance strength, RNG seed, number of DDIM steps, candidates per text
    # prompt) — confirm against the consumer.
    call_cfg:
        guidance_scale: 3.5
        seed: 0
        ddim_steps: 100
        n_candidate_per_text: 3
    # Presumably the number of revision rounds — TODO confirm.
    revise_cfg:
        num_turns: 3
    # Alias of the top-level `audio_sample_rate` anchor (resolves to 16000).
    sample_rate: *audio_sample_rate


# Speech generation settings.
speech_generation:
    call_cfg:
        # NOTE(review): presumably a voice identifier understood by the
        # speech backend — confirm the set of valid values.
        voice: longyuan
        # Alias of the top-level `audio_sample_rate` anchor (resolves to 16000).
        sample_rate: *audio_sample_rate


# Image generation settings.
image_generation:
    # Presumably the number of revision rounds — TODO confirm.
    revise_cfg:
        num_turns: 3
    # NOTE(review): the key names suggest settings used when constructing the
    # image generator (model identifier, output size) — confirm against the
    # consumer. The meaning of id_length is not evident from this file.
    obj_cfg:
        model_name: stabilityai/stable-diffusion-xl-base-1.0
        id_length: 1
        height: 512
        width: 1024
    call_cfg:
        seed: 112536
        guidance_scale: 10.0
        # Style options noted by the original author:
        #   ['(No style)', 'Japanese Anime', 'Digital/Oil Painting', 'Pixar/Disney Character',
        #    'Photographic', 'Comic book', 'Line art', 'Black and White Film Noir', 'Isometric Rooms']
        # NOTE(review): the configured value "Storybook" is not in the list
        # above; the list may be incomplete — confirm it is a supported style.
        style_name: "Storybook"

# Music generation settings.
music_generation:
    # Presumably the number of revision rounds — TODO confirm.
    revise_cfg:
        num_turns: 3
    # Both mappings are explicitly empty ({}), i.e. no values are set here.
    obj_cfg: {}
    call_cfg: {}

# Slideshow / video composition settings.
# NOTE(review): units and exact semantics are not visible in this file; the
# hedged notes below are inferred from key names — confirm against the consumer.
slideshow_effect:
    fade_duration: 0.8  # presumably seconds
    slide_duration: 0.4  # presumably seconds
    zoom_speed: 0.5
    move_ratio: 0.9
    # Audio levels (presumably relative gains — TODO confirm).
    sound_volume: 0.6
    music_volume: 0.5
    bg_speech_ratio: 0.6

    # Presumably output frames per second.
    fps: 8