File size: 6,707 Bytes
959541f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# Copyright (2024) Bytedance Ltd. and/or its affiliates 
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

from __future__ import annotations
import uuid

from loguru import logger
import hashlib
import gradio as gr

import io
import base64

from caller import (
    SeedT2ICaller,
    SeedEditCaller
)
from PIL import Image

# API key placeholder. It is empty here and is not referenced anywhere else in
# the visible part of this file.
API_KEY = ""

# Markdown shown at the bottom of the page (passed to gr.Markdown in main()):
# usage steps, a note about demo limitations, and the CC BY-NC / content-policy
# notice in English and Chinese. The trailing backslashes inside the literal are
# line continuations, so the notice is emitted as one long line.
help_text = """
## How to use this Demo
1. Type-in the caption/instruction text box, and click "Generate" to generate an initial image using Seed_T2I_V14 (CFG and steps are not used here)
2. Type-in the caption/instruction text box, and click "Edit" to edit the current image.
3. Click Undo if you are not satisfied with the current results, and re-edit. Otherwise, edit will apply to current results.
4. Currently, we do not support too many rounds of editing [as shown in our video] since the current API hasn't been updated to the new model yet.

This is a demo with limited QPS and a simple interface.
For a better experience, please use Doubao/Dreamina APP.

<font size=2>Note: This demo is governed by the license of CC BY-NC \
We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, \
including hate speech, violence, pornography, deception, etc. \
(注:本演示受CC BY-NC的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,\
包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)
"""

def image2str(image):
    """Return an HTML snippet that embeds *image* inline as a PNG data URI.

    The image is written to an in-memory buffer via ``image.save(..., format="PNG")``,
    the resulting bytes are base64-encoded, and the text is placed inside an
    ``<img>`` tag wrapped in a left-floated ``<div>``.
    """
    html_template = '<div style="float:left"><img src="data:image/png;base64, {}"></div>'

    # Serialize to PNG entirely in memory; no file is touched.
    with io.BytesIO() as png_buffer:
        image.save(png_buffer, format="PNG")
        png_bytes = png_buffer.getvalue()

    encoded = base64.b64encode(png_bytes).decode()
    return html_template.format(encoded)

def main():
    """Build and launch the Gradio demo for text-to-image generation and editing.

    Creates the T2I and edit callers, defines the button callbacks as closures
    over them, lays out the UI inside ``gr.Blocks``, binds the callbacks, and
    finally calls ``demo.launch`` on 0.0.0.0:8024.

    The ``state`` dict (held in ``gr.State``) maps the MD5 hex digest of an
    image's raw bytes to that image. Its size is compared against
    ``max_edit_iter`` to decide whether further edits are allowed.
    """
    import re  # local import: only used to sanitize cache file names

    resolution = 1024
    max_edit_iter = 3

    cfg_t2i = {
        "resolution": resolution
    }
    model_t2i = SeedT2ICaller(cfg_t2i)

    # NOTE: this is the same dict object as cfg_t2i, not a copy.
    cfg_edit = cfg_t2i

    model_edit = SeedEditCaller(cfg_edit)
    logger.info("All models loaded")

    def _cache_path(instruction: str) -> str:
        """Return a unique ``./cache/<slug>_<uuid>.jpg`` path for *instruction*."""
        slug = "-".join(instruction.split()[:10])
        # The instruction is free-form user text. Replace every run of
        # characters that is not a word character or hyphen so the slug can
        # never contain path separators or "..".
        slug = re.sub(r"[^\w-]+", "-", slug)[:50]
        return f"./cache/{slug}_{uuid.uuid4()}.jpg"

    def generate_t2i(instruction: str, state):
        """Generate an initial image from *instruction* and record it in *state*.

        Returns ``(instruction, image, state)``.

        Raises:
            ValueError: if the caller reports failure or returns no image.
        """
        logger.info("Generate images ...")
        # Call the model to generate the image and capture its result.
        gen_image, success = model_t2i.generate(instruction, batch_size=1)

        # Check that generation succeeded and that the image is usable.
        if not success or gen_image is None:
            logger.error("Image generation failed or returned None.")
            raise ValueError("Image generation was unsuccessful.")

        # Write cache
        if state is None:
            state = {}

        output_md5 = hashlib.md5(gen_image.tobytes()).hexdigest()
        logger.info(output_md5)
        state[output_md5] = gen_image

        return instruction, gen_image, state

    def generate(prev_image, cur_image, cfg_scale, instruction, state):
        """Edit the current image according to *instruction*.

        Returns ``(prev_image, cur_image, instruction, state)``. On success the
        image that was edited becomes ``prev_image`` and the edited result
        becomes ``cur_image``. On any failure, or when the edit limit has been
        reached, the inputs are returned unchanged (best effort, never raises).
        """
        # Same guard as generate_t2i, so a missing state cannot crash the callback.
        if state is None:
            state = {}

        if len(state) >= max_edit_iter:
            return prev_image, cur_image, instruction, state

        if cur_image is None:
            cur_image = prev_image

        logger.info("Generating edited images ...")
        # Nothing to do without an instruction or without any image to edit.
        if not instruction or cur_image is None:
            return prev_image, cur_image, instruction, state

        try:
            logger.info("Running diffusion models ...")

            image_out = _cache_path(instruction)
            logger.info(f"Input size {cur_image.size}")

            edited_image, success = model_edit.edit(
                cur_image,
                instruction,
                batch_size=1,
                cfg_scale=cfg_scale,
                filename=image_out,
            )
            if not success or edited_image is None:
                logger.error("Image generation failed or returned None.")
                return prev_image, cur_image, instruction, state

            output_md5 = hashlib.md5(edited_image.tobytes()).hexdigest()
            logger.info(f"EDIT adding {output_md5}")
            state[output_md5] = edited_image

            return cur_image, edited_image, instruction, state

        except Exception:
            # Deliberate best effort: keep the UI usable, but log the traceback.
            logger.exception("Image editing failed.")
            return prev_image, cur_image, instruction, state

    def reset():
        """Return default values for (cfg_scale, prev_image, cur_image, instruction, state)."""
        return 0.5, None, None, "", {}

    def undo(prev_image, cur_image, instruction, state):
        """Drop the current image from *state* and show the previous image again.

        Returns ``(prev_image, prev_image, instruction, state)``.
        """
        if state is None:
            state = {}

        if cur_image is not None:
            cur_md5 = hashlib.md5(cur_image.tobytes()).hexdigest()
            if cur_md5 in state:
                logger.info(f"UNDO removing {cur_md5}")
                del state[cur_md5]
        return prev_image, prev_image, instruction, state

    def show_state(state):
        """Return the label text: the cache size, or a notice once the limit is hit."""
        num_cache = len(state) if state else 0
        if num_cache < max_edit_iter:
            return f"Num Cache: {num_cache}"
        return "Max edit number reached. Please reset for testing."

    with gr.Blocks(css="footer {visibility: hidden}") as demo:
        state = gr.State({})

        with gr.Row():
            with gr.Column(scale=2):
                prev_image = gr.Image(label="Input Image", type="pil", interactive=True, visible=False, height=resolution, width=resolution)
                cur_image = gr.Image(label="Edited Image", type="pil", interactive=True, height=resolution, width=resolution)

            with gr.Column(scale=1):
                with gr.Row():
                    generate_t2i_button = gr.Button("Generate")
                    generate_button = gr.Button("Edit")
                    reset_button = gr.Button("Reset")
                    undo_button = gr.Button("Undo")

                with gr.Row():
                    instruction = gr.Textbox(lines=1, label="Caption (Generate) / Instruction (Edit)", interactive=True)

                with gr.Row():
                    cfg_scale = gr.Slider(value=0.5, minimum=0.0, maximum=1.0, step=0.1, label="Edit Strength (CFG)", interactive=True)

                with gr.Row():
                    output_label = gr.Label()

        gr.Markdown(help_text)

        # Function bindings. The state label is refreshed with .then() so that
        # show_state runs after the primary handler has finished and sees the
        # updated state. Separate click listeners on the same button are
        # dispatched independently and could read the pre-click state.
        generate_t2i_button.click(
            generate_t2i, [instruction, state], [instruction, cur_image, state]
        ).then(show_state, [state], output_label)
        generate_button.click(
            generate,
            [prev_image, cur_image, cfg_scale, instruction, state],
            [prev_image, cur_image, instruction, state],
        ).then(show_state, [state], output_label)
        reset_button.click(
            reset, [], [cfg_scale, prev_image, cur_image, instruction, state]
        ).then(show_state, [state], output_label)
        undo_button.click(
            undo,
            [prev_image, cur_image, instruction, state],
            [prev_image, cur_image, instruction, state],
        ).then(show_state, [state], output_label)

    demo.launch(server_name="0.0.0.0", server_port=8024)

# Start the demo only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()