boomcheng committed
Commit be7e4dd · verified · 1 Parent(s): a8c917d

Update app.py

Files changed (1): app.py +32 -52
app.py CHANGED
@@ -1,6 +1,5 @@
 import gradio as gr
 import numpy as np
-import random
 from PIL import Image
 import torch
 from diffusers import ControlNetModel, UniPCMultistepScheduler
@@ -10,45 +9,41 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
 
 # Initialize model
 controlnet = ControlNetModel.from_pretrained("qihoo360/HiCo_T2I", torch_dtype=torch.float16)
-print("ControlNet model loaded!")
+print("ControlNet model loaded!")
 pipe = StableDiffusionControlNetMultiLayoutPipeline.from_pretrained(
     "krnl/realisticVisionV51_v51VAE", controlnet=[controlnet], torch_dtype=torch.float16
 )
-print("Stable Diffusion pipeline loaded!")
+print("Stable Diffusion pipeline loaded!")
 pipe = pipe.to(device)
 pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
 
 MAX_SEED = np.iinfo(np.int32).max
 
-# Function to dynamically update object input fields
-def update_object_inputs(num_objects):
-    captions = [gr.Textbox(label=f"Subcaption for Object {i+1}", placeholder=f"Enter caption for Object {i+1}") for i in range(num_objects)]
-    bbox_coords = [gr.Textbox(label=f"Bounding Box for Object {i+1} (x1, y1, x2, y2)", placeholder="e.g., 50, 50, 150, 150") for i in range(num_objects)]
-    return captions + bbox_coords
-
-# Inference function
-def infer(prompt, num_objects, subcaptions, bboxes, guidance_scale, num_inference_steps, seed):
-    obj_class = ["Background"] + subcaptions
-    obj_bbox = [[0, 0, 512, 512]] + [list(map(int, bbox.split(','))) for bbox in bboxes]
-
+# Function to build layout inputs from user input
+def generate_user_data(object_classes, object_bboxes):
     img_width, img_height = 512, 512
     r_image = np.zeros((img_height, img_width, 3), dtype=np.uint8)
-    list_cond_image = [np.zeros_like(r_image, dtype=np.uint8)]
-    for bbox in obj_bbox[1:]:
-        x1, y1, x2, y2 = bbox
+    list_cond_image = []
+
+    for bbox in object_bboxes:
+        x1, y1, x2, y2 = map(int, bbox.split(","))
         cond_image = np.zeros_like(r_image, dtype=np.uint8)
         cond_image[y1:y2, x1:x2] = 255
-        list_cond_image.append(cond_image)
-
-    list_cond_image_pil = [Image.fromarray(img).convert('RGB') for img in list_cond_image]
+        list_cond_image.append(Image.fromarray(cond_image).convert('RGB'))
+
+    return object_classes.split(","), list_cond_image
+
+# Inference function
+def infer(prompt, guidance_scale, num_inference_steps, randomize_seed, seed, object_classes, object_bboxes):
+    obj_classes, list_cond_image_pil = generate_user_data(object_classes, object_bboxes)
+    if randomize_seed or seed is None:
+        seed = np.random.randint(0, MAX_SEED)
 
-    if seed is None:
-        seed = random.randint(0, MAX_SEED)
     generator = torch.manual_seed(seed)
 
     image = pipe(
         prompt=prompt,
-        layo_prompt=obj_class,
+        layo_prompt=obj_classes,
         guess_mode=False,
         guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
@@ -62,42 +57,27 @@ def infer(prompt, num_objects, subcaptions, bboxes, guidance_scale, num_inference_steps, seed):
 
 # Gradio UI
 with gr.Blocks() as demo:
-    gr.Markdown("# Text-to-Image with Layout Control")
+    gr.Markdown("# Text-to-Image Generator with Manual Input")
 
-    # Global Caption and Object Number
     with gr.Row():
-        prompt = gr.Textbox(label="Global Caption", placeholder="Enter a global caption", value="123")
-        num_objects = gr.Slider(label="Number of Objects", minimum=1, maximum=5, step=1, value=1)
-
-    # Dynamic inputs for subcaptions and bounding boxes
-    subcaptions_column = gr.Column(visible=False)
-    bbox_column = gr.Column(visible=False)
-
-    # "Confirm" button
-    confirm_button = gr.Button("Confirm")
-
-    # Update inputs when the "Confirm" button is clicked
-    def on_confirm_click(n):
-        inputs = update_object_inputs(n)
-        return {subcaptions_column: inputs[:n], bbox_column: inputs[n:], "visible": True}
+        prompt = gr.Textbox(label="Prompt", placeholder="Enter your prompt here")
+        object_classes = gr.Textbox(label="Object Classes (comma-separated)", placeholder="e.g., Object_1,Object_2")
+        object_bboxes = gr.Textbox(label="Bounding Boxes (format: x1,y1,x2,y2; separated by commas)", placeholder="e.g., 50,50,150,150")
 
-    confirm_button.click(on_confirm_click, inputs=num_objects, outputs=[subcaptions_column, bbox_column])
-
-    # Advanced settings
-    with gr.Accordion("Advanced Settings", open=False):
+    with gr.Row():
         guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.0, maximum=10.0, step=0.1, value=7.5)
         num_inference_steps = gr.Slider(label="Number of Inference Steps", minimum=1, maximum=50, step=1, value=50)
-        seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, interactive=True)
 
-    # Generate button and result image
-    generate_button = gr.Button("Generate Image")
-    result_image = gr.Image(label="Generated Image")
+    randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
+    seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
+
+    run_button = gr.Button("Generate")
+    result = gr.Image(label="Generated Image")
 
-    # Link button to inference function
-    generate_button.click(
-        fn=infer,
-        inputs=[prompt, num_objects, subcaptions_column, bbox_column, guidance_scale, num_inference_steps, seed],
-        outputs=[result_image, seed]
+    run_button.click(
+        infer,
+        inputs=[prompt, guidance_scale, num_inference_steps, randomize_seed, seed, object_classes, object_bboxes],
+        outputs=[result, seed]
     )
 
 if __name__ == "__main__":
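A note on the new parsing in generate_user_data: object_bboxes arrives from its Textbox as a single string, so `for bbox in object_bboxes` iterates it character by character and `bbox.split(",")` will fail on real input. The label's "x1,y1,x2,y2;" format suggests boxes were meant to be split on ";" first. A minimal standalone sketch under that assumption (the helper name parse_layout_masks is hypothetical, not part of the commit):

import numpy as np
from PIL import Image

def parse_layout_masks(bbox_text, size=512):
    # Assumed format: one "x1,y1,x2,y2" group per box, boxes separated by ";".
    masks = []
    for box in bbox_text.split(";"):
        x1, y1, x2, y2 = map(int, box.split(","))
        mask = np.zeros((size, size, 3), dtype=np.uint8)
        mask[y1:y2, x1:x2] = 255  # white rectangle marks the object's region
        masks.append(Image.fromarray(mask).convert("RGB"))
    return masks

print(len(parse_layout_masks("50,50,150,150;200,60,300,160")))  # -> 2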
 
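The seed round-trip is the other notable change: outputs=[result, seed] implies infer returns the seed it actually used alongside the image, so the Seed slider reflects a randomized run and the run can be reproduced. A self-contained sketch of that logic (the helper name resolve_seed is hypothetical):

import numpy as np
import torch

MAX_SEED = np.iinfo(np.int32).max

def resolve_seed(randomize_seed, seed):
    # Draw a fresh seed when requested; np.random.randint's upper bound is exclusive.
    if randomize_seed or seed is None:
        seed = int(np.random.randint(0, MAX_SEED))
    # torch.manual_seed seeds the global RNG and returns the default Generator,
    # which can be passed to the pipeline for reproducible sampling.
    generator = torch.manual_seed(seed)
    return generator, seed

generator, seed = resolve_seed(True, None)
print(seed)  # reported back to the UI alongside the image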