# HiCo_T2I / app.py
# Snapshot of the Hugging Face Space file view:
#   uploader: boomcheng, commit 722243d (verified), message "Update app.py",
#   size 4.53 kB.
import gradio as gr
import numpy as np
import random
from PIL import Image
import torch
from diffusers import ControlNetModel, UniPCMultistepScheduler
from hico_pipeline import StableDiffusionControlNetMultiLayoutPipeline
# Run on the GPU when one is visible, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Half precision is only requested on CUDA; on CPU the weights are loaded in
# float32 so the pipeline does not run float16 kernels there.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Initialize model
controlnet = ControlNetModel.from_pretrained(
    "qihoo360/HiCo_T2I", torch_dtype=torch_dtype
)
pipe = StableDiffusionControlNetMultiLayoutPipeline.from_pretrained(
    "krnl/realisticVisionV51_v51VAE",
    controlnet=[controlnet],
    torch_dtype=torch_dtype,
)
pipe = pipe.to(device)
# Swap the checkpoint's scheduler for UniPC, reusing its existing config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Largest seed value offered by the UI and drawn by `infer` (int32 maximum).
MAX_SEED = np.iinfo(np.int32).max
# Function for generating dummy bounding box and label data
def generate_dummy_data():
    """Build a random demo layout: labels, boxes and per-entry masks.

    The canvas is fixed at 512x512. Between one and five random boxes are
    drawn; each box gets its own mask that is 255 inside the box and 0
    elsewhere. A background entry (full-canvas box, all-zero mask) is placed
    at index 0 of every returned sequence.

    Returns:
        A 4-tuple ``(r_obj_class, r_obj_bbox, list_cond_image_pil,
        obj_cond_image)`` where

        * ``r_obj_class`` is a list of labels, ``"Background"`` followed by
          one ``"Object"`` per box;
        * ``r_obj_bbox`` is a list of ``[x1, y1, x2, y2]`` boxes in pixels;
        * ``list_cond_image_pil`` is a list of RGB ``PIL.Image`` masks;
        * ``obj_cond_image`` is the same masks stacked into one ``uint8``
          array of shape ``(num_objects + 1, 512, 512, 3)``.

        All three lists have the same length, ``num_objects + 1``.
    """
    img_width, img_height = 512, 512
    blank_canvas = np.zeros((img_height, img_width, 3), dtype=np.uint8)

    num_objects = random.randint(1, 5)

    # Index 0 is always the background: whole canvas, empty mask.
    # One label is emitted per mask so labels and condition images stay
    # aligned (presumably the pipeline pairs them by position -- confirm
    # against hico_pipeline).
    r_obj_class = ["Background"] + ["Object"] * num_objects
    r_obj_bbox = [[0, 0, img_width, img_height]]
    list_cond_image = [np.zeros_like(blank_canvas, dtype=np.uint8)]

    for _ in range(num_objects):
        # The top-left corner lies in the upper-left quadrant; the
        # bottom-right corner is strictly beyond it so no box is empty.
        x1 = random.randint(0, img_width // 2)
        y1 = random.randint(0, img_height // 2)
        x2 = random.randint(x1 + 1, img_width)
        y2 = random.randint(y1 + 1, img_height)
        r_obj_bbox.append([x1, y1, x2, y2])

        cond_image = np.zeros_like(blank_canvas, dtype=np.uint8)
        cond_image[y1:y2, x1:x2] = 255
        list_cond_image.append(cond_image)

    obj_cond_image = np.stack(list_cond_image, axis=0)
    list_cond_image_pil = [
        Image.fromarray(mask).convert('RGB') for mask in list_cond_image
    ]
    return r_obj_class, r_obj_bbox, list_cond_image_pil, obj_cond_image
# Inference function
def infer(
    prompt, guidance_scale, num_inference_steps, randomize_seed, seed=None
):
    """Generate one 512x512 image for ``prompt`` over a random demo layout.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        num_inference_steps: Number of denoising steps; coerced to ``int``.
        randomize_seed: When true, ignore ``seed`` and draw a fresh one.
        seed: Seed to use; a random one is drawn when it is ``None``.

    Returns:
        ``(image, seed)``: the first image returned by the pipeline and the
        integer seed that was actually used.
    """
    if randomize_seed or seed is None:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver numbers as floats; normalise before seeding.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    # Draw the demo layout from the same seed so that a given seed reproduces
    # the whole result, not just the noise. The global `random` state is
    # restored afterwards so later "randomize seed" draws are unaffected.
    rng_state = random.getstate()
    random.seed(seed)
    try:
        r_obj_class, _, list_cond_image_pil, _ = generate_dummy_data()
    finally:
        random.setstate(rng_state)

    # Seeds torch's global RNG, which the pipeline draws from when it is not
    # given an explicit generator.
    # NOTE(review): if the custom pipeline accepts `generator=`, passing the
    # returned generator explicitly would be cleaner -- confirm its signature.
    torch.manual_seed(seed)

    # Run inference
    image = pipe(
        prompt=prompt,
        layo_prompt=r_obj_class,
        guess_mode=False,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        image=list_cond_image_pil,
        fuse_type="avg",
        width=512,
        height=512,
    ).images[0]
    return image, seed
# Sample prompts offered to the user underneath the prompt box.
examples = [
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
]

# Stylesheet for the page: centres the main column and caps its width.
# Assembled line by line; the leading and trailing empty entries reproduce
# the newline at each end of the stylesheet text.
css = "\n".join(
    [
        "",
        "#col-container {",
        "margin: 0 auto;",
        "max-width: 640px;",
        "}",
        "",
    ]
)
# Gradio UI
# Layout: a single centred column holding the title, the prompt row, the
# result image, a collapsed "Advanced Settings" panel and the example prompts.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(" # Text-to-Image Gradio Template")

        # Prompt box and run button share one row.
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=1,
                placeholder="Enter your prompt",
                container=False,
            )
            run_button = gr.Button("Run", scale=0, variant="primary")

        result = gr.Image(label="Result", show_label=False)

        with gr.Accordion("Advanced Settings", open=False):
            seed = gr.Slider(
                label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=7.5,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=50,
                )

        gr.Examples(examples=examples, inputs=[prompt])

    # Clicking "Run" calls `infer`; the seed that was used is written back to
    # the seed slider next to the generated image.
    run_button.click(
        fn=infer,
        inputs=[
            prompt,
            guidance_scale,
            num_inference_steps,
            randomize_seed,
            seed,
        ],
        outputs=[result, seed],
    )

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()