|
import gradio as gr |
|
from gradio_imageslider import ImageSlider |
|
import torch |
|
from diffusers import DiffusionPipeline, AutoencoderKL |
|
from PIL import Image |
|
from torchvision import transforms |
|
import numpy as np |
|
import tempfile |
|
import os |
|
import uuid |
|
|
|
# Opt-in switch: the UNet is compiled only when the environment variable
# TORCH_COMPILE is exactly "1".
TORCH_COMPILE = os.environ.get("TORCH_COMPILE", "0") == "1"

# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single dtype shared by the VAE and the pipeline loads below.
dtype = torch.float16

# The VAE is loaded separately and passed into the pipeline via ``vae=``.
vae = AutoencoderKL.from_pretrained(
    "madebyollin/sdxl-vae-fp16-fix",
    torch_dtype=dtype,
)

# SDXL base weights driven by the custom DemoFusion pipeline script, moved to
# the selected device right after loading.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="pipeline_demofusion_sdxl.py",
    custom_revision="main",
    torch_dtype=dtype,
    variant="fp16",
    use_safetensors=True,
    vae=vae,
).to(device)

if TORCH_COMPILE:
    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
|
|
|
|
|
def load_and_process_image(pil_image):
    """Turn a PIL image into the half-precision batch tensor fed to the pipeline.

    The image is converted to RGB, resized to 1024x1024, converted to a
    tensor, normalized per channel with mean 0.5 / std 0.5, given a leading
    batch dimension and cast to float16.

    Args:
        pil_image: A ``PIL.Image.Image`` in any mode convertible to RGB.

    Returns:
        A ``torch.Tensor`` with a leading batch dimension of 1, dtype float16,
        on the CPU (the caller moves it to the target device).
    """
    # Normalize below is configured for exactly three channels, so RGBA,
    # grayscale or palette inputs must be brought to RGB first; for images
    # that are already RGB this only makes a copy.
    rgb_image = pil_image.convert("RGB")

    transform = transforms.Compose(
        [
            transforms.Resize((1024, 1024)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ]
    )
    image = transform(rgb_image)
    # Add the batch dimension and match the fp16 weights used by the pipeline.
    return image.unsqueeze(0).half()
|
|
|
|
|
def pad_image(image):
    """Pad ``image`` with black bars so that it becomes square.

    A square input is returned as-is (same object). Otherwise a new image of
    the same mode is created whose side equals the longer edge, filled with
    ``(0, 0, 0)``, and the input is pasted centered along the shorter axis.

    Args:
        image: A ``PIL.Image.Image``.

    Returns:
        The original image when already square, else a new square image.
    """
    width, height = image.size
    if width == height:
        return image

    side = max(width, height)
    canvas = Image.new(image.mode, (side, side), (0, 0, 0))
    # Only the shorter axis gets a non-zero offset; the longer one is 0.
    offset = ((side - width) // 2, (side - height) // 2)
    canvas.paste(image, offset)
    return canvas
|
|
|
|
|
def predict(
    input_image,
    prompt,
    negative_prompt,
    seed,
    scale=2,
    progress=gr.Progress(track_tqdm=True),
):
    """Upscale ``input_image`` with the DemoFusion SDXL pipeline.

    The input is padded to a square, resized to 1024x1024, converted to a
    tensor and passed as ``image_lr`` to the pipeline, which is asked for an
    output of ``1024 * scale`` pixels per side. Every image the pipeline
    returns is saved as a JPEG in a fresh temporary directory.

    Args:
        input_image: PIL image from the UI, or ``None`` when nothing was
            uploaded.
        prompt: Text prompt describing the image.
        negative_prompt: Text describing what to avoid.
        seed: Seed passed to ``torch.manual_seed``.
        scale: Integer multiplier applied to the 1024 px base resolution.
        progress: Gradio progress tracker; not used directly in the body,
            presumably declared so Gradio mirrors tqdm progress in the UI.

    Returns:
        A tuple ``((first_image, last_image), paths)`` where the pair holds
        the first and last images returned by the pipeline and ``paths`` is
        the list of saved JPEG file paths, one per returned image.

    Raises:
        gr.Error: If ``input_image`` is ``None``.
    """
    if input_image is None:
        raise gr.Error("Please upload an image.")

    # UI sliders may deliver floats; the pipeline's width/height must be
    # integers, so coerce defensively.
    seed = int(seed)
    scale = int(scale)

    # The padded input is intentionally not written to disk: a per-request
    # debug dump in the working directory would accumulate without bound.
    padded_image = pad_image(input_image).resize((1024, 1024))
    image_lr = load_and_process_image(padded_image).to(device)

    # Seeds torch's global RNG and returns its default generator.
    generator = torch.manual_seed(seed)
    images = pipe(
        prompt,
        negative_prompt=negative_prompt,
        image_lr=image_lr,
        width=1024 * scale,
        height=1024 * scale,
        view_batch_size=16,
        stride=64,
        generator=generator,
        num_inference_steps=25,
        guidance_scale=7.5,
        cosine_scale_1=3,
        cosine_scale_2=1,
        cosine_scale_3=1,
        sigma=0.8,
        multi_decoder=True,
        show_image=False,
        lowvram=True,
    )

    # One temp directory and one UUID per request keep the file names of
    # different requests apart; each file name carries the image width.
    images_path = tempfile.mkdtemp()
    uuid_name = uuid.uuid4()
    paths = []
    for img in images:
        path = os.path.join(images_path, f"img_{uuid_name}_{img.size[0]}.jpg")
        img.save(path)
        paths.append(path)
    return (images[0], images[-1]), paths
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=...) below. The #intro rule applies to
# the Markdown header created with elem_id="intro": full width, centered text.
css = """
#intro{
    max-width: 100%;
    text-align: center;
    margin: 0 auto;
}
"""
|
|
|
with gr.Blocks(css=css) as demo:
    # Page header; elem_id ties it to the #intro rule in the stylesheet.
    gr.Markdown(
        "# Super Resolution - SDXL\n"
        "## [DemoFusion](https://github.com/PRIS-CV/DemoFusion)",
        elem_id="intro",
    )

    # The same negative prompt is the textbox default and is used by every
    # example row, so it is spelled out once.
    default_negative_prompt = "blurry, ugly, duplicate, poorly drawn, deformed, mosaic"

    with gr.Row():
        # Left column: all user inputs and the submit button.
        with gr.Column(scale=1):
            image_input = gr.Image(type="pil", label="Input Image")
            prompt = gr.Textbox(
                label="Prompt",
                info="The prompt is very important to get the desired results. Please try to describe the image as best as you can.",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
                value=default_negative_prompt,
            )
            scale = gr.Slider(minimum=2, maximum=5, value=2, step=1, label="x Scale")
            seed = gr.Slider(
                minimum=0,
                maximum=2**64 - 1,
                value=1415926535897932,
                step=1,
                label="Seed",
                randomize=True,
            )
            btn = gr.Button()
        # Right column: before/after slider plus the downloadable files.
        with gr.Column(scale=2):
            image_slider = ImageSlider()
            files = gr.Files()

    # Order matches predict's positional parameters and its two return values.
    inputs = [image_input, prompt, negative_prompt, seed, scale]
    outputs = [image_slider, files]
    btn.click(predict, inputs=inputs, outputs=outputs, concurrency_limit=1)

    # Example image path -> prompt; every example shares the same negative
    # prompt, seed and scale.
    example_prompts = {
        "./examples/lara.jpeg": "photography of lara croft 8k high definition award winning",
        "./examples/cybetruck.jpeg": "photo of tesla cybertruck futuristic car 8k high definition on a sand dune in mars, future",
        "./examples/jesus.png": "a photorealistic painting of Jesus Christ, 4k high definition",
    }
    gr.Examples(
        fn=predict,
        examples=[
            [image_path, example_prompt, default_negative_prompt, 1415535897932, 2]
            for image_path, example_prompt in example_prompts.items()
        ],
        inputs=inputs,
        outputs=outputs,
        cache_examples=True,
    )
|
|
|
|
|
# Enable the request queue with api_open=False, then start the app with
# show_api=False. queue() returns the Blocks instance, so the calls chain.
demo.queue(api_open=False).launch(show_api=False)
|
|