# Diffusers' ControlNet Implementation Subjective Evaluation
import einops
import numpy as np
import torch
import sys
import os
import yaml
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, DDIMScheduler
from PIL import Image
test_prompt = "best quality, extremely detailed"
test_negative_prompt = "lowres, bad anatomy, worst quality, low quality"
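# make_image_condition(): convert a PIL image (plus an optional mask) into the
# 1x3xHxW float32 tensor expected as the inpaint control image, with masked
# pixels flagged as -1.0.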
def make_image_condition(image, image_mask=None):
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    if image_mask is not None:
        image_mask = np.array(image_mask.convert("L"))
        assert (
            image.shape[0:2] == image_mask.shape[0:2]  # compare (H, W)
        ), "image and image_mask must have the same image size"
        image[image_mask < 128] = -1.0  # set as masked pixel
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return image
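# generate_image(): sample one image with the module-level `pipe`. The initial
# latent is drawn on the CPU with a seeded generator so runs are reproducible,
# then moved to the GPU. Guess mode (unused below) lowers the guidance scale
# and increases the step count.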
def generate_image(seed, prompt, negative_prompt, control, guess_mode=False):
    latent = torch.randn(
        (1, 4, 64, 64),
        device="cpu",
        generator=torch.Generator(device="cpu").manual_seed(seed),
    ).cuda()
    image = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=4.0 if guess_mode else 9.0,
        num_inference_steps=50 if guess_mode else 20,
        latents=latent,
        image=control,
        # guess_mode=guess_mode,
    ).images[0]
    return image
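# Evaluation driver: run the ControlNet v1.1 inpaint checkpoint over three
# 512x512 control images with four seeds each and save the results.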
if __name__ == "__main__":
model_name = "p_sd15_inpaint"
original_image_folder = "./control_images/"
control_image_folder = "./control_images/converted/"
output_image_folder = "./output_images/diffusers/"
os.makedirs(output_image_folder, exist_ok=True)
model_id = f"lllyasviel/control_v11{model_name}"
controlnet = ControlNetModel.from_pretrained(model_id)
if model_name == "p_sd15s2_lineart_anime":
base_model_id = "Linaqruf/anything-v3.0"
base_model_revision = None
else:
base_model_id = "runwayml/stable-diffusion-v1-5"
base_model_revision = "non-ema"
pipe = StableDiffusionControlNetPipeline.from_pretrained(
base_model_id,
revision=base_model_revision,
controlnet=controlnet,
safety_checker=None,
).to("cuda")
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
original_image_filenames = [
"pexels-sound-on-3760767_512x512.png",
"vermeer_512x512.png",
"bird_512x512.png",
]
inpaint_image_conditions = [
make_image_condition(
Image.open(f"{original_image_folder}{fn}"),
Image.open(f"{original_image_folder}mask_512x512.png"),
)
for fn in original_image_filenames
]
for i, control in enumerate(inpaint_image_conditions):
for seed in range(4):
image = generate_image(
seed=seed,
prompt=test_prompt,
negative_prompt=test_negative_prompt,
control=control,
)
image.save(f"{output_image_folder}output_{model_name}_{i}_{seed}.png")
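# Expected inputs/outputs: ./control_images/ holds the three source images and
# mask_512x512.png; 3 images x 4 seeds = 12 files named
# output_p_sd15_inpaint_<i>_<seed>.png are written to ./output_images/diffusers/.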