# Diffusers' ControlNet Implementation Subjective Evaluation
# https://github.com/takuma104/diffusers/tree/controlnet
import os
import sys

import einops
import numpy as np
import torch
from diffusers import StableDiffusionControlNetPipeline
from PIL import Image

test_prompt = "best quality, extremely detailed"
test_negative_prompt = "lowres, bad anatomy, worst quality, low quality"


def generate_image(seed, control, pipeline=None):
    """Generate one image from the fixed test prompts and a control tensor.

    Args:
        seed: Integer seed for the CPU generator that draws the initial
            latent noise.
        control: Conditioning tensor forwarded to the pipeline as ``image``.
            The driver below passes a float tensor laid out as
            ``(1, channels, height, width)`` with values scaled to [0, 1].
        pipeline: Callable pipeline to run. When omitted, the module-level
            ``pipe`` created in the ``__main__`` block is used, which keeps
            the original two-argument call working.

    Returns:
        The first entry of the pipeline result's ``images`` attribute.

    Raises:
        NameError: If ``pipeline`` is omitted and no module-level ``pipe``
            has been defined (e.g. when this file is imported, not run).
    """
    if pipeline is None:
        pipeline = pipe

    # The noise is drawn on the CPU from a seeded generator and only then
    # moved to the GPU; it is handed over via `latents=` instead of passing a
    # CUDA generator to the pipeline. Presumably this keeps the starting noise
    # identical across GPUs/implementations for a given seed -- confirm.
    cpu_generator = torch.Generator(device="cpu").manual_seed(seed)
    latent = torch.randn(
        (1, 4, 64, 64), device="cpu", generator=cpu_generator
    ).cuda()

    result = pipeline(
        prompt=test_prompt,
        negative_prompt=test_negative_prompt,
        guidance_scale=9.0,
        num_inference_steps=20,
        latents=latent,
        image=control,
    )
    return result.images[0]


if __name__ == '__main__':
    # Fail with a readable message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} MODEL_NAME')
    model_name = sys.argv[1]

    control_image_folder = '../huggingface/controlnet_dev/gen_compare/control_images/converted/'
    output_image_folder = '../huggingface/controlnet_dev/gen_compare/output_images/diffusers/'
    model_id = f'../huggingface/control_sd15_{model_name}'

    # Create the destination before the expensive model load so a missing
    # folder cannot make image.save() fail after generation has finished.
    os.makedirs(output_image_folder, exist_ok=True)

    pipe = StableDiffusionControlNetPipeline.from_pretrained(model_id).to("cuda")
    pipe.enable_attention_slicing(1)

    # A tuple (not a set) so the images are processed in the same order
    # on every run.
    image_types = ('bird', 'human', 'room', 'vermeer')

    for image_type in image_types:
        control_path = f'{control_image_folder}control_{image_type}_{model_name}.png'

        # The context manager closes the file handle once the pixels have
        # been copied into the array. convert('RGB') guarantees a 3-D,
        # 8-bit-per-channel array, which both the channel slice and the
        # division by 255 below rely on; RGB inputs are unchanged by it.
        with Image.open(control_path) as control_image:
            # [:, :, ::-1] reverses the channel axis (RGB -> BGR); .copy()
            # makes the reversed view contiguous for torch.from_numpy.
            # NOTE(review): presumably the "converted" control images are
            # stored channel-swapped -- confirm the flip is still wanted.
            control = np.array(control_image.convert('RGB'))[:, :, ::-1].copy()

        control = torch.from_numpy(control).float().cuda() / 255.0
        # Add a leading batch axis of size 1: (h, w, c) -> (1, h, w, c).
        control = control.unsqueeze(0)
        control = einops.rearrange(control, 'b h w c -> b c h w').clone()

        for seed in range(4):
            image = generate_image(seed=seed, control=control, pipeline=pipe)
            image.save(f'{output_image_folder}output_{image_type}_{model_name}_{seed}.png')