import torch
from diffusers import ControlNetModel
from PIL import Image
from torchvision import transforms

import internals.util.image as ImageUtils
from carvekit.api import high
from internals.data.result import Result
from internals.pipelines.commons import AbstractPipeline, Text2Img
from internals.pipelines.controlnets import ControlNet
from internals.pipelines.demofusion_sdxl import DemoFusionSDXLControlNetPipeline
from internals.pipelines.high_res import HighRes
from internals.util.commons import download_image
from internals.util.config import get_base_dimension

# Shared ControlNet helper, used by ``process`` for pose detection.
controlnet = ControlNet()

# Model id that is paired with the OpenPose ControlNet (pose conditioning).
_OPENPOSE_MODEL_ID = 2000293
# Model id that is routed to the plain high-res img2img pipeline instead of
# the DemoFusion ControlNet pipeline.
_HIGH_RES_MODEL_ID = 2000173

# Output sides are snapped to multiples of this value when resizing.
_SIZE_MULTIPLE = 64


class SDXLTileUpscaler(AbstractPipeline):
    """Upscale an image with a DemoFusion SDXL ControlNet pipeline.

    ``create`` builds the pipeline from an existing text-to-image pipeline's
    components; ``process`` downloads an image, derives a conditioning image
    (pose or canny edges) and runs the upscale.
    """

    def create(self, high_res: HighRes, pipeline: Text2Img, model_id: int):
        """Build the DemoFusion pipeline and store it on the instance.

        Args:
            high_res: High-res helper whose ``pipe`` is used by ``process``
                for the ``_HIGH_RES_MODEL_ID`` path.
            pipeline: Text-to-image wrapper whose ``pipe.components`` are
                reused to construct the DemoFusion pipeline.
            model_id: Model identifier; selects which ControlNet checkpoint
                is loaded.
        """
        # Temporary hack for the upscale model until multi-ControlNet support
        # is added: pick a single ControlNet checkpoint based on the model id.
        checkpoint = (
            "thibaud/controlnet-openpose-sdxl-1.0"
            if int(model_id) == _OPENPOSE_MODEL_ID
            else "diffusers/controlnet-canny-sdxl-1.0"
        )

        # Named ``controlnet_model`` so it does not shadow the module-level
        # ``controlnet`` helper instance.
        controlnet_model = ControlNetModel.from_pretrained(
            checkpoint, torch_dtype=torch.float16
        )
        pipe = DemoFusionSDXLControlNetPipeline(
            **pipeline.pipe.components, controlnet=controlnet_model
        )
        pipe = pipe.to("cuda")
        pipe.enable_vae_tiling()
        pipe.enable_vae_slicing()
        pipe.enable_xformers_memory_efficient_attention()

        self.high_res = high_res
        self.pipe = pipe

    def process(
        self,
        prompt: str,
        imageUrl: str,
        resize_dimension: int,
        negative_prompt: str,
        width: int,
        height: int,
        model_id: int,
    ):
        """Upscale the image at ``imageUrl``.

        Args:
            prompt: Text prompt passed to the pipeline.
            imageUrl: URL of the source image.
            resize_dimension: Target size of the longer side of the output
                (before snapping to a multiple of 64).
            negative_prompt: Negative prompt; only forwarded on the high-res
                img2img path (see the review note below).
            width: Width the downloaded image is first resized to.
            height: Height the downloaded image is first resized to.
            model_id: Model identifier selecting the conditioning type and
                which pipeline runs.

        Returns:
            A ``(images, False)`` tuple. The second element is always
            ``False``; its meaning is defined by callers not visible here.
        """
        model_id = int(model_id)

        # Download once and derive every working image from this copy.
        source = download_image(imageUrl)
        img = source.resize((width, height))
        img = ImageUtils.resize_image(img, get_base_dimension())
        img2 = self.__resize_for_condition_image(img, resize_dimension)
        print("img", img2.size, img.size)

        if model_id == _HIGH_RES_MODEL_ID:
            # Plain img2img path: needs neither the conditioning image nor
            # the low-res CUDA tensor, so neither is computed.
            kwargs = {
                "prompt": prompt,
                "negative_prompt": negative_prompt,
                "image": img2,
                "strength": 0.3,
                "num_inference_steps": 30,
            }
            images = self.high_res.pipe.__call__(**kwargs).images
            return images, False

        if model_id == _OPENPOSE_MODEL_ID:
            condition_image = controlnet.detect_pose(imageUrl)
        else:
            # ``img`` was already derived above via ``resize`` (a new image),
            # so it is independent of whatever edge detection does to
            # ``source``.
            condition_image = ControlNet.canny_detect_edge(source)
        condition_image = condition_image.resize(img.size)

        image_lr = self.load_and_process_image(img)

        # NOTE(review): the caller's ``negative_prompt`` is not used on this
        # path; a fixed negative prompt is sent instead. Kept as-is because
        # it may be deliberate - confirm with the pipeline owner.
        images = self.pipe.__call__(
            image_lr=image_lr,
            prompt=prompt,
            condition_image=condition_image,
            negative_prompt="blurry, ugly, duplicate, poorly drawn, deformed, mosaic",
            guidance_scale=11,
            sigma=0.8,
            num_inference_steps=24,
            width=img2.size[0],
            height=img2.size[1],
        )
        # The pipeline output is returned in reverse order (presumably so the
        # final, largest result comes first - TODO confirm against the
        # DemoFusion pipeline's return value).
        images = images[::-1]
        return images, False

    def load_and_process_image(self, pil_image):
        """Convert a PIL image into a normalised half-precision CUDA tensor.

        The image is resized to 1024x1024, scaled to ``[-1, 1]`` via a
        mean/std of 0.5 per channel, given a leading batch dimension of 1,
        cast to ``float16`` and moved to ``"cuda"``.

        Args:
            pil_image: Source PIL image (any mode; converted to RGB).

        Returns:
            The resulting tensor on the CUDA device.
        """
        transform = transforms.Compose(
            [
                transforms.Resize((1024, 1024)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
            ]
        )
        # Normalize is configured for exactly three channels, so force RGB to
        # avoid a channel mismatch on RGBA / grayscale / palette inputs.
        image = transform(pil_image.convert("RGB"))
        image = image.unsqueeze(0).half()
        image = image.to("cuda")
        return image

    def __resize_for_condition_image(self, image: Image.Image, resolution: int):
        """Resize ``image`` so its longer side is about ``resolution``.

        The aspect ratio is preserved up to rounding: each side is scaled by
        ``resolution / max(width, height)`` and then snapped to the nearest
        multiple of 64, with a floor of 64 so that a very elongated image
        never ends up with a zero-sized side.

        Args:
            image: Source PIL image.
            resolution: Desired size of the longer side, in pixels.

        Returns:
            A new RGB image resized with Lanczos resampling.
        """
        input_image = image.convert("RGB")
        W, H = input_image.size
        k = float(resolution) / max(W, H)
        H *= k
        W *= k
        # Clamp to one multiple: rounding a short side below 32px would
        # otherwise yield 0, which Image.resize rejects.
        H = max(_SIZE_MULTIPLE, int(round(H / 64.0)) * _SIZE_MULTIPLE)
        W = max(_SIZE_MULTIPLE, int(round(W / 64.0)) * _SIZE_MULTIPLE)
        img = input_image.resize((W, H), resample=Image.LANCZOS)
        return img