Bingsu commited on
Commit
79b8418
·
1 Parent(s): 3f76664

Upload files: v0.1.1.dev0

Browse files
Files changed (7) hide show
  1. README.md +1 -1
  2. asdff/__init__.py +9 -0
  3. asdff/__version__.py +1 -0
  4. asdff/sd.py +123 -0
  5. asdff/utils.py +70 -0
  6. asdff/yolo.py +73 -0
  7. pipeline.py +1 -0
README.md CHANGED
@@ -25,4 +25,4 @@ result = pipe(common=common)
25
  images = result[0]
26
  ```
27
 
28
- github: https://github.com/Bing-su/asdff
 
25
  images = result[0]
26
  ```
27
 
28
+ github: https://github.com/Bing-su/asdff
asdff/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .__version__ import __version__
2
+ from .sd import AdPipeline
3
+ from .yolo import yolo_detector
4
+
5
+ __all__ = [
6
+ "AdPipeline",
7
+ "yolo_detector",
8
+ "__version__",
9
+ ]
asdff/__version__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ __version__ = "0.1.1.dev0"
asdff/sd.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from functools import cached_property
4
+ from typing import Any, Callable, Iterable, List, Mapping, Optional
5
+
6
+ from diffusers import StableDiffusionInpaintPipeline, StableDiffusionPipeline
7
+ from diffusers.utils import logging
8
+ from PIL import Image
9
+
10
+ from asdff.utils import (
11
+ ADOutput,
12
+ bbox_padding,
13
+ composite,
14
+ mask_dilate,
15
+ mask_gaussian_blur,
16
+ )
17
+ from asdff.yolo import yolo_detector
18
+
19
+ logger = logging.get_logger("diffusers")
20
+
21
+
22
+ DetectorType = Callable[[Image.Image], Optional[List[Image.Image]]]
23
+
24
+
25
+ def ordinal(n: int) -> str:
26
+ d = {1: "st", 2: "nd", 3: "rd"}
27
+ return str(n) + ("th" if 11 <= n % 100 <= 13 else d.get(n % 10, "th"))
28
+
29
+
30
+ class AdPipeline(StableDiffusionPipeline):
31
+ @cached_property
32
+ def inpaint_pipeline(self):
33
+ return StableDiffusionInpaintPipeline(
34
+ vae=self.vae,
35
+ text_encoder=self.text_encoder,
36
+ tokenizer=self.tokenizer,
37
+ unet=self.unet,
38
+ scheduler=self.scheduler,
39
+ safety_checker=self.safety_checker,
40
+ feature_extractor=self.feature_extractor,
41
+ requires_safety_checker=self.config.requires_safety_checker,
42
+ )
43
+
44
+ def __call__( # noqa: C901
45
+ self,
46
+ common: Mapping[str, Any] | None = None,
47
+ txt2img_only: Mapping[str, Any] | None = None,
48
+ inpaint_only: Mapping[str, Any] | None = None,
49
+ detectors: DetectorType | Iterable[DetectorType] | None = None,
50
+ mask_dilation: int = 4,
51
+ mask_blur: int = 4,
52
+ mask_padding: int = 32,
53
+ ):
54
+ if common is None:
55
+ common = {}
56
+ if txt2img_only is None:
57
+ txt2img_only = {}
58
+ if inpaint_only is None:
59
+ inpaint_only = {}
60
+ if "strength" not in inpaint_only:
61
+ inpaint_only = {**inpaint_only, "strength": 0.4}
62
+
63
+ if detectors is None:
64
+ detectors = [self.default_detector]
65
+ elif callable(detectors):
66
+ detectors = [detectors]
67
+
68
+ txt2img_output = super().__call__(**common, **txt2img_only, output_type="pil")
69
+ txt2img_images: list[Image.Image] = txt2img_output[0]
70
+
71
+ init_images = []
72
+ final_images = []
73
+
74
+ for i, init_image in enumerate(txt2img_images):
75
+ init_images.append(init_image.copy())
76
+ final_image = None
77
+
78
+ for j, detector in enumerate(detectors):
79
+ masks = detector(init_image)
80
+ if masks is None:
81
+ logger.info(
82
+ f"No object detected on {ordinal(i + 1)} image with {ordinal(j + 1)} detector."
83
+ )
84
+ continue
85
+
86
+ for k, mask in enumerate(masks):
87
+ mask = mask.convert("L")
88
+ mask = mask_dilate(mask, mask_dilation)
89
+ bbox = mask.getbbox()
90
+ if bbox is None:
91
+ logger.info(f"No object in {ordinal(k + 1)} mask.")
92
+ continue
93
+ mask = mask_gaussian_blur(mask, mask_blur)
94
+ bbox_padded = bbox_padding(bbox, init_image.size, mask_padding)
95
+
96
+ crop_image = init_image.crop(bbox_padded)
97
+ crop_mask = mask.crop(bbox_padded)
98
+
99
+ inpaint_output = self.inpaint_pipeline(
100
+ **common,
101
+ **inpaint_only,
102
+ image=crop_image,
103
+ mask_image=crop_mask,
104
+ num_images_per_prompt=1,
105
+ output_type="pil",
106
+ )
107
+ inpaint_image: Image.Image = inpaint_output[0][0]
108
+ final_image = composite(
109
+ init=init_image,
110
+ mask=mask,
111
+ gen=inpaint_image,
112
+ bbox_padded=bbox_padded,
113
+ )
114
+ init_image = final_image
115
+
116
+ if final_image is not None:
117
+ final_images.append(final_image)
118
+
119
+ return ADOutput(images=final_images, init_images=init_images)
120
+
121
+ @property
122
+ def default_detector(self) -> Callable[..., list[Image.Image] | None]:
123
+ return yolo_detector
asdff/utils.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import cv2
6
+ import numpy as np
7
+ from diffusers.utils import BaseOutput
8
+ from PIL import Image, ImageFilter, ImageOps
9
+
10
+
11
+ @dataclass
12
+ class ADOutput(BaseOutput):
13
+ images: list[Image.Image]
14
+ init_images: list[Image.Image]
15
+
16
+
17
+ def mask_dilate(image: Image.Image, value: int = 4) -> Image.Image:
18
+ if value <= 0:
19
+ return image
20
+
21
+ arr = np.array(image)
22
+ kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (value, value))
23
+ dilated = cv2.dilate(arr, kernel, iterations=1)
24
+ return Image.fromarray(dilated)
25
+
26
+
27
+ def mask_gaussian_blur(image: Image.Image, value: int = 4) -> Image.Image:
28
+ if value <= 0:
29
+ return image
30
+
31
+ blur = ImageFilter.GaussianBlur(value)
32
+ return image.filter(blur)
33
+
34
+
35
+ def bbox_padding(
36
+ bbox: tuple[int, int, int, int], image_size: tuple[int, int], value: int = 32
37
+ ) -> tuple[int, int, int, int]:
38
+ if value <= 0:
39
+ return bbox
40
+
41
+ arr = np.array(bbox).reshape(2, 2)
42
+ arr[0] -= value
43
+ arr[1] += value
44
+ arr = np.clip(arr, (0, 0), image_size)
45
+ return tuple(arr.flatten())
46
+
47
+
48
+ def composite(
49
+ init: Image.Image,
50
+ mask: Image.Image,
51
+ gen: Image.Image,
52
+ bbox_padded: tuple[int, int, int, int],
53
+ ) -> Image.Image:
54
+ img_masked = Image.new("RGBa", init.size)
55
+ img_masked.paste(
56
+ init.convert("RGBA").convert("RGBa"),
57
+ mask=ImageOps.invert(mask),
58
+ )
59
+ img_masked = img_masked.convert("RGBA")
60
+
61
+ size = (
62
+ bbox_padded[2] - bbox_padded[0],
63
+ bbox_padded[3] - bbox_padded[1],
64
+ )
65
+ resized = gen.resize(size)
66
+
67
+ output = Image.new("RGBA", init.size)
68
+ output.paste(resized, bbox_padded)
69
+ output.alpha_composite(img_masked)
70
+ return output.convert("RGB")
asdff/yolo.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import numpy as np
4
+ import torch
5
+ from huggingface_hub import hf_hub_download
6
+ from PIL import Image, ImageDraw
7
+ from torchvision.transforms.functional import to_pil_image
8
+ from ultralytics import YOLO
9
+
10
+
11
+ def create_mask_from_bbox(
12
+ bboxes: np.ndarray, shape: tuple[int, int]
13
+ ) -> list[Image.Image]:
14
+ """
15
+ Parameters
16
+ ----------
17
+ bboxes: list[list[float]]
18
+ list of [x1, y1, x2, y2]
19
+ bounding boxes
20
+ shape: tuple[int, int]
21
+ shape of the image (width, height)
22
+
23
+ Returns
24
+ -------
25
+ masks: list[Image.Image]
26
+ A list of masks
27
+
28
+ """
29
+ masks = []
30
+ for bbox in bboxes:
31
+ mask = Image.new("L", shape, "black")
32
+ mask_draw = ImageDraw.Draw(mask)
33
+ mask_draw.rectangle(bbox, fill="white")
34
+ masks.append(mask)
35
+ return masks
36
+
37
+
38
+ def mask_to_pil(masks: torch.Tensor, shape: tuple[int, int]) -> list[Image.Image]:
39
+ """
40
+ Parameters
41
+ ----------
42
+ masks: torch.Tensor, dtype=torch.float32, shape=(N, H, W).
43
+ The device can be CUDA, but `to_pil_image` takes care of that.
44
+
45
+ shape: tuple[int, int]
46
+ (width, height) of the original image
47
+
48
+ Returns
49
+ -------
50
+ images: list[Image.Image]
51
+ """
52
+ n = masks.shape[0]
53
+ return [to_pil_image(masks[i], mode="L").resize(shape) for i in range(n)]
54
+
55
+
56
+ def yolo_detector(
57
+ image: Image.Image, model_path: str | None = None, confidence: float = 0.3
58
+ ) -> list[Image.Image] | None:
59
+ if not model_path:
60
+ model_path = hf_hub_download("Bingsu/adetailer", "face_yolov8n.pt")
61
+ model = YOLO(model_path)
62
+ pred = model(image, conf=confidence)
63
+
64
+ bboxes = pred[0].boxes.xyxy.cpu().numpy()
65
+ if bboxes.size == 0:
66
+ return None
67
+
68
+ if pred[0].masks is None:
69
+ masks = create_mask_from_bbox(bboxes, image.size)
70
+ else:
71
+ masks = mask_to_pil(pred[0].masks.data, image.size)
72
+
73
+ return masks
pipeline.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from asdff import AdPipeline # noqa: F401