yeq6x commited on
Commit
96d2bbe
·
1 Parent(s): 7536520
Files changed (7) hide show
  1. .gitignore +2 -0
  2. README.md +3 -1
  3. convert_source_to_sketch.py +75 -0
  4. dataset_aug.py +250 -0
  5. lineart_util.py +67 -0
  6. requirements.txt +17 -0
  7. test_app.py +418 -0
.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ generate_prompt.py
2
+ output/
README.md CHANGED
@@ -10,4 +10,6 @@ pinned: false
10
  license: apache-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
10
  license: apache-2.0
11
  ---
12
 
13
+ Check out the Hugging Face Space for a demo!
14
+
15
+ https://huggingface.co/spaces/yeq6x/pair-images-aug-test
convert_source_to_sketch.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ python convert_source_to_sketch.py "input_folder" "output_folder"
3
+ 画像をスケッチ(線画)に変換する機能を提供します。
4
+ """
5
+ import argparse
6
+ import os
7
+ from lineart_util import scribble_xdog
8
+ from PIL import Image
9
+ import numpy as np
10
+ from tqdm import tqdm
11
+ import cv2
12
+
13
def convert(image_path):
    """Convert the image stored at ``image_path`` into a sketch (line art).

    Args:
        image_path (str): Path of the input image file.

    Returns:
        PIL.Image.Image: The sketch produced by ``convert_pil_to_sketch``
        (an in-memory image, not a file path).
    """
    # The context manager releases the file handle as soon as the pixel data
    # has been consumed instead of leaving it open until garbage collection.
    with Image.open(image_path) as image:
        return convert_pil_to_sketch(image)
25
+
26
def convert_pil_to_sketch(image):
    """Convert a PIL image into an inverted XDoG sketch of the same size.

    Args:
        image (PIL.Image.Image): Input image.

    Returns:
        PIL.Image.Image: Sketch with the same width/height as the input.
        ``scribble_xdog`` marks detected lines with 255 on a 0 background;
        the result returned here is the inverse of that.
    """
    # scribble_xdog (via HWC3) only accepts uint8 arrays with 1, 3 or 4
    # channels. Palette, LA, CMYK, 1-bit, ... images would either fail its
    # assertions or be misread, so normalise them first.
    if image.mode not in ("L", "RGB", "RGBA"):
        has_alpha = "A" in image.getbands() or "transparency" in image.info
        image = image.convert("RGBA" if has_alpha else "RGB")

    input_size = image.size
    pixels = np.array(image)

    # Run XDoG at a 2048px short side with threshold 16, then scale the
    # sketch back to the caller's original dimensions.
    sketch, _ = scribble_xdog(pixels, 2048, 16)
    sketch = sketch.resize(input_size)

    # Invert the sketch. No RGB<->BGR swap is needed: scribble_xdog writes
    # the same value into all three channels and PIL expects RGB order.
    inverted = 255 - np.asarray(sketch)
    return Image.fromarray(inverted)
44
+
45
def process_images(input_folder, output_folder):
    """Convert every image in ``input_folder`` to a sketch and save it.

    Files whose name ends with ``.png``, ``.jpg`` or ``.jpeg`` (case
    insensitive) are processed; each result is written to ``output_folder``
    under the same file name.

    Args:
        input_folder (str): Folder containing the input images.
        output_folder (str): Folder receiving the sketches; created when it
            does not exist yet.
    """
    # Sorted so that progress output and processing order are deterministic.
    image_files = sorted(
        name for name in os.listdir(input_folder)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    os.makedirs(output_folder, exist_ok=True)

    for image_file in tqdm(image_files):
        input_path = os.path.join(input_folder, image_file)
        output_path = os.path.join(output_folder, image_file)

        # Close each input file once it has been converted so that large
        # folders do not accumulate open file handles.
        with Image.open(input_path) as image:
            processed_image = convert_pil_to_sketch(image)
        processed_image.save(output_path)
66
+
67
if __name__ == '__main__':
    # Command-line entry point: two positional folder arguments.
    cli = argparse.ArgumentParser(description='指定したフォルダ内の全ての画像をscribble_xdogで処理し、出力フォルダに保存します。')
    for arg_name, arg_help in (
        ('input_folder', '入力フォルダのパス'),
        ('output_folder', '出力フォルダのパス'),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)

    options = cli.parse_args()
    process_images(options.input_folder, options.output_folder)
dataset_aug.py ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 画像の拡張処理を行うための関数群を提供します。
3
+
4
+ 1. 画像の平均色を計算する関数
5
+ 2. 画像を指定された角度で回転させ、平均色で余白を埋める関数
6
+ 3. 回転した画像から最大の長方形を切り出す関数
7
+ 4. ランダムな正方形を切り出す関数
8
+ """
9
+
10
+ import os
11
+ from PIL import Image, ImageStat, ImageOps
12
+ from collections import Counter
13
+ import random
14
+ import math
15
+ from tqdm import tqdm
16
+ import argparse
17
+
18
def get_average_color(image):
    """Return the average colour of ``image`` as an ``(r, g, b)`` int tuple.

    Args:
        image (PIL.Image.Image): Image to analyse (any mode Pillow can
            convert to RGB).

    Returns:
        tuple: Per-channel mean truncated to ``int``.
    """
    # ImageStat reports one mean per band, so a grayscale, palette or RGBA
    # image would break the three-way unpacking below. Normalise to RGB.
    if image.mode != "RGB":
        image = image.convert("RGB")
    stat = ImageStat.Stat(image)
    r, g, b = map(int, stat.mean)
    return (r, g, b)
24
+
25
def get_edge_mode_color(img, edge_width=10):
    """Return the most frequent pixel value found along the image border.

    The border is made of four strips (left, right, top, bottom), each
    ``edge_width`` pixels thick. Corner pixels belong to two strips and are
    therefore counted twice.
    """
    width, height = img.width, img.height
    border_boxes = (
        (0, 0, edge_width, height),               # left strip
        (width - edge_width, 0, width, height),   # right strip
        (0, 0, width, edge_width),                # top strip
        (0, height - edge_width, width, height),  # bottom strip
    )

    # Tally the pixel values strip by strip.
    tally = Counter()
    for box in border_boxes:
        tally.update(img.crop(box).getdata())

    # The mode is the first entry of the frequency ranking.
    return tally.most_common(1)[0][0]
39
+
40
def rotate_image(image, angle, fill_color=(255, 255, 255)):
    """Rotate ``image`` by ``angle``, expanding the canvas and filling the
    uncovered area with ``fill_color``."""
    rotated = image.rotate(angle, expand=True, fillcolor=fill_color)
    return rotated
43
+
44
def crop_square(cropped_rect_image, left, top, crop_size):
    """Cut out the ``crop_size`` x ``crop_size`` square whose top-left
    corner is at ``(left, top)``."""
    right = left + crop_size
    bottom = top + crop_size
    return cropped_rect_image.crop((left, top, right, bottom))
47
+
48
def apply_random_flip(image, is_horizontal):
    """Mirror ``image`` horizontally when ``is_horizontal`` is true.

    When ``is_horizontal`` is false the image is returned unchanged; no
    vertical flip is performed.
    """
    if not is_horizontal:
        return image
    return ImageOps.mirror(image)  # horizontal flip
53
+
54
def process_image_pair(
    source_image,
    target_image,
    output_size=(1024, 1024),
    is_flip=False,
    rotation_range=40,
    min_scale=0.6,
    max_scale=1.2,
    source_is_avg_color_fill=False,
    source_is_edge_mode_fill=False,
    target_is_avg_color_fill=True,
    target_is_edge_mode_fill=False,
    expand_to_long_side=False
):
    """Apply one random augmentation to a source/target image pair.

    The same rotation angle, flip decision and scale are used for both
    images, and the target crop position is derived from the source crop
    position, so the two outputs stay aligned.

    Args:
        source_image (PIL.Image.Image): Source image.
        target_image (PIL.Image.Image): Target image. It is assumed to have
            the same aspect ratio as the source.
        output_size (tuple): ``(width, height)`` of both returned images.
        is_flip (bool): When true, a horizontal flip is applied with a 50%
            chance.
        rotation_range (float): Rotation angle is drawn uniformly from
            ``[-rotation_range, rotation_range]``; ``0`` disables rotation.
        min_scale (float): Lower bound of the random scale.
        max_scale (float): Upper bound of the random scale.
        source_is_avg_color_fill (bool): Fill source padding with the
            average colour of the source.
        source_is_edge_mode_fill (bool): Fill source padding with the most
            frequent border colour (takes precedence over the average).
        target_is_avg_color_fill (bool): Same as above, for the target.
        target_is_edge_mode_fill (bool): Same as above, for the target.
        expand_to_long_side (bool): Pad both images to a square whose side
            is their long side before any other step.

    Returns:
        tuple: ``(final_source, final_target)`` resized to ``output_size``.

    Raises:
        ValueError: If ``min_scale`` or ``max_scale`` is not positive.
    """
    # The canvas is sized with 1/scale, so a zero or negative scale makes
    # no sense and could otherwise request an absurdly large canvas.
    if min_scale <= 0 or max_scale <= 0:
        raise ValueError("min_scale and max_scale must be positive")

    orig_source_width, orig_source_height = source_image.size
    orig_target_width, orig_target_height = target_image.size

    # Padding colour for the source image (edge mode wins over average).
    if source_is_edge_mode_fill:
        source_fill_color = get_edge_mode_color(source_image, edge_width=10)
    elif source_is_avg_color_fill:
        source_fill_color = get_average_color(source_image)
    else:
        source_fill_color = (255, 255, 255)

    # Padding colour for the target image.
    if target_is_edge_mode_fill:
        target_fill_color = get_edge_mode_color(target_image, edge_width=10)
    elif target_is_avg_color_fill:
        target_fill_color = get_average_color(target_image)
    else:
        target_fill_color = (255, 255, 255)

    base_source = source_image
    base_target = target_image

    if expand_to_long_side:
        # Centre the source on a square canvas whose side is its long side.
        source_long_side = max(base_source.width, base_source.height)
        source_canvas = Image.new("RGB", (source_long_side, source_long_side), source_fill_color)
        source_paste_x = (source_long_side - base_source.width) // 2
        source_paste_y = (source_long_side - base_source.height) // 2
        source_canvas.paste(base_source, (source_paste_x, source_paste_y))
        base_source = source_canvas

        # Same for the target.
        target_long_side = max(base_target.width, base_target.height)
        target_canvas = Image.new("RGB", (target_long_side, target_long_side), target_fill_color)
        target_paste_x = (target_long_side - base_target.width) // 2
        target_paste_y = (target_long_side - base_target.height) // 2
        target_canvas.paste(base_target, (target_paste_x, target_paste_y))
        base_target = target_canvas

    if rotation_range > 0:
        angle = random.uniform(-rotation_range, rotation_range)
        # Rotate the current working images, not the untouched inputs;
        # otherwise the long-side padding applied above would be discarded.
        base_source = rotate_image(base_source, angle, source_fill_color)
        base_target = rotate_image(base_target, angle, target_fill_color)

    if is_flip:
        # One shared decision keeps source and target mirrored identically.
        is_horizontal = random.choice([True, False])
        base_source = apply_random_flip(base_source, is_horizontal)
        base_target = apply_random_flip(base_target, is_horizontal)

    scale = random.uniform(min_scale, max_scale)
    canvas_scale = 1 / scale

    if canvas_scale > 1.0:
        # Scaling down: place the image at the centre of a larger canvas so
        # the crop taken below contains padding around it.
        scaled_source = Image.new("RGB", (int(base_source.width * canvas_scale), int(base_source.height * canvas_scale)), source_fill_color)
        scaled_target = Image.new("RGB", (int(base_target.width * canvas_scale), int(base_target.height * canvas_scale)), target_fill_color)
        scaled_source.paste(base_source, (int((scaled_source.width - base_source.width) / 2), int((scaled_source.height - base_source.height) / 2)))
        scaled_target.paste(base_target, (int((scaled_target.width - base_target.width) / 2), int((scaled_target.height - base_target.height) / 2)))
    else:
        # Scaling up: a smaller crop of the image itself is enough.
        scaled_source = base_source
        scaled_target = base_target

    # Side of the square crop: the short side of the working image scaled
    # by the canvas factor.
    base_source_width, base_source_height = base_source.size
    base_source_max_square_size = min(base_source_height, base_source_width)
    crop_source_size = int(base_source_max_square_size * canvas_scale)

    base_target_width, base_target_height = base_target.size
    base_target_max_square_size = min(base_target_height, base_target_width)
    crop_target_size = int(base_target_max_square_size * canvas_scale)

    # Random crop position inside the source canvas.
    scaled_source_width, scaled_source_height = scaled_source.size
    left_source = random.randint(0, scaled_source_width - crop_source_size)
    top_source = random.randint(0, scaled_source_height - crop_source_size)

    # Map the crop position onto the target. This assumes source and target
    # share the same aspect ratio.
    left_target = left_source * orig_target_width // orig_source_width
    top_target = top_source * orig_target_height // orig_source_height

    final_source = crop_square(scaled_source, left_source, top_source, crop_source_size).resize(output_size)
    final_target = crop_square(scaled_target, left_target, top_target, crop_target_size).resize(output_size)

    return final_source, final_target
157
+
158
def process_images(
    source_img,
    target_img,
    num_copies,
    output_size,
    is_flip,
    rotation_range,
    min_scale,
    max_scale,
    source_is_avg_color_fill,
    source_is_edge_mode_fill,
    target_is_avg_color_fill,
    target_is_edge_mode_fill,
    expand_to_long_side
):
    """Create ``num_copies`` augmented versions of one source/target pair.

    Every copy comes from an independent call to ``process_image_pair``
    with the given settings.

    Returns:
        tuple: ``(aug_sources, aug_targets)``, two lists of equal length.
    """
    augmented_pairs = [
        process_image_pair(
            source_img,
            target_img,
            output_size=output_size,
            is_flip=is_flip,
            rotation_range=rotation_range,
            min_scale=min_scale,
            max_scale=max_scale,
            source_is_avg_color_fill=source_is_avg_color_fill,
            source_is_edge_mode_fill=source_is_edge_mode_fill,
            target_is_avg_color_fill=target_is_avg_color_fill,
            target_is_edge_mode_fill=target_is_edge_mode_fill,
            expand_to_long_side=expand_to_long_side,
        )
        for _ in range(num_copies)
    ]

    aug_sources = [pair[0] for pair in augmented_pairs]
    aug_targets = [pair[1] for pair in augmented_pairs]
    return aug_sources, aug_targets
197
+
198
if __name__ == '__main__':
    def _str2bool(value):
        """Parse a command-line boolean.

        ``type=bool`` cannot be used with argparse: ``bool("False")`` is
        ``True`` because any non-empty string is truthy.
        """
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ('true', 't', 'yes', 'y', '1'):
            return True
        if lowered in ('false', 'f', 'no', 'n', '0', ''):
            return False
        raise argparse.ArgumentTypeError(f'boolean value expected, got {value!r}')

    parser = argparse.ArgumentParser()
    parser.add_argument('source_folder', type=str, help='source画像フォルダのパス / Path to source image folder')
    parser.add_argument('target_folder', type=str, help='target画像フォルダのパス / Path to target image folder')
    parser.add_argument('output_folder', type=str, help='出力先フォルダのパス / Path to output folder')
    parser.add_argument('--output_size', '-s', type=int, default=1024,
                        help='出力画像の一辺のサイズ / Output image size')
    parser.add_argument('--num_copies', '-n', type=int, default=1,
                        help='出力画像の枚数 / Number of augmented copies')
    parser.add_argument('--is_flip', '-f', type=_str2bool, default=True,
                        help='フリップを適用するかどうか / Whether to apply random flip')
    parser.add_argument('--rotation_range', '-r', type=int, default=0,
                        help='回転角度の範囲 / Range of rotation angle')
    parser.add_argument('--min_scale', '--ms', type=float, default=1.0,
                        help='最小の画像サイズ / Minimum scale of the image')
    parser.add_argument('--max_scale', '--xs', type=float, default=1.0,
                        help='最大の画像サイズ / Maximum scale of the image')
    parser.add_argument('--source_is_avg_color_fill', '--sa', type=_str2bool, default=True,
                        help='source画像を平均色で余白を埋めるかどうか / Whether to fill source image padding with average color')
    parser.add_argument('--source_is_edge_mode_fill', '--se', type=_str2bool, default=False,
                        help='source画像を外周の最頻値で余白を埋めるかどうか / Whether to fill source image padding with edge mode color')
    parser.add_argument('--target_is_avg_color_fill', '--ta', type=_str2bool, default=False,
                        help='target画像を平均色で余白を埋めるかどうか / Whether to fill target image padding with average color')
    parser.add_argument('--target_is_edge_mode_fill', '--te', type=_str2bool, default=False,
                        help='target画像を外周の最頻値で余白を埋めるかどうか / Whether to fill target image padding with edge mode color')
    parser.add_argument('--expand_to_long_side', '--el', type=_str2bool, default=False,
                        help='長辺まで拡張して正方形にするかどうか / Whether to expand the image to a square using the long side')
    args = parser.parse_args()

    # Augmented sources and targets go to sibling sub-folders of the output.
    output_path_source = os.path.join(args.output_folder, 'aug_source')
    output_path_target = os.path.join(args.output_folder, 'aug_target')
    os.makedirs(output_path_source, exist_ok=True)
    os.makedirs(output_path_target, exist_ok=True)

    for image_name in tqdm(sorted(os.listdir(args.source_folder))):
        if not image_name.lower().endswith(('.jpg', '.jpeg', '.png')):
            continue

        source_path = os.path.join(args.source_folder, image_name)
        target_path = os.path.join(args.target_folder, image_name)
        # A pair is identified by an identical file name in both folders.
        if not os.path.isfile(target_path):
            print(f'Skipping {image_name}: no matching file in target folder')
            continue

        # process_images works on PIL images, not on paths.
        with Image.open(source_path) as source_img, Image.open(target_path) as target_img:
            aug_sources, aug_targets = process_images(
                source_img,
                target_img,
                args.num_copies,
                (args.output_size, args.output_size),
                args.is_flip,
                args.rotation_range,
                args.min_scale,
                args.max_scale,
                args.source_is_avg_color_fill,
                args.source_is_edge_mode_fill,
                args.target_is_avg_color_fill,
                args.target_is_edge_mode_fill,
                args.expand_to_long_side
            )

        # Save every copy as "<name>_<index><ext>" so copies do not
        # overwrite each other and pairs keep matching names.
        stem, ext = os.path.splitext(image_name)
        for index, (aug_source, aug_target) in enumerate(zip(aug_sources, aug_targets)):
            file_name = f'{stem}_{index}{ext}'
            aug_source.save(os.path.join(output_path_source, file_name))
            aug_target.save(os.path.join(output_path_target, file_name))
lineart_util.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from PIL import Image
4
+
5
def pad64(x):
    """Return the amount to add to ``x`` to reach the next multiple of 64
    (``0`` when ``x`` is already a multiple of 64)."""
    next_multiple = np.ceil(float(x) / 64.0) * 64
    return int(next_multiple - x)
7
+
8
def HWC3(x):
    """Return ``x`` as a 3-channel uint8 array in height/width/channel layout.

    * 2-D or single-channel input: the channel is repeated three times.
    * 3-channel input: returned as is.
    * 4-channel input: the first three channels are alpha-blended over a
      white background using the fourth channel.
    """
    assert x.dtype == np.uint8
    if x.ndim == 2:
        x = x[:, :, None]
    assert x.ndim == 3

    channels = x.shape[2]
    assert channels in (1, 3, 4)

    if channels == 1:
        return np.concatenate([x, x, x], axis=2)
    if channels == 3:
        return x

    # Four channels: composite the colour over white.
    rgb = x[:, :, 0:3].astype(np.float32)
    opacity = x[:, :, 3:4].astype(np.float32) / 255.0
    blended = rgb * opacity + 255.0 * (1.0 - opacity)
    return blended.clip(0, 255).astype(np.uint8)
25
+
26
def safer_memory(x):
    """Return a fresh, C-contiguous copy of ``x``.

    Per the original author's note this works around many Mac/AMD problems.
    """
    duplicate = x.copy()
    contiguous = np.ascontiguousarray(duplicate)
    return contiguous.copy()
29
+
30
def resize_image_with_pad(input_image, resolution, skip_hwc3=False):
    """Resize so the short side equals ``resolution``, then pad to 64-multiples.

    Args:
        input_image (np.ndarray): Input image; passed through ``HWC3``
            unless ``skip_hwc3`` is true.
        resolution (int): Length of the short side after resizing.
        skip_hwc3 (bool): Use ``input_image`` as is.

    Returns:
        tuple: ``(padded_image, remove_pad)``. ``remove_pad(x)`` crops an
        array back to the resized (unpadded) height and width.
    """
    img = input_image if skip_hwc3 else HWC3(input_image)

    H_raw, W_raw, _ = img.shape
    k = float(resolution) / float(min(H_raw, W_raw))
    H_target = int(np.round(float(H_raw) * k))
    W_target = int(np.round(float(W_raw) * k))

    # Cubic interpolation when enlarging, area interpolation otherwise.
    if k > 1:
        interpolation = cv2.INTER_CUBIC
    else:
        interpolation = cv2.INTER_AREA
    img = cv2.resize(img, (W_target, H_target), interpolation=interpolation)

    # Pad bottom/right by repeating edge pixels until both sides are
    # multiples of 64.
    H_pad = pad64(H_target)
    W_pad = pad64(W_target)
    img_padded = np.pad(img, [[0, H_pad], [0, W_pad], [0, 0]], mode='edge')

    def remove_pad(x):
        return safer_memory(x[:H_target, :W_target])

    return safer_memory(img_padded), remove_pad
48
+
49
def scribble_xdog(img, res=512, thr_a=32, **kwargs):
    """Generate a sketch image from a difference of two Gaussian blurs.

    :param img: np.ndarray, input image (uint8; 1, 3 or 4 channels)
    :param res: int, length of the short side the image is resized to
        before processing
    :param thr_a: int, threshold; a pixel becomes a line (255) when twice
        its edge strength exceeds this value
    :param kwargs: accepted for call compatibility and ignored

    Returns
    -------
    (PIL.Image, bool)
        The sketch (255 on lines, 0 elsewhere, three identical channels)
        and the constant ``True``.
    """
    img, remove_pad = resize_image_with_pad(img, res)
    as_float = img.astype(np.float32)
    g1 = cv2.GaussianBlur(as_float, (0, 0), 0.5)
    g2 = cv2.GaussianBlur(as_float, (0, 0), 5.0)
    dog = (255 - np.min(g2 - g1, axis=2)).clip(0, 255).astype(np.uint8)

    # Widen before doubling: in uint8, 2 * (255 - dog) wraps around at 256,
    # so the strongest edges (values >= 128) would fall back below the
    # threshold and be dropped.
    edge_strength = 255 - dog.astype(np.int16)

    result = np.zeros_like(img, dtype=np.uint8)
    # The 2-D mask selects whole pixels, so all three channels are set.
    result[2 * edge_strength > thr_a] = 255
    result = Image.fromarray(remove_pad(result))
    return result, True
requirements.txt ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diffusers>=0.5.1
2
+ numpy==1.23.4
3
+ wandb==0.13.4
4
+ torch
5
+ torchvision
6
+ transformers>=4.21.0
7
+ huggingface-hub>=0.10.0
8
+ Pillow==9.2.0
9
+ tqdm==4.64.1
10
+ ftfy==6.1.1
11
+ bitsandbytes
12
+ pynvml~=11.4.1
13
+ psutil~=5.9.0
14
+ accelerate==0.13.1
15
+ scipy==1.9.3
16
+ pybooru==4.2.2
17
+ webdataset==0.2.86
test_app.py ADDED
@@ -0,0 +1,418 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ from PIL import Image
4
+ import tempfile
5
+ from dataset_aug import process_images
6
+ import convert_source_to_sketch # スケッチ変換用のモジュールをインポート
7
+ import random
8
+
9
def process_multiple_images(
    source_images,
    target_images,
    output_size,
    num_copies,
    is_flip,
    rotation_range,
    min_scale,
    max_scale,
    source_is_avg_color_fill,
    source_is_edge_mode_fill,
    target_is_avg_color_fill,
    target_is_edge_mode_fill,
    expand_to_long_side
):
    """Augment every uploaded source/target pair and collect the results.

    Sources and targets are paired by position; surplus files on either
    side are ignored.

    Args:
        source_images: Uploaded source files (objects with a ``name``
            attribute holding the file path), or ``None``/empty.
        target_images: Uploaded target files, same form as above.
        output_size: Side length of the square output images.
        num_copies: Number of augmented copies per pair.
        Remaining arguments are forwarded unchanged to ``process_images``.

    Returns:
        tuple: ``(result_source_images, result_target_images)``, two lists
        of PIL images.
    """
    result_source_images = []
    result_target_images = []

    # Nothing uploaded on one side (an empty file input yields None):
    # return empty galleries instead of failing inside zip().
    if not source_images or not target_images:
        return result_source_images, result_target_images

    # Slider values may arrive as floats; sizes and range() need ints.
    size = int(output_size)
    copies = int(num_copies)

    for source_file, target_file in zip(source_images, target_images):
        # Close the uploaded files once the augmented copies are built.
        with Image.open(source_file.name) as source_img, Image.open(target_file.name) as target_img:
            aug_sources, aug_targets = process_images(
                source_img,
                target_img,
                num_copies=copies,
                output_size=(size, size),
                is_flip=is_flip,
                rotation_range=rotation_range,
                min_scale=min_scale,
                max_scale=max_scale,
                source_is_avg_color_fill=source_is_avg_color_fill,
                source_is_edge_mode_fill=source_is_edge_mode_fill,
                target_is_avg_color_fill=target_is_avg_color_fill,
                target_is_edge_mode_fill=target_is_edge_mode_fill,
                expand_to_long_side=expand_to_long_side
            )

        result_source_images.extend(aug_sources)
        result_target_images.extend(aug_targets)

    return result_source_images, result_target_images
55
+
56
def update_source_preview(source_files):
    """Return the file paths of the uploaded source files for the preview
    gallery (an empty list when nothing is uploaded)."""
    if not source_files:
        return []
    return [uploaded.name for uploaded in source_files]
62
+
63
def update_target_preview(target_files):
    """Return the file paths of the uploaded target files for the preview
    gallery (an empty list when nothing is uploaded)."""
    if not target_files:
        return []
    return [uploaded.name for uploaded in target_files]
69
+
70
# Temporary directories created by convert_to_sketch. cleanup_temp_files
# removes exactly these and nothing else.
_SKETCH_TEMP_DIRS = []


def convert_to_sketch(source_files):
    """Convert the given uploaded files to sketches.

    Args:
        source_files: Uploaded files (objects with a ``name`` attribute
            holding the file path), or ``None``/empty.

    Returns:
        list: Paths of the generated sketch files, stored in a temporary
        directory that ``cleanup_temp_files`` removes later. An empty list
        is returned when there is no input or when any conversion fails.
    """
    import shutil

    if not source_files:
        return []

    temp_dir = tempfile.mkdtemp(prefix="pair_aug_sketch_")
    _SKETCH_TEMP_DIRS.append(temp_dir)

    converted_images = []
    try:
        for source in source_files:
            # Close the uploaded file as soon as it has been converted.
            with Image.open(source.name) as image:
                sketch = convert_source_to_sketch.convert_pil_to_sketch(image)

            temp_path = os.path.join(temp_dir, os.path.basename(source.name))
            sketch.save(temp_path)
            converted_images.append(temp_path)
    except Exception as e:
        # UI boundary: report the problem and return no results rather
        # than propagating the error into the event handler.
        print(f"Error during conversion: {e}")
        shutil.rmtree(temp_dir, ignore_errors=True)
        _SKETCH_TEMP_DIRS.remove(temp_dir)
        return []

    return converted_images


def cleanup_temp_files():
    """Delete the temporary directories created by ``convert_to_sketch``.

    Only directories recorded by this module are removed. Scanning the
    system temp folder for every ``tmp*`` entry would also destroy
    temporary data belonging to other programs.
    """
    import shutil

    while _SKETCH_TEMP_DIRS:
        temp_dir = _SKETCH_TEMP_DIRS.pop()
        try:
            shutil.rmtree(temp_dir)
        except FileNotFoundError:
            # Already gone; nothing left to clean up.
            pass
        except OSError as e:
            print(f"Error cleaning up {temp_dir}: {e}")
120
+
121
def randomize_params():
    """Draw a random value for every augmentation parameter.

    Returns:
        tuple: Values in the order output_size, num_copies, is_flip,
        rotation_range, min_scale, max_scale, source_is_avg_color_fill,
        source_is_edge_mode_fill, target_is_avg_color_fill,
        target_is_edge_mode_fill, expand_to_long_side.
    """
    on_off = [True, False]

    # The draws are made in the same order as the returned tuple.
    output_size = random.choice([512, 768, 1024, 1536, 2048])
    num_copies = random.randint(1, 5)
    is_flip = random.choice(on_off)
    rotation_range = random.randint(0, 180)
    min_scale = round(random.uniform(0.1, 1.0), 1)
    max_scale = round(random.uniform(1.0, 2.0), 1)
    source_is_avg_color_fill = random.choice(on_off)
    source_is_edge_mode_fill = random.choice(on_off)
    target_is_avg_color_fill = random.choice(on_off)
    target_is_edge_mode_fill = random.choice(on_off)
    expand_to_long_side = random.choice(on_off)

    return (
        output_size,
        num_copies,
        is_flip,
        rotation_range,
        min_scale,
        max_scale,
        source_is_avg_color_fill,
        source_is_edge_mode_fill,
        target_is_avg_color_fill,
        target_is_edge_mode_fill,
        expand_to_long_side,
    )
136
+
137
def reset_params():
    """Return the initial value of every augmentation parameter.

    Returns:
        tuple: Values in the order output_size, num_copies, is_flip,
        rotation_range, min_scale, max_scale, source_is_avg_color_fill,
        source_is_edge_mode_fill, target_is_avg_color_fill,
        target_is_edge_mode_fill, expand_to_long_side.
    """
    defaults = {
        "output_size": 1024,
        "num_copies": 1,
        "is_flip": True,
        "rotation_range": 0,
        "min_scale": 1.0,
        "max_scale": 1.0,
        "source_is_avg_color_fill": True,
        "source_is_edge_mode_fill": False,
        "target_is_avg_color_fill": False,
        "target_is_edge_mode_fill": False,
        "expand_to_long_side": False,
    }
    return tuple(defaults.values())
152
+
153
def test_process_image_pair_with_expand_to_long_side():
    """Check that the long-side expansion option yields square output of
    the requested size for a rectangular input pair."""
    # The module top only imports process_images, so bring the function
    # under test into scope here to avoid a NameError.
    from dataset_aug import process_image_pair

    # Rectangular test images (width 800, height 400).
    source_image = Image.new('RGB', (800, 400), color='white')
    target_image = Image.new('RGB', (800, 400), color='white')

    result_source, result_target = process_image_pair(
        source_image,
        target_image,
        output_size=(512, 512),
        is_flip=False,
        rotation_range=0,
        min_scale=1.0,
        max_scale=1.0,
        source_is_avg_color_fill=True,
        source_is_edge_mode_fill=False,
        target_is_avg_color_fill=True,
        target_is_edge_mode_fill=False,
        expand_to_long_side=True  # enable long-side expansion
    )

    # The results must be square.
    assert result_source.size[0] == result_source.size[1]
    assert result_target.size[0] == result_target.size[1]

    # The results must have exactly the requested size.
    assert result_source.size == (512, 512)
    assert result_target.size == (512, 512)
181
+
182
+ # Gradioインターフェースの作成
183
+ with gr.Blocks() as demo:
184
+ gr.Markdown("# データ拡張テスト")
185
+ gr.Markdown("Code : https://github.com/Yeq6X/pair-images-aug")
186
+
187
+ with gr.Row():
188
+ # 左側のカラム(Source画像とパラメータ)
189
+ with gr.Column():
190
+ with gr.Row():
191
+ # Source画像
192
+ source_files = gr.File(
193
+ label="Source画像を選択",
194
+ file_count="multiple",
195
+ file_types=["image"],
196
+ height=150
197
+ )
198
+ # Target画像
199
+ target_files = gr.File(
200
+ label="Target画像を選択",
201
+ file_count="multiple",
202
+ file_types=["image"],
203
+ height=150
204
+ )
205
+
206
+ # サンプル画像の追加
207
+ gr.Examples(
208
+ examples=[
209
+ [["samples/source/sample1.png", "samples/source/sample2.png"],
210
+ ["samples/target/sample1.png", "samples/target/sample2.png"]],
211
+ ],
212
+ inputs=[source_files, target_files],
213
+ label="サンプル画像セット",
214
+ examples_per_page=5
215
+ )
216
+
217
+ source_preview = gr.Gallery(
218
+ label="Source画像プレビュー",
219
+ show_label=True,
220
+ object_fit="contain",
221
+ columns=4,
222
+ height=300,
223
+ preview=True,
224
+ )
225
+ with gr.Row():
226
+ gr.Markdown("### scribble_xdogで変換")
227
+ convert_src_to_tgt_btn = gr.Button("↓", variant="primary", size="sm")
228
+ convert_tgt_to_src_btn = gr.Button("↑", variant="primary", size="sm")
229
+ gr.Markdown("")
230
+ target_preview = gr.Gallery(
231
+ label="Target画像プレビュー",
232
+ show_label=True,
233
+ object_fit="contain",
234
+ columns=4,
235
+ height=300,
236
+ preview=True
237
+ )
238
+
239
+ # 右側のカラム(Target画像と出力)
240
+ with gr.Column():
241
+ # パラメータ設定部分
242
+ with gr.Group():
243
+ gr.Markdown("### パラメータ設定")
244
+
245
+ # パラメータ操作ボタン
246
+ with gr.Row():
247
+ randomize_btn = gr.Button("🎲 ランダム設定", variant="secondary")
248
+ reset_btn = gr.Button("↺ 初期設定に戻す", variant="secondary")
249
+
250
+ with gr.Row():
251
+ with gr.Column():
252
+ output_size = gr.Slider(
253
+ minimum=256,
254
+ maximum=2048,
255
+ value=1024,
256
+ step=256,
257
+ label="出力画像サイズ"
258
+ )
259
+ num_copies = gr.Slider(
260
+ minimum=1,
261
+ maximum=5,
262
+ value=1,
263
+ step=1,
264
+ label="リピート回数"
265
+ )
266
+ is_flip = gr.Checkbox(
267
+ label="ランダムフリップを適用",
268
+ value=True
269
+ )
270
+ expand_to_long_side = gr.Checkbox(
271
+ label="長辺に合わせて拡張する",
272
+ value=False
273
+ )
274
+ rotation_range = gr.Slider(
275
+ minimum=0,
276
+ maximum=180,
277
+ value=0,
278
+ step=1,
279
+ label="回転角度の範囲"
280
+ )
281
+
282
+ with gr.Column():
283
+ min_scale = gr.Slider(
284
+ minimum=0.1,
285
+ maximum=1.0,
286
+ value=1.0,
287
+ step=0.1,
288
+ label="最小スケール"
289
+ )
290
+ max_scale = gr.Slider(
291
+ minimum=1.0,
292
+ maximum=2.0,
293
+ value=1.0,
294
+ step=0.1,
295
+ label="最大スケール"
296
+ )
297
+ with gr.Row():
298
+ with gr.Column():
299
+ source_is_edge_mode_fill = gr.Checkbox(
300
+ label="Source: 外周の最頻色で埋める",
301
+ value=False
302
+ )
303
+ source_is_avg_color_fill = gr.Checkbox(
304
+ label="Source: 画像の平均色で埋める",
305
+ value=True
306
+ )
307
+ with gr.Column():
308
+ target_is_edge_mode_fill = gr.Checkbox(
309
+ label="Target: 外周の最頻色で埋める",
310
+ value=False
311
+ )
312
+ target_is_avg_color_fill = gr.Checkbox(
313
+ label="Target: 画像の平均色で埋める",
314
+ value=False
315
+ )
316
+
317
+ process_btn = gr.Button("処理開始", variant="primary")
318
+
319
+ # 結果表示
320
+ result_source_gallery = gr.Gallery(
321
+ label="生成結果 (Source)",
322
+ show_label=True,
323
+ object_fit="contain",
324
+ columns=4,
325
+ height=250,
326
+ preview=True,
327
+ type="pil"
328
+ )
329
+ result_target_gallery = gr.Gallery(
330
+ label="生成結果 (Target)",
331
+ show_label=True,
332
+ object_fit="contain",
333
+ columns=4,
334
+ height=250,
335
+ preview=True,
336
+ type="pil"
337
+ )
338
+
339
+ # イベントハンドラ
340
+ source_files.change(
341
+ fn=update_source_preview,
342
+ inputs=[source_files],
343
+ outputs=source_preview
344
+ )
345
+
346
+ target_files.change(
347
+ fn=update_target_preview,
348
+ inputs=[target_files],
349
+ outputs=target_preview
350
+ )
351
+
352
+ convert_src_to_tgt_btn.click(
353
+ fn=convert_to_sketch,
354
+ inputs=[source_files],
355
+ outputs=[target_files]
356
+ )
357
+
358
+ convert_tgt_to_src_btn.click(
359
+ fn=convert_to_sketch,
360
+ inputs=[target_files],
361
+ outputs=[source_files]
362
+ )
363
+
364
+ param_outputs = [
365
+ output_size,
366
+ num_copies,
367
+ is_flip,
368
+ rotation_range,
369
+ min_scale,
370
+ max_scale,
371
+ source_is_avg_color_fill,
372
+ source_is_edge_mode_fill,
373
+ target_is_avg_color_fill,
374
+ target_is_edge_mode_fill,
375
+ expand_to_long_side
376
+ ]
377
+
378
+ randomize_btn.click(
379
+ fn=randomize_params,
380
+ inputs=[],
381
+ outputs=param_outputs
382
+ )
383
+
384
+ reset_btn.click(
385
+ fn=reset_params,
386
+ inputs=[],
387
+ outputs=param_outputs
388
+ )
389
+
390
+ process_btn.click(
391
+ fn=process_multiple_images,
392
+ inputs=[
393
+ source_files,
394
+ target_files,
395
+ output_size,
396
+ num_copies,
397
+ is_flip,
398
+ rotation_range,
399
+ min_scale,
400
+ max_scale,
401
+ source_is_avg_color_fill,
402
+ source_is_edge_mode_fill,
403
+ target_is_avg_color_fill,
404
+ target_is_edge_mode_fill,
405
+ expand_to_long_side
406
+ ],
407
+ outputs=[result_source_gallery, result_target_gallery]
408
+ )
409
+
410
if __name__ == "__main__":
    # Start the Gradio app with default host/port in debug mode and always
    # clean up temporary files when it stops.
    launch_options = {"debug": True}
    try:
        demo.launch(**launch_options)
    finally:
        cleanup_temp_files()