Update app.py
app.py CHANGED
@@ -25,7 +25,8 @@ from insightface.app import FaceAnalysis
 from pipeline_controlnet_xs_sd_xl_instantid import StableDiffusionXLInstantIDXSPipeline, UNetControlNetXSModel
 
 from utils.controlnet_xs import ControlNetXSAdapter
-
+from style import styles
+
 
 import gradio as gr
 
@@ -38,7 +39,8 @@ hf_hub_download(repo_id="RED-AIGC/InstantID-XS", filename="image_proj.bin", loca
 MAX_SEED = np.iinfo(np.int32).max
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
-
+STYLE_NAMES = list(styles.keys())
+DEFAULT_STYLE_NAME = "Ordinary"
 
 base_model = 'frankjoshua/realvisxlV40_v40Bakedvae'
 vae_path = 'madebyollin/sdxl-vae-fp16-fix'
@@ -101,20 +103,6 @@ pipe.unet.config.ctrl_learn_time_embedding = True
 pipe = pipe.to(device)
 
 
-
-
-def toggle_lcm_ui(value):
-    if value:
-        return (
-            gr.update(minimum=0, maximum=100, step=1, value=5),
-            gr.update(minimum=0.1, maximum=20.0, step=0.1, value=1.5),
-        )
-    else:
-        return (
-            gr.update(minimum=5, maximum=100, step=1, value=30),
-            gr.update(minimum=0.1, maximum=20.0, step=0.1, value=5),
-        )
-
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -128,30 +116,47 @@ def get_example():
         [
             "./examples/1.jpg",
             None,
-            "
+            "Ordinary",
+            "a woman",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
             "./examples/1.jpg",
-            "./examples/pose/pose1.
-            "
+            "./examples/pose/pose1.jpg",
+            "Hanfu",
+            "a woman",
+            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        ],
+        [
+            "./examples/2.jpg",
+            "./examples/pose/pose2.png",
+            "ZangZu",
+            "a woman",
+            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        ],
+        [
+            "./examples/3.png",
+            "./examples/pose/pose3.png",
+            "QingQiu",
+            "a woman",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
     ]
     return case
 
-def run_for_examples(face_file, pose_file, prompt, negative_prompt):
+def run_for_examples(face_file, pose_file, style, prompt, negative_prompt, ):
     return generate_image(
         face_file,
         pose_file,
+        style,
         prompt,
         negative_prompt,
         20, # num_steps
-        0.
-        0.8, #
-        0.8, # pose_strength
+        0.9, # ControlNet strength
+        0.8, # Adapter strength
         5.0, # guidance_scale
         42, # seed
+        1280, # max side
     )
 
 def convert_from_cv2_to_image(img: np.ndarray) -> Image:
@@ -190,15 +195,7 @@ def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,2
     out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
     return out_img_pil
 
-def resize_img(
-    input_image,
-    max_side=1280,
-    min_side=1024,
-    size=None,
-    pad_to_max_side=False,
-    mode=PIL.Image.BILINEAR,
-    base_pixel_number=64,
-):
+def resize_img(input_image,max_side=1280,min_side=1024,size=None,pad_to_max_side=False,mode=PIL.Image.BILINEAR,base_pixel_number=64,):
     w, h = input_image.size
     if size is not None:
         w_resize_new, h_resize_new = size
@@ -221,11 +218,15 @@ def resize_img(
     input_image = Image.fromarray(res)
     return input_image
 
+def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
+    p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
+    return p.replace("{prompt}", positive), n + ' ' + negative
 
 @spaces.GPU
 def generate_image(
     face_image_path,
-    pose_image_path,
+    pose_image_path,
+    style_name,
     prompt,
     negative_prompt,
     num_steps,
@@ -238,15 +239,13 @@ def generate_image(
 ):
 
     if face_image_path is None:
-        raise gr.Error(
-            f"Cannot find any input face image! Please upload the face image"
-        )
+        raise gr.Error(f"Cannot find any input face image! Please upload the face image")
 
     if prompt is None:
         prompt = "a person"
 
     # apply the style template
-
+    prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
 
     face_image = load_image(face_image_path)
     face_image = resize_img(face_image, max_side=max_side)
@@ -258,9 +257,7 @@ def generate_image(
     face_info = app.get(face_image_cv2)
 
     if len(face_info) == 0:
-        raise gr.Error(
-            f"Unable to detect a face in the image. Please upload a different photo with a clear face."
-        )
+        raise gr.Error(f"Unable to detect a face in the image. Please upload a different photo with a clear face.")
 
     face_info = sorted(
         face_info,
@@ -314,7 +311,6 @@ tips = r"""
 1. If you're not satisfied with the similarity, try increasing the weight of "ControlNet strength" and "Adapter Strength."
 2. If you feel that the similarity is not high, you can increase the adapter strength appropriately.
 3. If you want to achieve a pose image as similar as possible, please increase the ControlNet strength appropriately.
-4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
 """
 css = """
 .gradio-container {width: 85% !important}
@@ -338,22 +334,24 @@ with gr.Blocks(css=css) as demo:
                 label="Prompt",
                 info="Give simple prompt is enough to achieve good face fidelity",
                 placeholder="A photo of a person",
-                value="
+                value="realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
             )
 
             submit = gr.Button("Submit", variant="primary")
-            # enable_LCM = gr.Checkbox(
-            #     label="Enable Fast Inference with LCM", value=enable_lcm_arg,
-            #     info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
-            # )
 
+            style = gr.Dropdown(
+                label="Style",
+                choices=STYLE_NAMES,
+                value=DEFAULT_STYLE_NAME
+            )
+
             # strength
             controlnet_conditioning_scale = gr.Slider(
                 label="ControlNet strength (for pose)",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.1,
-                value=0.
+                value=0.9,
             )
             adapter_strength_ratio = gr.Slider(
                 label="Adapter strength (for fidelity)",
@@ -418,6 +416,7 @@ with gr.Blocks(css=css) as demo:
         inputs=[
             face_file,
             pose_file,
+            style,
             prompt,
             negative_prompt,
             num_steps,
@@ -430,16 +429,9 @@ with gr.Blocks(css=css) as demo:
         outputs=[gallery, usage_tips],
     )
 
-    # enable_LCM.input(
-    #     fn=toggle_lcm_ui,
-    #     inputs=[enable_LCM],
-    #     outputs=[num_steps, guidance_scale],
-    #     queue=False,
-    # )
-
     gr.Examples(
         examples=get_example(),
-        inputs=[face_file, pose_file, prompt, negative_prompt],
+        inputs=[face_file, pose_file, style, prompt, negative_prompt],
        fn=run_for_examples,
         outputs=[gallery, usage_tips],
         cache_examples=True,
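Note on the new style dependency: style.py itself is not part of this diff, so the template strings below are placeholders, not the Space's actual templates. What the diff does pin down is the shape apply_style expects: styles maps a style name to a (positive_template, negative) pair, the positive template carries a "{prompt}" slot, and unknown names fall back to DEFAULT_STYLE_NAME. A minimal sketch under those assumptions:

# style.py -- hypothetical sketch; the Space ships its own templates.
# Keys must include the names used by the dropdown and the examples
# ("Ordinary", "Hanfu", "ZangZu", "QingQiu").
styles = {
    "Ordinary": (
        "{prompt}, photorealistic, natural lighting",  # placeholder positive template
        "lowres, bad anatomy",                         # placeholder style negative
    ),
    "Hanfu": (
        "{prompt}, wearing traditional hanfu, intricate embroidery",
        "lowres, bad anatomy",
    ),
}

Given that shape, apply_style substitutes the user prompt into the template and appends the user negative after the style negative:

prompt, negative = apply_style("Hanfu", "a woman", "blurry")
# prompt   -> "a woman, wearing traditional hanfu, intricate embroidery"
# negative -> "lowres, bad anatomy blurry"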
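Note on resize_img: the diff collapses its signature onto one line but shows little of the body, so the following is only a reconstruction from the parameter names, not the Space's actual code. A function with this signature typically upscales until the short side reaches min_side, caps the long side at max_side, and snaps both sides to multiples of base_pixel_number:

# Hypothetical sketch of the resizing the signature implies; details
# (the pad_to_max_side padding path, the explicit `size` branch) may differ.
import PIL.Image

def resize_img_sketch(img, max_side=1280, min_side=1024,
                      mode=PIL.Image.BILINEAR, base_pixel_number=64):
    w, h = img.size
    ratio = min_side / min(w, h)            # grow the short side to min_side
    w, h = round(ratio * w), round(ratio * h)
    ratio = min(1.0, max_side / max(w, h))  # then shrink so the long side <= max_side
    w, h = round(ratio * w), round(ratio * h)
    w = (w // base_pixel_number) * base_pixel_number  # SDXL-friendly multiples of 64
    h = (h // base_pixel_number) * base_pixel_number
    return img.resize((w, h), mode)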