XuDongZhou committed on
Commit
e9cfa60
·
verified ·
1 Parent(s): cdd07e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -55
app.py CHANGED
@@ -25,7 +25,8 @@ from insightface.app import FaceAnalysis
25
  from pipeline_controlnet_xs_sd_xl_instantid import StableDiffusionXLInstantIDXSPipeline, UNetControlNetXSModel
26
 
27
  from utils.controlnet_xs import ControlNetXSAdapter
28
- # from controlnet_aux import OpenposeDetector
 
29
 
30
  import gradio as gr
31
 
@@ -38,7 +39,8 @@ hf_hub_download(repo_id="RED-AIGC/InstantID-XS", filename="image_proj.bin", loca
38
  MAX_SEED = np.iinfo(np.int32).max
39
  device = "cuda" if torch.cuda.is_available() else "cpu"
40
  weight_dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
41
-
 
42
 
43
  base_model = 'frankjoshua/realvisxlV40_v40Bakedvae'
44
  vae_path = 'madebyollin/sdxl-vae-fp16-fix'
@@ -101,20 +103,6 @@ pipe.unet.config.ctrl_learn_time_embedding = True
101
  pipe = pipe.to(device)
102
 
103
 
104
-
105
-
106
- def toggle_lcm_ui(value):
107
- if value:
108
- return (
109
- gr.update(minimum=0, maximum=100, step=1, value=5),
110
- gr.update(minimum=0.1, maximum=20.0, step=0.1, value=1.5),
111
- )
112
- else:
113
- return (
114
- gr.update(minimum=5, maximum=100, step=1, value=30),
115
- gr.update(minimum=0.1, maximum=20.0, step=0.1, value=5),
116
- )
117
-
118
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
119
  if randomize_seed:
120
  seed = random.randint(0, MAX_SEED)
@@ -128,30 +116,47 @@ def get_example():
128
  [
129
  "./examples/1.jpg",
130
  None,
131
- "a woman, daily wear, realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
 
132
  "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
133
  ],
134
  [
135
  "./examples/1.jpg",
136
- "./examples/pose/pose1.png",
137
- "a woman, daily wear, realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
139
  ],
140
  ]
141
  return case
142
 
143
- def run_for_examples(face_file, pose_file, prompt, negative_prompt):
144
  return generate_image(
145
  face_file,
146
  pose_file,
 
147
  prompt,
148
  negative_prompt,
149
  20, # num_steps
150
- 0.8, # identitynet_strength_ratio
151
- 0.8, # adapter_strength_ratio
152
- 0.8, # pose_strength
153
  5.0, # guidance_scale
154
  42, # seed
 
155
  )
156
 
157
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
@@ -190,15 +195,7 @@ def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,2
190
  out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
191
  return out_img_pil
192
 
193
- def resize_img(
194
- input_image,
195
- max_side=1280,
196
- min_side=1024,
197
- size=None,
198
- pad_to_max_side=False,
199
- mode=PIL.Image.BILINEAR,
200
- base_pixel_number=64,
201
- ):
202
  w, h = input_image.size
203
  if size is not None:
204
  w_resize_new, h_resize_new = size
@@ -221,11 +218,15 @@ def resize_img(
221
  input_image = Image.fromarray(res)
222
  return input_image
223
 
 
 
 
224
 
225
  @spaces.GPU
226
  def generate_image(
227
  face_image_path,
228
- pose_image_path,
 
229
  prompt,
230
  negative_prompt,
231
  num_steps,
@@ -238,15 +239,13 @@ def generate_image(
238
  ):
239
 
240
  if face_image_path is None:
241
- raise gr.Error(
242
- f"Cannot find any input face image! Please upload the face image"
243
- )
244
 
245
  if prompt is None:
246
  prompt = "a person"
247
 
248
  # apply the style template
249
- # prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
250
 
251
  face_image = load_image(face_image_path)
252
  face_image = resize_img(face_image, max_side=max_side)
@@ -258,9 +257,7 @@ def generate_image(
258
  face_info = app.get(face_image_cv2)
259
 
260
  if len(face_info) == 0:
261
- raise gr.Error(
262
- f"Unable to detect a face in the image. Please upload a different photo with a clear face."
263
- )
264
 
265
  face_info = sorted(
266
  face_info,
@@ -314,7 +311,6 @@ tips = r"""
314
  1. If you're not satisfied with the similarity, try increasing the weight of "ControlNet strength" and "Adapter Strength."
315
  2. If you feel that the similarity is not high, you can increase the adapter strength appropriately.
316
  3. If you want to achieve a pose image as similar as possible, please increase the ControlNet strength appropriately.
317
- 4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
318
  """
319
  css = """
320
  .gradio-container {width: 85% !important}
@@ -338,22 +334,24 @@ with gr.Blocks(css=css) as demo:
338
  label="Prompt",
339
  info="Give simple prompt is enough to achieve good face fidelity",
340
  placeholder="A photo of a person",
341
- value="a woman, daily wear, realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
342
  )
343
 
344
  submit = gr.Button("Submit", variant="primary")
345
- # enable_LCM = gr.Checkbox(
346
- # label="Enable Fast Inference with LCM", value=enable_lcm_arg,
347
- # info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
348
- # )
349
 
 
 
 
 
 
 
350
  # strength
351
  controlnet_conditioning_scale = gr.Slider(
352
  label="ControlNet strength (for pose)",
353
  minimum=0.0,
354
  maximum=1.0,
355
  step=0.1,
356
- value=0.8,
357
  )
358
  adapter_strength_ratio = gr.Slider(
359
  label="Adapter strength (for fidelity)",
@@ -418,6 +416,7 @@ with gr.Blocks(css=css) as demo:
418
  inputs=[
419
  face_file,
420
  pose_file,
 
421
  prompt,
422
  negative_prompt,
423
  num_steps,
@@ -430,16 +429,9 @@ with gr.Blocks(css=css) as demo:
430
  outputs=[gallery, usage_tips],
431
  )
432
 
433
- # enable_LCM.input(
434
- # fn=toggle_lcm_ui,
435
- # inputs=[enable_LCM],
436
- # outputs=[num_steps, guidance_scale],
437
- # queue=False,
438
- # )
439
-
440
  gr.Examples(
441
  examples=get_example(),
442
- inputs=[face_file, pose_file, prompt, negative_prompt],
443
  fn=run_for_examples,
444
  outputs=[gallery, usage_tips],
445
  cache_examples=True,
 
25
  from pipeline_controlnet_xs_sd_xl_instantid import StableDiffusionXLInstantIDXSPipeline, UNetControlNetXSModel
26
 
27
  from utils.controlnet_xs import ControlNetXSAdapter
28
+ from style import styles
29
+
30
 
31
  import gradio as gr
32
 
 
39
  MAX_SEED = np.iinfo(np.int32).max
40
  device = "cuda" if torch.cuda.is_available() else "cpu"
41
  weight_dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
42
+ STYLE_NAMES = list(styles.keys())
43
+ DEFAULT_STYLE_NAME = "Ordinary"
44
 
45
  base_model = 'frankjoshua/realvisxlV40_v40Bakedvae'
46
  vae_path = 'madebyollin/sdxl-vae-fp16-fix'
 
103
  pipe = pipe.to(device)
104
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
107
  if randomize_seed:
108
  seed = random.randint(0, MAX_SEED)
 
116
  [
117
  "./examples/1.jpg",
118
  None,
119
+ "Ordinary",
120
+ "a woman",
121
  "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
122
  ],
123
  [
124
  "./examples/1.jpg",
125
+ "./examples/pose/pose1.jpg",
126
+ "Hanfu",
127
+ "a woman",
128
+ "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
129
+ ],
130
+ [
131
+ "./examples/2.jpg",
132
+ "./examples/pose/pose2.png",
133
+ "ZangZu",
134
+ "a woman",
135
+ "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
136
+ ],
137
+ [
138
+ "./examples/3.png",
139
+ "./examples/pose/pose3.png",
140
+ "QingQiu",
141
+ "a woman",
142
  "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
143
  ],
144
  ]
145
  return case
146
 
147
+ def run_for_examples(face_file, pose_file, style, prompt, negative_prompt, ):
148
  return generate_image(
149
  face_file,
150
  pose_file,
151
+ style,
152
  prompt,
153
  negative_prompt,
154
  20, # num_steps
155
+ 0.9, # ControlNet strength
156
+ 0.8, # Adapter strength
 
157
  5.0, # guidance_scale
158
  42, # seed
159
+ 1280, # max side
160
  )
161
 
162
  def convert_from_cv2_to_image(img: np.ndarray) -> Image:
 
195
  out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
196
  return out_img_pil
197
 
198
+ def resize_img(input_image,max_side=1280,min_side=1024,size=None,pad_to_max_side=False,mode=PIL.Image.BILINEAR,base_pixel_number=64,):
 
 
 
 
 
 
 
 
199
  w, h = input_image.size
200
  if size is not None:
201
  w_resize_new, h_resize_new = size
 
218
  input_image = Image.fromarray(res)
219
  return input_image
220
 
221
+ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
222
+ p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
223
+ return p.replace("{prompt}", positive), n + ' ' + negative
224
 
225
  @spaces.GPU
226
  def generate_image(
227
  face_image_path,
228
+ pose_image_path,
229
+ style_name,
230
  prompt,
231
  negative_prompt,
232
  num_steps,
 
239
  ):
240
 
241
  if face_image_path is None:
242
+ raise gr.Error(f"Cannot find any input face image! Please upload the face image")
 
 
243
 
244
  if prompt is None:
245
  prompt = "a person"
246
 
247
  # apply the style template
248
+ prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
249
 
250
  face_image = load_image(face_image_path)
251
  face_image = resize_img(face_image, max_side=max_side)
 
257
  face_info = app.get(face_image_cv2)
258
 
259
  if len(face_info) == 0:
260
+ raise gr.Error(f"Unable to detect a face in the image. Please upload a different photo with a clear face.")
 
 
261
 
262
  face_info = sorted(
263
  face_info,
 
311
  1. If you're not satisfied with the similarity, try increasing the weight of "ControlNet strength" and "Adapter Strength."
312
  2. If you feel that the similarity is not high, you can increase the adapter strength appropriately.
313
  3. If you want to achieve a pose image as similar as possible, please increase the ControlNet strength appropriately.
 
314
  """
315
  css = """
316
  .gradio-container {width: 85% !important}
 
334
  label="Prompt",
335
  info="Give simple prompt is enough to achieve good face fidelity",
336
  placeholder="A photo of a person",
337
+ value="realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
338
  )
339
 
340
  submit = gr.Button("Submit", variant="primary")
 
 
 
 
341
 
342
+ style = gr.Dropdown(
343
+ label="Style",
344
+ choices=STYLE_NAMES,
345
+ value=DEFAULT_STYLE_NAME
346
+ )
347
+
348
  # strength
349
  controlnet_conditioning_scale = gr.Slider(
350
  label="ControlNet strength (for pose)",
351
  minimum=0.0,
352
  maximum=1.0,
353
  step=0.1,
354
+ value=0.9,
355
  )
356
  adapter_strength_ratio = gr.Slider(
357
  label="Adapter strength (for fidelity)",
 
416
  inputs=[
417
  face_file,
418
  pose_file,
419
+ style,
420
  prompt,
421
  negative_prompt,
422
  num_steps,
 
429
  outputs=[gallery, usage_tips],
430
  )
431
 
 
 
 
 
 
 
 
432
  gr.Examples(
433
  examples=get_example(),
434
+ inputs=[face_file, pose_file, style, prompt, negative_prompt],
435
  fn=run_for_examples,
436
  outputs=[gallery, usage_tips],
437
  cache_examples=True,