Update app.py
app.py CHANGED
@@ -25,7 +25,8 @@ from insightface.app import FaceAnalysis
 from pipeline_controlnet_xs_sd_xl_instantid import StableDiffusionXLInstantIDXSPipeline, UNetControlNetXSModel
 
 from utils.controlnet_xs import ControlNetXSAdapter
-
+from style import styles
+
 
 import gradio as gr
 
@@ -38,7 +39,8 @@ hf_hub_download(repo_id="RED-AIGC/InstantID-XS", filename="image_proj.bin", loca
 MAX_SEED = np.iinfo(np.int32).max
 device = "cuda" if torch.cuda.is_available() else "cpu"
 weight_dtype = torch.float16 if str(device).__contains__("cuda") else torch.float32
-
+STYLE_NAMES = list(styles.keys())
+DEFAULT_STYLE_NAME = "Ordinary"
 
 base_model = 'frankjoshua/realvisxlV40_v40Bakedvae'
 vae_path = 'madebyollin/sdxl-vae-fp16-fix'
@@ -101,20 +103,6 @@ pipe.unet.config.ctrl_learn_time_embedding = True
 pipe = pipe.to(device)
 
 
-
-
-def toggle_lcm_ui(value):
-    if value:
-        return (
-            gr.update(minimum=0, maximum=100, step=1, value=5),
-            gr.update(minimum=0.1, maximum=20.0, step=0.1, value=1.5),
-        )
-    else:
-        return (
-            gr.update(minimum=5, maximum=100, step=1, value=30),
-            gr.update(minimum=0.1, maximum=20.0, step=0.1, value=5),
-        )
-
 def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
@@ -128,30 +116,47 @@ def get_example():
         [
             "./examples/1.jpg",
             None,
-            "
+            "Ordinary",
+            "a woman",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
         [
             "./examples/1.jpg",
-            "./examples/pose/pose1.
-            "
+            "./examples/pose/pose1.jpg",
+            "Hanfu",
+            "a woman",
+            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        ],
+        [
+            "./examples/2.jpg",
+            "./examples/pose/pose2.png",
+            "ZangZu",
+            "a woman",
+            "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
+        ],
+        [
+            "./examples/3.png",
+            "./examples/pose/pose3.png",
+            "QingQiu",
+            "a woman",
             "(lowres, low quality, worst quality:1.2), (text:1.2), watermark, (frame:1.2), deformed, ugly, deformed eyes, blur, out of focus, blurry, deformed cat, deformed, photo, anthropomorphic cat, monochrome, photo, pet collar, gun, weapon, blue, 3d, drones, drone, buildings in background, green",
         ],
     ]
     return case
 
-def run_for_examples(face_file, pose_file, prompt, negative_prompt):
+def run_for_examples(face_file, pose_file, style, prompt, negative_prompt, ):
     return generate_image(
         face_file,
         pose_file,
+        style,
         prompt,
         negative_prompt,
         20, # num_steps
-        0.
-        0.8, #
-        0.8, # pose_strength
+        0.9, # ControlNet strength
+        0.8, # Adapter strength
         5.0, # guidance_scale
         42, # seed
+        1280, # max side
     )
 
 def convert_from_cv2_to_image(img: np.ndarray) -> Image:
@@ -190,15 +195,7 @@ def draw_kps(image_pil, kps, color_list=[(255,0,0), (0,255,0), (0,0,255), (255,2
     out_img_pil = PIL.Image.fromarray(out_img.astype(np.uint8))
     return out_img_pil
 
-def resize_img(
-    input_image,
-    max_side=1280,
-    min_side=1024,
-    size=None,
-    pad_to_max_side=False,
-    mode=PIL.Image.BILINEAR,
-    base_pixel_number=64,
-):
+def resize_img(input_image,max_side=1280,min_side=1024,size=None,pad_to_max_side=False,mode=PIL.Image.BILINEAR,base_pixel_number=64,):
     w, h = input_image.size
     if size is not None:
         w_resize_new, h_resize_new = size
@@ -221,11 +218,15 @@ def resize_img(
     input_image = Image.fromarray(res)
     return input_image
 
+def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str, str]:
+    p, n = styles.get(style_name, styles[DEFAULT_STYLE_NAME])
+    return p.replace("{prompt}", positive), n + ' ' + negative
 
 @spaces.GPU
 def generate_image(
     face_image_path,
-    pose_image_path,
+    pose_image_path,
+    style_name,
     prompt,
     negative_prompt,
     num_steps,
@@ -238,15 +239,13 @@ def generate_image(
 ):
 
     if face_image_path is None:
-        raise gr.Error(
-            f"Cannot find any input face image! Please upload the face image"
-        )
+        raise gr.Error(f"Cannot find any input face image! Please upload the face image")
 
     if prompt is None:
         prompt = "a person"
 
     # apply the style template
-
+    prompt, negative_prompt = apply_style(style_name, prompt, negative_prompt)
 
     face_image = load_image(face_image_path)
     face_image = resize_img(face_image, max_side=max_side)
@@ -258,9 +257,7 @@ def generate_image(
     face_info = app.get(face_image_cv2)
 
     if len(face_info) == 0:
-        raise gr.Error(
-            f"Unable to detect a face in the image. Please upload a different photo with a clear face."
-        )
+        raise gr.Error(f"Unable to detect a face in the image. Please upload a different photo with a clear face.")
 
     face_info = sorted(
         face_info,
@@ -314,7 +311,6 @@ tips = r"""
 1. If you're not satisfied with the similarity, try increasing the weight of "ControlNet strength" and "Adapter Strength."
 2. If you feel that the similarity is not high, you can increase the adapter strength appropriately.
 3. If you want to achieve a pose image as similar as possible, please increase the ControlNet strength appropriately.
-4. If you find that realistic style is not good enough, go for our Github repo and use a more realistic base model.
 """
 css = """
 .gradio-container {width: 85% !important}
@@ -338,22 +334,24 @@ with gr.Blocks(css=css) as demo:
                 label="Prompt",
                 info="Give simple prompt is enough to achieve good face fidelity",
                 placeholder="A photo of a person",
-                value="
+                value="realistic, symmetrical hyperdetailed texture, masterpiece, enhanced details, perfect composition, authentic, natural posture",
             )
 
             submit = gr.Button("Submit", variant="primary")
-            # enable_LCM = gr.Checkbox(
-            #     label="Enable Fast Inference with LCM", value=enable_lcm_arg,
-            #     info="LCM speeds up the inference step, the trade-off is the quality of the generated image. It performs better with portrait face images rather than distant faces",
-            # )
 
+            style = gr.Dropdown(
+                label="Style",
+                choices=STYLE_NAMES,
+                value=DEFAULT_STYLE_NAME
+            )
+
             # strength
             controlnet_conditioning_scale = gr.Slider(
                 label="ControlNet strength (for pose)",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.1,
-                value=0.
+                value=0.9,
             )
             adapter_strength_ratio = gr.Slider(
                 label="Adapter strength (for fidelity)",
@@ -418,6 +416,7 @@ with gr.Blocks(css=css) as demo:
         inputs=[
             face_file,
             pose_file,
+            style,
             prompt,
             negative_prompt,
             num_steps,
@@ -430,16 +429,9 @@ with gr.Blocks(css=css) as demo:
         outputs=[gallery, usage_tips],
     )
 
-    # enable_LCM.input(
-    #     fn=toggle_lcm_ui,
-    #     inputs=[enable_LCM],
-    #     outputs=[num_steps, guidance_scale],
-    #     queue=False,
-    # )
-
     gr.Examples(
         examples=get_example(),
-        inputs=[face_file, pose_file, prompt, negative_prompt],
+        inputs=[face_file, pose_file, style, prompt, negative_prompt],
        fn=run_for_examples,
         outputs=[gallery, usage_tips],
         cache_examples=True,
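Note on the new style dependency: style.py itself is not part of this diff, so the template strings below are placeholders, not the Space's actual templates. What the diff does pin down is the shape apply_style expects: styles maps a style name to a (positive_template, negative) pair, the positive template carries a "{prompt}" slot, and unknown names fall back to DEFAULT_STYLE_NAME. A minimal sketch under those assumptions:

# style.py -- hypothetical sketch; the Space ships its own templates.
# Keys must include the names used by the dropdown and the examples
# ("Ordinary", "Hanfu", "ZangZu", "QingQiu").
styles = {
    "Ordinary": (
        "{prompt}, photorealistic, natural lighting",  # placeholder positive template
        "lowres, bad anatomy",                         # placeholder style negative
    ),
    "Hanfu": (
        "{prompt}, wearing traditional hanfu, intricate embroidery",
        "lowres, bad anatomy",
    ),
}

Given that shape, apply_style substitutes the user prompt into the template and appends the user negative after the style negative:

prompt, negative = apply_style("Hanfu", "a woman", "blurry")
# prompt   -> "a woman, wearing traditional hanfu, intricate embroidery"
# negative -> "lowres, bad anatomy blurry"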
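Note on resize_img: the diff collapses its signature onto one line but shows little of the body, so the following is only a reconstruction from the parameter names, not the Space's actual code. A function with this signature typically upscales until the short side reaches min_side, caps the long side at max_side, and snaps both sides to multiples of base_pixel_number:

# Hypothetical sketch of the resizing the signature implies; details
# (the pad_to_max_side padding path, the explicit `size` branch) may differ.
import PIL.Image

def resize_img_sketch(img, max_side=1280, min_side=1024,
                      mode=PIL.Image.BILINEAR, base_pixel_number=64):
    w, h = img.size
    ratio = min_side / min(w, h)            # grow the short side to min_side
    w, h = round(ratio * w), round(ratio * h)
    ratio = min(1.0, max_side / max(w, h))  # then shrink so the long side <= max_side
    w, h = round(ratio * w), round(ratio * h)
    w = (w // base_pixel_number) * base_pixel_number  # SDXL-friendly multiples of 64
    h = (h // base_pixel_number) * base_pixel_number
    return img.resize((w, h), mode)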