Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -14,10 +14,13 @@ from huggingface_hub import hf_hub_download
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+import numpy as np
+MULTIMODAL_VITAL_LAYERS = [0, 1, 17, 18]
+SINGLE_MODAL_VITAL_LAYERS = list(np.array([28, 53, 54, 56, 25]) - 19)


 pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev",
-
+
                                          torch_dtype=torch.bfloat16)

 #pipe.enable_lora()
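A note on the two new constants: FLUX.1-dev's transformer stacks 19 double-stream (text+image) blocks followed by 38 single-stream blocks, so the "- 19" offset converts global layer indices into positions within the single-stream list — presumably the "vital layers" that Stable Flow shares between source and edit branches. A minimal sketch of the arithmetic (the mapping onto transformer_blocks / single_transformer_blocks is an assumption read off the diff, not code from the Space):

import numpy as np

# Indices into the 19 double-stream blocks (pipe.transformer.transformer_blocks):
MULTIMODAL_VITAL_LAYERS = [0, 1, 17, 18]
# Global layer indices 28, 53, 54, 56, 25 lie past the 19 double-stream blocks,
# so subtracting 19 gives positions in pipe.transformer.single_transformer_blocks:
SINGLE_MODAL_VITAL_LAYERS = list(np.array([28, 53, 54, 56, 25]) - 19)
print(SINGLE_MODAL_VITAL_LAYERS)  # [9, 34, 35, 37, 6]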
@@ -44,65 +47,85 @@ def resize_img(image, max_size=1024):
         new_height = int(height * scaling_factor)
     return image.resize((new_width, new_height), Image.LANCZOS)

-
-
-
-
-
-
-
-
+@torch.no_grad()
+def image2latent(image, latent_nudging_scalar = 1.15):
+    image = pipe.image_processor.preprocess(image, height=1024, width=1024,).type(pipe.vae.dtype).to("cuda")
+    latents = pipe.vae.encode(image)["latent_dist"].mean
+    latents = (latents - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
+    latents = latents * latent_nudging_scalar
+
+    height = pipe.default_sample_size * pipe.vae_scale_factor
+    width = pipe.default_sample_size * pipe.vae_scale_factor
+
+    num_channels_latents = pipe.transformer.config.in_channels // 4
+    height = 2 * (height // (pipe.vae_scale_factor * 2))
+    width = 2 * (width // (pipe.vae_scale_factor * 2))
+
+    latents = pipe._pack_latents(
+        latents=latents,
+        batch_size=1,
+        num_channels_latents=num_channels_latents,
+        height=height,
+        width=width
+    )
+
+    return latents

 def check_hyper_flux_lora(enable_hyper_flux):
     if enable_hyper_flux:
         pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), lora_scale=0.125)
         pipe.fuse_lora(lora_scale=0.125)
-        return 8, 8
+        return 8, 8
     else:
         pipe.unfuse_lora()
-        return 28, 28
+        return 28, 28

 @spaces.GPU(duration=85)
-def invert_and_edit(image,
-
-
-                    gamma,
-                    start_timestep,
-                    stop_timestep,
+def invert_and_edit(image,
+                    source_prompt
+                    edit_prompt,
                     num_inversion_steps,
                     num_inference_steps,
                     seed,
                     randomize_seed,
-                    eta_decay,
-                    decay_power,
                     width = 1024,
                     height = 1024,
-
-                    image_latents = None,
-                    latent_image_ids = None,
+                    inverted_latent_list = None,
                     do_inversion = True,

 ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     if do_inversion:
-
+        inverted_latent_list = pipe(
+            source_prompt,
+            height=1024,
+            width=1024,
+            guidance_scale=1,
+            output_type="pil",
+            num_inference_steps=50,
+            max_sequence_length=512,
+            latents=image2latent(image),
+            invert_image=True
+        )
         do_inversion = False


-    output = pipe(
-
-
-
-
-
-
-
-
-
-
+    output = pipe(
+        [source_prompt, edit_prompt]
+        height=1024,
+        width=1024,
+        guidance_scale=[1] + [3] * (len(prompts) - 1),
+        output_type="pil",
+        num_inference_steps=50,
+        max_sequence_length=512,
+        latents=inverted_latent_list[-1].tile(len(prompts), 1, 1),
+        inverted_latent_list=inverted_latent_list,
+        mm_copy_blocks=MULTIMODAL_VITAL_LAYERS,
+        single_copy_blocks=SINGLE_MODAL_VITAL_LAYERS,
+    ).images

-    return output,
+    return output, inverted_latent_list.cpu(), do_inversion, seed

 # UI CSS
 css = """
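For context on what the new image2latent helper hands to the pipeline: with FLUX.1-dev's defaults (VAE downscale factor 8, 16 latent channels, default_sample_size 128), a 1024x1024 image encodes to a 16x128x128 latent, which _pack_latents flattens into the (batch, tokens, features) layout the FLUX transformer consumes. A minimal shape walk-through under those assumptions, equivalent to the reshape pipe._pack_latents performs (illustration only):

import torch

latents = torch.randn(1, 16, 128, 128)  # VAE latent of a 1024x1024 image (1024 / 8 = 128)
b, c, h, w = latents.shape

# Group every 2x2 spatial patch into the channel axis, then flatten space into tokens.
packed = latents.view(b, c, h // 2, 2, w // 2, 2)
packed = packed.permute(0, 2, 4, 1, 3, 5)
packed = packed.reshape(b, (h // 2) * (w // 2), c * 4)

print(packed.shape)  # torch.Size([1, 4096, 64]) -> 4096 image tokens, 64 features each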
@@ -116,19 +139,14 @@ css = """
 with gr.Blocks(css=css) as demo:

     inverted_latents = gr.State()
-    image_latents = gr.State()
-    latent_image_ids = gr.State()
     do_inversion = gr.State(True)

     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""#
+        gr.Markdown(f"""# Stable Flow 🖌️🏞️
 ### Edit real images with FLUX.1 [dev]
-following the algorithm proposed in [*
-Stochastic Rectified Differential Equations* by Rout et al.](https://rf-inversion.github.io/data/rf-inversion.pdf)
-
-based on the implementations of [@raven38](https://github.com/raven38) & [@DarkMnDragon](https://github.com/DarkMnDragon) 🙌🏻
+following the algorithm proposed in [*Stable Flow: Vital Layers for Training-Free Image Editing* by Avrahami et al.](https://arxiv.org/pdf/2411.14430)

-[[non-commercial license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] [[project page](https://
+[[non-commercial license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] [[project page](https://omriavrahami.com/stable-flow/) [[arxiv](https://arxiv.org/pdf/2411.14430)]
 """)

     with gr.Row():
@@ -137,39 +155,20 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
             label="Input Image",
             type="pil"
         )
-
+        source_prompt = gr.Text(
+            label="Source Prompt",
+            max_lines=1,
+            placeholder="describe the edited output",
+        )
+        edit_prompt = gr.Text(
             label="Edit Prompt",
             max_lines=1,
             placeholder="describe the edited output",
         )
     with gr.Row():
         enable_hyper_flux = gr.Checkbox(label="8-step LoRA", value=False, info="may reduce edit quality", visible=False)
-
-
-        start_timestep = gr.Slider(
-            label="start timestep",
-            info = "increase to enhance fidelity, decrease to enhance realism",
-            minimum=0,
-            maximum=28,
-            step=1,
-            value=0,
-        )
-        stop_timestep = gr.Slider(
-            label="stop timestep",
-            info = "increase to enhace fidelity to original image",
-            minimum=0,
-            maximum=28,
-            step=1,
-            value=6,
-        )
-        eta = gr.Slider(
-            label="eta",
-            info = "lower eta to ehnace the edits",
-            minimum=0.0,
-            maximum=1.0,
-            step=0.01,
-            value=0.9,
-        )
+
+

     run_button = gr.Button("Edit", variant="primary")

@@ -193,32 +192,18 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
                 minimum=1,
                 maximum=50,
                 step=1,
-                value=
-            )
-            eta_decay = gr.Checkbox(label="eta decay", value=False)
-            decay_power = gr.Slider(
-                label="eta decay power",
-                minimum=0,
-                maximum=5,
-                step=1,
-                value=1,
+                value=18,
             )
+

         with gr.Row():
-
-                label="gamma",
-                info = "increase gamma to enhance realism",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.01,
-                value=0.5,
-            )
+
             num_inversion_steps = gr.Slider(
                 label="num inversion steps",
                 minimum=1,
                 maximum=50,
                 step=1,
-                value=
+                value=50,
             )

         with gr.Row():
@@ -244,34 +229,27 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
         fn=invert_and_edit,
         inputs=[
             input_image,
-
-
-            gamma,
-            start_timestep,
-            stop_timestep,
+            source_prompt,
+            edit_prompt,
             num_inversion_steps,
             num_inference_steps,
             seed,
             randomize_seed,
-            eta_decay,
-            decay_power,
             width,
             height,
             inverted_latents,
-            image_latents,
-            latent_image_ids,
             do_inversion

         ],
-        outputs=[result, inverted_latents,
+        outputs=[result, inverted_latents, do_inversion, seed],
     )

-    gr.Examples(
-
-
-
+    # gr.Examples(
+    #     examples=get_examples(),
+    #     inputs=[input_image,result, prompt, num_inversion_steps, num_inference_steps, seed, randomize_seed, enable_hyper_flux ],
+    #     outputs=[result],

-
+    # )

     input_image.change(
         fn=reset_do_inversion,
@@ -288,16 +266,11 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
         outputs=[do_inversion]
     )

-    stylezation.change(
-        fn=check_style,
-        inputs=[stylezation],
-        outputs=[eta, gamma, start_timestep, stop_timestep, num_inversion_steps, num_inference_steps, eta_decay]
-    )

     enable_hyper_flux.change(
         fn=check_hyper_flux_lora,
         inputs=[enable_hyper_flux],
-        outputs=[num_inversion_steps, num_inference_steps
+        outputs=[num_inversion_steps, num_inference_steps]
     )


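Read end to end, the commit swaps the RF-inversion controls (gamma, eta, start/stop timesteps, eta decay) for Stable Flow's two-stage path: invert the real image under the source prompt, then denoise the source and edit prompts as one batch, feeding back the stored inversion latents and sharing the vital layers between the two branches. The sketch below condenses that path; it is not the Space's exact code. invert_image, inverted_latent_list, mm_copy_blocks and single_copy_blocks are keyword arguments of this Space's customized FLUX pipeline rather than stock diffusers, and prompts = [source_prompt, edit_prompt] is assumed (the diff uses prompts without defining it).

# Condensed sketch of the new edit path; `pipe`, `image2latent`, and the vital-layer
# constants are the ones defined in the diff above.
def stable_flow_edit(image, source_prompt, edit_prompt):
    prompts = [source_prompt, edit_prompt]  # assumed; the diff references `prompts` without defining it

    # Stage 1: invert the real image along the flow, conditioned on the source prompt.
    inverted_latent_list = pipe(
        source_prompt,
        height=1024,
        width=1024,
        guidance_scale=1,
        num_inference_steps=50,
        max_sequence_length=512,
        latents=image2latent(image),
        invert_image=True,                            # custom kwarg of this Space's pipeline
    )

    # Stage 2: denoise source + edit prompts in one batch, starting from the last
    # inversion latent, so the edit stays anchored to the original image.
    images = pipe(
        prompts,
        height=1024,
        width=1024,
        guidance_scale=[1] + [3] * (len(prompts) - 1),
        num_inference_steps=50,
        max_sequence_length=512,
        latents=inverted_latent_list[-1].tile(len(prompts), 1, 1),
        inverted_latent_list=inverted_latent_list,    # custom kwarg
        mm_copy_blocks=MULTIMODAL_VITAL_LAYERS,       # custom kwarg
        single_copy_blocks=SINGLE_MODAL_VITAL_LAYERS, # custom kwarg
    ).images

    # images[0] should reconstruct the input; images[-1] follows the edit prompt.
    return images[-1]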