linoyts (HF staff) committed deebc0f · verified · 1 Parent(s): a3386d2

Update app.py

Files changed (1): app.py (+82 −109)
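In short, this commit swaps the RF Inversion custom pipeline for Stable Flow (Avrahami et al., arXiv:2411.14430): the input image is VAE-encoded and packed into FLUX latents, inverted once under the source prompt, and then a [source_prompt, edit_prompt] batch is re-denoised while activations are copied at a fixed set of "vital" transformer layers. A condensed sketch of the new flow; note that invert_image, inverted_latent_list, mm_copy_blocks and single_copy_blocks are keyword arguments of this Space's custom Stable Flow pipeline, not of the stock diffusers FluxPipeline:

    # Condensed from the diff below; not a drop-in script.
    latents = image2latent(input_image)                  # packed (1, 4096, 64) FLUX latents
    inverted = pipe(source_prompt, guidance_scale=1,
                    num_inference_steps=50, max_sequence_length=512,
                    latents=latents, invert_image=True)  # inversion trajectory
    images = pipe([source_prompt, edit_prompt],
                  guidance_scale=[1, 3],                 # no CFG on the source, CFG 3 on the edit
                  latents=inverted[-1].tile(2, 1, 1),    # same start latent for both prompts
                  inverted_latent_list=inverted,
                  mm_copy_blocks=MULTIMODAL_VITAL_LAYERS,
                  single_copy_blocks=SINGLE_MODAL_VITAL_LAYERS).images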
app.py CHANGED
@@ -14,10 +14,13 @@ from huggingface_hub import hf_hub_download
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 1024
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+import numpy as np
+MULTIMODAL_VITAL_LAYERS = [0, 1, 17, 18]
+SINGLE_MODAL_VITAL_LAYERS = list(np.array([28, 53, 54, 56, 25]) - 19)
 
 
 pipe = DiffusionPipeline.from_pretrained("black-forest-labs/FLUX.1-dev",
-                                         custom_pipeline="pipeline_flux_rf_inversion",
+
                                          torch_dtype=torch.bfloat16)
 
 #pipe.enable_lora()
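For reference, the vital-layer constants resolve to small index lists. FLUX.1-dev stacks 19 multimodal (double-stream) blocks ahead of its single-stream blocks, so subtracting 19 maps global layer indices into the single-stream stack; a quick check:

    import numpy as np

    # MULTIMODAL_VITAL_LAYERS index pipe.transformer.transformer_blocks directly.
    # Global indices 28, 53, 54, 56, 25 land past the 19 double-stream blocks,
    # so the shift yields positions within single_transformer_blocks.
    print(list(np.array([28, 53, 54, 56, 25]) - 19))  # [9, 34, 35, 37, 6]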
@@ -44,65 +47,85 @@ def resize_img(image, max_size=1024):
     new_height = int(height * scaling_factor)
     return image.resize((new_width, new_height), Image.LANCZOS)
 
-def check_style(stylezation, enable_hyper_flux):
-    if stylezation == "text/image guided stylzation":
-        return 0.9, 0.5, 0, 6, 28, 28, False
-    else:
-        if enable_hyper_flux:
-            return 0.9, 0.5, 0, 4, 8, 8, False
-        else:
-            return 0.9, 0.5, 2, 7, 28, 28, False
+@torch.no_grad()
+def image2latent(image, latent_nudging_scalar = 1.15):
+    image = pipe.image_processor.preprocess(image, height=1024, width=1024).type(pipe.vae.dtype).to("cuda")
+    latents = pipe.vae.encode(image)["latent_dist"].mean
+    latents = (latents - pipe.vae.config.shift_factor) * pipe.vae.config.scaling_factor
+    latents = latents * latent_nudging_scalar
+
+    height = pipe.default_sample_size * pipe.vae_scale_factor
+    width = pipe.default_sample_size * pipe.vae_scale_factor
+
+    num_channels_latents = pipe.transformer.config.in_channels // 4
+    height = 2 * (height // (pipe.vae_scale_factor * 2))
+    width = 2 * (width // (pipe.vae_scale_factor * 2))
+
+    latents = pipe._pack_latents(
+        latents=latents,
+        batch_size=1,
+        num_channels_latents=num_channels_latents,
+        height=height,
+        width=width
+    )
+
+    return latents
 
 def check_hyper_flux_lora(enable_hyper_flux):
     if enable_hyper_flux:
         pipe.load_lora_weights(hf_hub_download("ByteDance/Hyper-SD", "Hyper-FLUX.1-dev-8steps-lora.safetensors"), lora_scale=0.125)
         pipe.fuse_lora(lora_scale=0.125)
-        return 8, 8, 4
+        return 8, 8
     else:
         pipe.unfuse_lora()
-        return 28, 28, 6
+        return 28, 28
 
 @spaces.GPU(duration=85)
-def invert_and_edit(image,
-                    prompt,
-                    eta,
-                    gamma,
-                    start_timestep,
-                    stop_timestep,
+def invert_and_edit(image,
+                    source_prompt,
+                    edit_prompt,
                     num_inversion_steps,
                     num_inference_steps,
                     seed,
                     randomize_seed,
-                    eta_decay,
-                    decay_power,
                     width = 1024,
                     height = 1024,
-                    inverted_latents = None,
-                    image_latents = None,
-                    latent_image_ids = None,
+                    inverted_latent_list = None,
                     do_inversion = True,
                     ):
     if randomize_seed:
         seed = random.randint(0, MAX_SEED)
     if do_inversion:
-        inverted_latents, image_latents, latent_image_ids = pipe.invert(image, num_inversion_steps=num_inversion_steps, gamma=gamma)
+        inverted_latent_list = pipe(
+            source_prompt,
+            height=1024,
+            width=1024,
+            guidance_scale=1,
+            output_type="pil",
+            num_inference_steps=50,
+            max_sequence_length=512,
+            latents=image2latent(image),
+            invert_image=True
+        )
         do_inversion = False
 
-    output = pipe(prompt,
-                  inverted_latents = inverted_latents.to(DEVICE),
-                  image_latents = image_latents.to(DEVICE),
-                  latent_image_ids = latent_image_ids.to(DEVICE),
-                  start_timestep = start_timestep/num_inference_steps,
-                  stop_timestep = stop_timestep/num_inference_steps,
-                  num_inference_steps = num_inference_steps,
-                  eta=eta,
-                  decay_eta = eta_decay,
-                  eta_decay_power = decay_power,
-                  ).images[0]
+    prompts = [source_prompt, edit_prompt]
+    output = pipe(
+        prompts,
+        height=1024,
+        width=1024,
+        guidance_scale=[1] + [3] * (len(prompts) - 1),
+        output_type="pil",
+        num_inference_steps=50,
+        max_sequence_length=512,
+        latents=inverted_latent_list[-1].tile(len(prompts), 1, 1),
+        inverted_latent_list=inverted_latent_list,
+        mm_copy_blocks=MULTIMODAL_VITAL_LAYERS,
+        single_copy_blocks=SINGLE_MODAL_VITAL_LAYERS,
+    ).images
 
-    return output, inverted_latents.cpu(), image_latents.cpu(), latent_image_ids.cpu(), do_inversion, seed
+    return output, inverted_latent_list.cpu(), do_inversion, seed
 
 # UI CSS
 css = """
@@ -116,19 +139,14 @@ css = """
 with gr.Blocks(css=css) as demo:
 
     inverted_latents = gr.State()
-    image_latents = gr.State()
-    latent_image_ids = gr.State()
     do_inversion = gr.State(True)
 
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(f"""# RF inversion 🖌️🏞️
+        gr.Markdown(f"""# Stable Flow 🖌️🏞️
 ### Edit real images with FLUX.1 [dev]
-following the algorithm proposed in [*Semantic Image Inversion and Editing using
-Stochastic Rectified Differential Equations* by Rout et al.](https://rf-inversion.github.io/data/rf-inversion.pdf)
-
-based on the implementations of [@raven38](https://github.com/raven38) & [@DarkMnDragon](https://github.com/DarkMnDragon) 🙌🏻
+following the algorithm proposed in [*Stable Flow: Vital Layers for Training-Free Image Editing* by Avrahami et al.](https://arxiv.org/pdf/2411.14430)
 
-[[non-commercial license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] [[project page](https://rf-inversion.github.io/) [[arxiv](https://arxiv.org/pdf/2410.10792)]
+[[non-commercial license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md)] [[project page](https://omriavrahami.com/stable-flow/)] [[arxiv](https://arxiv.org/pdf/2411.14430)]
 """)
 
     with gr.Row():
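The two gr.State holders above are what let the Space invert once and reuse the result across edits: inverted_latents caches the trajectory between runs, and do_inversion flips back to True whenever an input changes (see the reset_do_inversion wiring further down). A minimal standalone sketch of the same caching pattern (hypothetical demo, not this Space's code):

    import gradio as gr

    def run(x, cache, do_inversion):
        if do_inversion or cache is None:
            cache = f"inverted({x})"  # stand-in for the expensive inversion pass
            do_inversion = False
        return f"edited {cache}", cache, do_inversion

    with gr.Blocks() as demo:
        cache = gr.State()             # persists per session, across clicks
        do_inversion = gr.State(True)  # reset to True when the input changes
        x = gr.Textbox(label="input")
        out = gr.Textbox(label="output")
        x.change(lambda: True, outputs=[do_inversion])
        gr.Button("Edit").click(run, [x, cache, do_inversion], [out, cache, do_inversion])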
@@ -137,39 +155,20 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
                 label="Input Image",
                 type="pil"
             )
-            prompt = gr.Text(
+            source_prompt = gr.Text(
+                label="Source Prompt",
+                max_lines=1,
+                placeholder="describe the input image",
+            )
+            edit_prompt = gr.Text(
                 label="Edit Prompt",
                 max_lines=1,
                 placeholder="describe the edited output",
             )
         with gr.Row():
             enable_hyper_flux = gr.Checkbox(label="8-step LoRA", value=False, info="may reduce edit quality", visible=False)
-            stylezation = gr.Radio(["local subject edits", "text/image guided stylzation"], label="edit type", info="")
-        with gr.Row():
-            start_timestep = gr.Slider(
-                label="start timestep",
-                info = "increase to enhance fidelity, decrease to enhance realism",
-                minimum=0,
-                maximum=28,
-                step=1,
-                value=0,
-            )
-            stop_timestep = gr.Slider(
-                label="stop timestep",
-                info = "increase to enhace fidelity to original image",
-                minimum=0,
-                maximum=28,
-                step=1,
-                value=6,
-            )
-            eta = gr.Slider(
-                label="eta",
-                info = "lower eta to ehnace the edits",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.01,
-                value=0.9,
-            )
+
+
 
         run_button = gr.Button("Edit", variant="primary")
@@ -193,32 +192,18 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
                 minimum=1,
                 maximum=50,
                 step=1,
-                value=28,
-            )
-            eta_decay = gr.Checkbox(label="eta decay", value=False)
-            decay_power = gr.Slider(
-                label="eta decay power",
-                minimum=0,
-                maximum=5,
-                step=1,
-                value=1,
-            )
+                value=18,
+            )
 
         with gr.Row():
-            gamma = gr.Slider(
-                label="gamma",
-                info = "increase gamma to enhance realism",
-                minimum=0.0,
-                maximum=1.0,
-                step=0.01,
-                value=0.5,
-            )
             num_inversion_steps = gr.Slider(
                 label="num inversion steps",
                 minimum=1,
                 maximum=50,
                 step=1,
-                value=28,
+                value=50,
             )
 
         with gr.Row():
@@ -244,34 +229,27 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
         fn=invert_and_edit,
         inputs=[
             input_image,
-            prompt,
-            eta,
-            gamma,
-            start_timestep,
-            stop_timestep,
+            source_prompt,
+            edit_prompt,
             num_inversion_steps,
             num_inference_steps,
             seed,
             randomize_seed,
-            eta_decay,
-            decay_power,
             width,
             height,
             inverted_latents,
-            image_latents,
-            latent_image_ids,
             do_inversion
 
         ],
-        outputs=[result, inverted_latents, image_latents, latent_image_ids, do_inversion, seed],
+        outputs=[result, inverted_latents, do_inversion, seed],
     )
 
-    gr.Examples(
-        examples=get_examples(),
-        inputs=[input_image, result, prompt, eta, gamma, start_timestep, stop_timestep, num_inversion_steps, num_inference_steps, seed, randomize_seed, eta_decay, decay_power, enable_hyper_flux, stylezation],
-        outputs=[result],
-    )
+    # gr.Examples(
+    #     examples=get_examples(),
+    #     inputs=[input_image, result, prompt, num_inversion_steps, num_inference_steps, seed, randomize_seed, enable_hyper_flux],
+    #     outputs=[result],
+    # )
 
     input_image.change(
         fn=reset_do_inversion,
@@ -288,16 +266,11 @@ based on the implementations of [@raven38](https://github.com/raven38) & [@DarkM
         outputs=[do_inversion]
     )
 
-    stylezation.change(
-        fn=check_style,
-        inputs=[stylezation],
-        outputs=[eta, gamma, start_timestep, stop_timestep, num_inversion_steps, num_inference_steps, eta_decay]
-    )
 
     enable_hyper_flux.change(
         fn=check_hyper_flux_lora,
         inputs=[enable_hyper_flux],
-        outputs=[num_inversion_steps, num_inference_steps, stop_timestep]
+        outputs=[num_inversion_steps, num_inference_steps]
     )
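On the batched edit call in invert_and_edit: with two prompts, the guidance list evaluates to [1, 3] (the source is re-denoised without guidance while the edit gets CFG 3), and the final inverted latent is tiled once per prompt so both sequences start from the same state. The arithmetic, checked in isolation (shapes assume the (1, 4096, 64) packed latents from image2latent):

    import torch

    prompts = ["a cat", "a cat wearing a top hat"]  # [source_prompt, edit_prompt]
    print([1] + [3] * (len(prompts) - 1))           # [1, 3]

    last = torch.randn(1, 4096, 64)                 # inverted_latent_list[-1]
    print(last.tile(len(prompts), 1, 1).shape)      # torch.Size([2, 4096, 64])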
 
 