Barak1 committed
Commit b6d3fe5 · 1 Parent(s): 6d0cebc

runs without errors

Files changed (6)
  1. .gitignore +1 -0
  2. app.py +81 -79
  3. elephent.jpg +0 -0
  4. src/config.py +2 -1
  5. src/editor.py +6 -6
  6. src/sdxl_inversion_pipeline.py +4 -1
.gitignore CHANGED
@@ -6,3 +6,4 @@
  *.iml
  out
  gen
+ *.pyc
app.py CHANGED
@@ -1,7 +1,5 @@
  import gradio as gr
- import numpy as np
- import random
- from diffusers import DiffusionPipeline
+
  import torch
  from src.euler_scheduler import MyEulerAncestralDiscreteScheduler
  from diffusers.pipelines.auto_pipeline import AutoPipelineForImage2Image
@@ -11,15 +9,13 @@ from src.editor import ImageEditorDemo
 
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
-
  scheduler_class = MyEulerAncestralDiscreteScheduler
 
-
  pipe_inversion = SDXLDDIMPipeline.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True).to(device)
  pipe_inference = AutoPipelineForImage2Image.from_pretrained("stabilityai/sdxl-turbo", use_safetensors=True).to(device)
  pipe_inference.scheduler = scheduler_class.from_config(pipe_inference.scheduler.config)
  pipe_inversion.scheduler = scheduler_class.from_config(pipe_inversion.scheduler.config)
  pipe_inversion.scheduler_inference = scheduler_class.from_config(pipe_inference.scheduler.config)
 
 
  # if torch.cuda.is_available():
@@ -32,104 +28,110 @@ pipe_inversion.scheduler_inference = scheduler_class.from_config(pipe_inference
  # pipe = pipe.to(device)
 
 
- def infer(input_image, description_prompt, target_prompt, guidance_scale, num_inference_steps=4, num_inversion_steps=4, inversion_max_step=0.6):
+ def infer(input_image, description_prompt, target_prompt, edit_guidance_scale, num_inference_steps=4,
+           num_inversion_steps=4,
+           inversion_max_step=0.6):
      config = RunConfig(num_inference_steps=num_inference_steps,
                         num_inversion_steps=num_inversion_steps,
-                        guidance_scale=guidance_scale,
+                        edit_guidance_scale=edit_guidance_scale,
                         inversion_max_step=inversion_max_step)
 
-     editor = ImageEditorDemo(pipe_inversion, pipe_inference, input_image, description_prompt, config)
+     editor = ImageEditorDemo(pipe_inversion, pipe_inference, input_image, description_prompt, config, device)
 
      image = editor.edit(target_prompt)
      return image
 
+
  examples = [
      "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
      "An astronaut riding a green horse",
      "A delicious ceviche cheesecake slice",
  ]
 
- css="""
- #col-container {
-     margin: 0 auto;
-     max-width: 520px;
- }
- """
+ # css = """
+ # #col-container-1 {
+ #     margin: 0 auto;
+ #     max-width: 520px;
+ # }
+ # #col-container-2 {
+ #     margin: 0 auto;
+ #     max-width: 520px;
+ # }
+ # """
 
  if torch.cuda.is_available():
      power_device = "GPU"
  else:
      power_device = "CPU"
 
- with gr.Blocks(css=css) as demo:
-
+ # with gr.Blocks(css=css) as demo:
+ with gr.Blocks() as demo:
      gr.Markdown(f"""
-     # RNRI briel and links on device: {power_device}.
+     This is a demo for our [paper]("https://arxiv.org/abs/2312.12540") **RNRI: Regularized Newton Raphson Inversion for Text-to-Image Diffusion Models**.
+     Image editing using our RNRI for inversion demonstrates significant speed-up and improved quality compared to previous state-of-the-art methods.
+     RNRI can be applied to a variety of diffusion models, including SDXL, DDIM, and others.
+     Take a look at our [project page]("https://barakmam.github.io/rnri.github.io/").
      """)
-     with gr.Column(elem_id="col-container"):
-
-         with gr.Row():
-             input_image = gr.Image(label="Input image", sources=['upload', 'webcam', 'clipboard'], type="pil")
-
-         with gr.Row():
-
-             description_prompt = gr.Text(
-                 label="Image description",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your image description",
-                 container=False,
-             )
-
-
-         with gr.Row():
-
-             target_prompt = gr.Text(
-                 label="Edit prompt",
-                 show_label=False,
-                 max_lines=1,
-                 placeholder="Enter your edit prompt",
-                 container=False,
-             )
-
-
-         with gr.Accordion("Advanced Settings", open=False):
-
-             with gr.Row():
-
-                 guidance_scale = gr.Slider(
-                     label="Guidance scale",
-                     minimum=0.0,
-                     maximum=10.0,
-                     step=0.1,
-                     value=1.2,
-                 )
-
-                 num_inference_steps = gr.Slider(
-                     label="Number of RNRI iterations",
-                     minimum=1,
-                     maximum=12,
-                     step=1,
-                     value=4,
-                 )
-
-
-         with gr.Row():
-             run_button = gr.Button("Edit", scale=0)
-
-     with gr.Column(elem_id="col-container"):
-
-         result = gr.Image(label="Result", show_label=False)
-
-         # gr.Examples(
-         #     examples = examples,
-         #     inputs = [prompt]
-         # )
+     with gr.Row():
+         with gr.Column(elem_id="col-container-1"):
+             with gr.Row():
+                 input_image = gr.Image(label="Input image", sources=['upload', 'webcam', 'clipboard'], type="pil")
+
+             with gr.Row():
+                 description_prompt = gr.Text(
+                     label="Image description",
+                     show_label=False,
+                     max_lines=1,
+                     placeholder="Enter your image description",
+                     container=False,
+                 )
+
+             with gr.Row():
+                 target_prompt = gr.Text(
+                     label="Edit prompt",
+                     show_label=False,
+                     max_lines=1,
+                     placeholder="Enter your edit prompt",
+                     container=False,
+                 )
+
+             with gr.Accordion("Advanced Settings", open=False):
+                 with gr.Row():
+                     edit_guidance_scale = gr.Slider(
+                         label="Guidance scale",
+                         minimum=0.0,
+                         maximum=10.0,
+                         step=0.1,
+                         value=1.2,
+                     )
+
+                     num_inference_steps = gr.Slider(
+                         label="Number of RNRI iterations",
+                         minimum=1,
+                         maximum=12,
+                         step=1,
+                         value=4,
+                     )
+
+             with gr.Row():
+                 run_button = gr.Button("Edit", scale=1)
+
+         with gr.Column(elem_id="col-container-2"):
+             result = gr.Image(label="Result")
+
+             # gr.Examples(
+             #     examples = examples,
+             #     inputs = [prompt]
+             # )
 
      run_button.click(
-         fn = infer,
-         inputs = [input_image, description_prompt, target_prompt, guidance_scale, num_inference_steps, num_inference_steps],
-         outputs = [result]
+         fn=infer,
+         inputs=[input_image, description_prompt, target_prompt, edit_guidance_scale, num_inference_steps,
+                 num_inference_steps],
+         outputs=[result]
      )
 
  demo.queue().launch()
+
+ # im = infer(input_image, description_prompt, target_prompt, edit_guidance_scale, num_inference_steps=4, num_inversion_steps=4,
+ #            inversion_max_step=0.6)
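Note that, as wired above, `num_inference_steps` feeds both step inputs of `infer`; there is no slider for `num_inversion_steps`. For reference, the rewired `infer` can also be driven outside Gradio, echoing the commented-out call at the bottom of the file. A minimal sketch, assuming the `elephent.jpg` sample added by this commit, PIL for loading, and illustrative prompts:

# Smoke test of the new infer() signature. PIL usage, both prompt strings,
# and the output filename are assumptions; any RGB image with a matching
# description would do.
from PIL import Image

input_image = Image.open("elephent.jpg")  # sample image added in this commit
edited = infer(input_image,
               description_prompt="an elephant in the savanna",
               target_prompt="an origami elephant in the savanna",
               edit_guidance_scale=1.2,
               num_inference_steps=4,
               num_inversion_steps=4,
               inversion_max_step=0.6)
edited.save("edited.jpg")  # assumes editor.edit() returns a PIL image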
elephent.jpg ADDED
src/config.py CHANGED
@@ -9,7 +9,8 @@ class RunConfig:
 
      num_inversion_steps: int = 100
 
-     guidance_scale: float = 0.0
+     inversion_guidance_scale: float = 0.0
+     edit_guidance_scale: float = 1.2
 
      inversion_max_step: float = 1.0
 
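The single `guidance_scale` field is thus split into two independent knobs: the inversion pass keeps an unguided 0.0 default, while the edit pass defaults to the demo's 1.2. A minimal sketch of the updated config, using only the fields visible in this diff plus `num_inference_steps` from app.py:

from src.config import RunConfig

# Inversion and edit guidance are now configured independently.
cfg = RunConfig(num_inference_steps=4,
                num_inversion_steps=4,
                edit_guidance_scale=1.2,  # consumed as self.edit_cfg in the editor
                inversion_max_step=0.6)
print(cfg.inversion_guidance_scale)  # 0.0 by default: inversion stays unguided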
src/editor.py CHANGED
@@ -35,7 +35,7 @@ def load_im_into_format_from_path(im_path):
 
 
  class ImageEditorDemo:
-     def __init__(self, pipe_inversion, pipe_inference, input_image, description_prompt, cfg):
+     def __init__(self, pipe_inversion, pipe_inference, input_image, description_prompt, cfg, device):
          self.pipe_inversion = pipe_inversion
          self.pipe_inference = pipe_inference
          self.original_image = load_im_into_format_from_path(input_image).convert("RGB")
@@ -44,7 +44,7 @@ class ImageEditorDemo:
          img_size = (512,512)
          VQAE_SCALE = 8
          latents_size = (1, 4, img_size[0] // VQAE_SCALE, img_size[1] // VQAE_SCALE)
-         noise = [randn_tensor(latents_size, dtype=torch.float16, device=torch.device("cuda:0"), generator=g_cpu) for i
+         noise = [randn_tensor(latents_size, dtype=torch.float16, device=torch.device(device), generator=g_cpu) for i
                   in range(cfg.num_inversion_steps)]
          pipe_inversion.scheduler.set_noise_list(noise)
          pipe_inference.scheduler.set_noise_list(noise)
@@ -55,10 +55,10 @@ class ImageEditorDemo:
          self.pipe_inversion.cfg = cfg
          self.pipe_inference.cfg = cfg
          self.inv_hp = [2, 0.1, 0.2]
-         self.edit_cfg = 1.2
+         self.edit_cfg = cfg.edit_guidance_scale
 
-         self.pipe_inference.to("cuda")
-         self.pipe_inversion.to("cuda")
+         self.pipe_inference.to(device)
+         self.pipe_inversion.to(device)
 
          self.last_latent = self.invert(self.original_image, description_prompt)
          self.original_latent = self.last_latent
@@ -68,7 +68,7 @@ class ImageEditorDemo:
                               num_inversion_steps=self.cfg.num_inversion_steps,
                               num_inference_steps=self.cfg.num_inference_steps,
                               image=init_image,
-                              guidance_scale=self.cfg.guidance_scale,
+                              guidance_scale=self.cfg.inversion_guidance_scale,
                               callback_on_step_end=inversion_callback,
                               strength=self.cfg.inversion_max_step,
                               denoising_start=1.0 - self.cfg.inversion_max_step,
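With the hard-coded "cuda" strings removed, the editor runs on whatever device the host offers. A minimal sketch of the new constructor, assuming `pipe_inversion`, `pipe_inference`, `input_image`, and `cfg` are set up as in app.py; the prompts are illustrative:

import torch

# The device is threaded through explicitly now, so CPU-only hosts work too:
# it reaches both pipelines and the pre-sampled randn_tensor noise list.
device = "cuda" if torch.cuda.is_available() else "cpu"
editor = ImageEditorDemo(pipe_inversion, pipe_inference, input_image,
                         "an elephant in the savanna", cfg, device)
result = editor.edit("an origami elephant in the savanna")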
src/sdxl_inversion_pipeline.py CHANGED
@@ -304,7 +304,10 @@ class SDXLDDIMPipeline(StableDiffusionXLImg2ImgPipeline):
 
      def get_timestamp_dist(self, z_0, timesteps):
          timesteps = timesteps.to(z_0.device)
-         sigma = self.scheduler.sigmas.cuda()[:-1][self.scheduler.timesteps == timesteps]
+         if "cuda" in str(z_0.device):
+             sigma = self.scheduler.sigmas.cuda()[:-1][self.scheduler.timesteps == timesteps]
+         else:
+             sigma = self.scheduler.sigmas[:-1][self.scheduler.timesteps == timesteps]
          z_0 = z_0.reshape(-1, 1)
 
          def gaussian_pdf(x):
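The branch keeps the original CUDA behavior while adding a CPU path. A device-agnostic alternative (a suggestion, not what this commit ships) would follow `z_0` instead of string-matching the device name:

# Build the timestep mask on the scheduler's device, then move only the
# selected sigma to wherever z_0 lives; no cuda/cpu branch needed.
mask = self.scheduler.timesteps == timesteps.to(self.scheduler.timesteps.device)
sigma = self.scheduler.sigmas[:-1][mask].to(z_0.device)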