Files changed (1)
  1. app.py +13 -254
app.py CHANGED
@@ -1,10 +1,8 @@
 import spaces
 import torch
 import gradio as gr
-from gradio import processing_utils, utils
 from PIL import Image
 import random
-
 from diffusers import (
     DiffusionPipeline,
     AutoencoderKL,
@@ -18,12 +16,7 @@ from diffusers import (
 )
 import tempfile
 import time
-from share_btn import community_icon_html, loading_icon_html, share_js
-import user_history
-from illusion_style import css
 import os
-from transformers import CLIPImageProcessor
-from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 
 BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
 
@@ -31,258 +24,24 @@ BASE_MODEL = "SG161222/Realistic_Vision_V5.1_noVAE"
 vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16)
 controlnet = ControlNetModel.from_pretrained("monster-labs/control_v1p_sd15_qrcode_monster", torch_dtype=torch.float16)
 
-# Initialize the safety checker conditionally
-SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
-safety_checker = None
-feature_extractor = None
-if SAFETY_CHECKER_ENABLED:
-    safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
-    feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
+# Commenting out safety checker initialization
+# SAFETY_CHECKER_ENABLED = os.environ.get("SAFETY_CHECKER", "0") == "1"
+# safety_checker = None
+# feature_extractor = None
+# if SAFETY_CHECKER_ENABLED:
+#     safety_checker = StableDiffusionSafetyChecker.from_pretrained("CompVis/stable-diffusion-safety-checker").to("cuda")
+#     feature_extractor = CLIPImageProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
 main_pipe = StableDiffusionControlNetPipeline.from_pretrained(
     BASE_MODEL,
     controlnet=controlnet,
     vae=vae,
-    safety_checker=safety_checker,
-    feature_extractor=feature_extractor,
+    # Remove safety checker and feature extractor from pipeline components
+    # safety_checker=safety_checker,
+    # feature_extractor=feature_extractor,
     torch_dtype=torch.float16,
 ).to("cuda")
 
-# Function to check NSFW images
-#def check_nsfw_images(images: list[Image.Image]) -> tuple[list[Image.Image], list[bool]]:
-#    if SAFETY_CHECKER_ENABLED:
-#        safety_checker_input = feature_extractor(images, return_tensors="pt").to("cuda")
-#        has_nsfw_concepts = safety_checker(
-#            images=[images],
-#            clip_input=safety_checker_input.pixel_values.to("cuda")
-#        )
-#        return images, has_nsfw_concepts
-#    else:
-#        return images, [False] * len(images)
-
-#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#main_pipe.unet.to(memory_format=torch.channels_last)
-#main_pipe.unet = torch.compile(main_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#model_id = "stabilityai/sd-x2-latent-upscaler"
-image_pipe = StableDiffusionControlNetImg2ImgPipeline(**main_pipe.components)
-
-
-#image_pipe.unet = torch.compile(image_pipe.unet, mode="reduce-overhead", fullgraph=True)
-#upscaler = StableDiffusionLatentUpscalePipeline.from_pretrained(model_id, torch_dtype=torch.float16)
-#upscaler.to("cuda")
-
-
-# Sampler map
-SAMPLER_MAP = {
-    "DPM++ Karras SDE": lambda config: DPMSolverMultistepScheduler.from_config(config, use_karras=True, algorithm_type="sde-dpmsolver++"),
-    "Euler": lambda config: EulerDiscreteScheduler.from_config(config),
-}
-
-def center_crop_resize(img, output_size=(512, 512)):
-    width, height = img.size
-
-    # Calculate dimensions to crop to the center
-    new_dimension = min(width, height)
-    left = (width - new_dimension)/2
-    top = (height - new_dimension)/2
-    right = (width + new_dimension)/2
-    bottom = (height + new_dimension)/2
-
-    # Crop and resize
-    img = img.crop((left, top, right, bottom))
-    img = img.resize(output_size)
-
-    return img
-
-def common_upscale(samples, width, height, upscale_method, crop=False):
-    if crop == "center":
-        old_width = samples.shape[3]
-        old_height = samples.shape[2]
-        old_aspect = old_width / old_height
-        new_aspect = width / height
-        x = 0
-        y = 0
-        if old_aspect > new_aspect:
-            x = round((old_width - old_width * (new_aspect / old_aspect)) / 2)
-        elif old_aspect < new_aspect:
-            y = round((old_height - old_height * (old_aspect / new_aspect)) / 2)
-        s = samples[:,:,y:old_height-y,x:old_width-x]
-    else:
-        s = samples
-
-    return torch.nn.functional.interpolate(s, size=(height, width), mode=upscale_method)
-
-def upscale(samples, upscale_method, scale_by):
-    #s = samples.copy()
-    width = round(samples["images"].shape[3] * scale_by)
-    height = round(samples["images"].shape[2] * scale_by)
-    s = common_upscale(samples["images"], width, height, upscale_method, "disabled")
-    return (s)
-
-def check_inputs(prompt: str, control_image: Image.Image):
-    if control_image is None:
-        raise gr.Error("Please select or upload an Input Illusion")
-    if prompt is None or prompt == "":
-        raise gr.Error("Prompt is required")
-
-def convert_to_pil(base64_image):
-    pil_image = Image.open(base64_image)
-    return pil_image
-
-def convert_to_base64(pil_image):
-    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
-        image.save(temp_file.name)
-        return temp_file.name
-
-# Inference function
-@spaces.GPU
-def inference(
-    control_image: Image.Image,
-    prompt: str,
-    negative_prompt: str,
-    guidance_scale: float = 8.0,
-    controlnet_conditioning_scale: float = 1,
-    control_guidance_start: float = 1,
-    control_guidance_end: float = 1,
-    upscaler_strength: float = 0.5,
-    seed: int = -1,
-    sampler = "DPM++ Karras SDE",
-    progress = gr.Progress(track_tqdm=True),
-    profile: gr.OAuthProfile | None = None,
-):
-    start_time = time.time()
-    start_time_struct = time.localtime(start_time)
-    start_time_formatted = time.strftime("%H:%M:%S", start_time_struct)
-    print(f"Inference started at {start_time_formatted}")
-
-    # Generate the initial image
-    #init_image = init_pipe(prompt).images[0]
-
-    # Rest of your existing code
-    control_image_small = center_crop_resize(control_image)
-    control_image_large = center_crop_resize(control_image, (1024, 1024))
-
-    main_pipe.scheduler = SAMPLER_MAP[sampler](main_pipe.scheduler.config)
-    my_seed = random.randint(0, 2**32 - 1) if seed == -1 else seed
-    generator = torch.Generator(device="cuda").manual_seed(my_seed)
-
-    out = main_pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        image=control_image_small,
-        guidance_scale=float(guidance_scale),
-        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
-        generator=generator,
-        control_guidance_start=float(control_guidance_start),
-        control_guidance_end=float(control_guidance_end),
-        num_inference_steps=15,
-        output_type="latent"
-    )
-    upscaled_latents = upscale(out, "nearest-exact", 2)
-    out_image = image_pipe(
-        prompt=prompt,
-        negative_prompt=negative_prompt,
-        control_image=control_image_large,
-        image=upscaled_latents,
-        guidance_scale=float(guidance_scale),
-        generator=generator,
-        num_inference_steps=20,
-        strength=upscaler_strength,
-        control_guidance_start=float(control_guidance_start),
-        control_guidance_end=float(control_guidance_end),
-        controlnet_conditioning_scale=float(controlnet_conditioning_scale)
-    )
-    end_time = time.time()
-    end_time_struct = time.localtime(end_time)
-    end_time_formatted = time.strftime("%H:%M:%S", end_time_struct)
-    print(f"Inference ended at {end_time_formatted}, taking {end_time-start_time}s")
-
-    # Save image + metadata
-    user_history.save_image(
-        label=prompt,
-        image=out_image["images"][0],
-        profile=profile,
-        metadata={
-            "prompt": prompt,
-            "negative_prompt": negative_prompt,
-            "guidance_scale": guidance_scale,
-            "controlnet_conditioning_scale": controlnet_conditioning_scale,
-            "control_guidance_start": control_guidance_start,
-            "control_guidance_end": control_guidance_end,
-            "upscaler_strength": upscaler_strength,
-            "seed": seed,
-            "sampler": sampler,
-        },
-    )
-
-    return out_image["images"][0], gr.update(visible=True), gr.update(visible=True), my_seed
-
-with gr.Blocks() as app:
-    gr.Markdown(
-        '''
-        <div style="text-align: center;">
-        <h1>Illusion Diffusion HQ 🌀</h1>
-        <p style="font-size:16px;">Generate stunning high quality illusion artwork with Stable Diffusion</p>
-        <p>Illusion Diffusion is back up with a safety checker! Because I have been asked, if you would like to support me, consider using <a href="https://deforum.studio">deforum.studio</a></p>
-        <p>A space by AP <a href="https://twitter.com/angrypenguinPNG">Follow me on Twitter</a> with big contributions from <a href="https://twitter.com/multimodalart">multimodalart</a></p>
-        <p>This project works by using <a href="https://huggingface.co/monster-labs/control_v1p_sd15_qrcode_monster">Monster Labs QR Control Net</a>. Given a prompt and your pattern, we use a QR code conditioned controlnet to create a stunning illusion! Credit to: <a href="https://twitter.com/MrUgleh">MrUgleh</a> for discovering the workflow :)</p>
-        </div>
-        '''
-    )
-
-
-    state_img_input = gr.State()
-    state_img_output = gr.State()
-    with gr.Row():
-        with gr.Column():
-            control_image = gr.Image(label="Input Illusion", type="pil", elem_id="control_image")
-            controlnet_conditioning_scale = gr.Slider(minimum=0.0, maximum=5.0, step=0.01, value=0.8, label="Illusion strength", elem_id="illusion_strength", info="ControlNet conditioning scale")
-            gr.Examples(examples=["checkers.png", "checkers_mid.jpg", "pattern.png", "ultra_checkers.png", "spiral.jpeg", "funky.jpeg" ], inputs=control_image)
-            prompt = gr.Textbox(label="Prompt", elem_id="prompt", info="Type what you want to generate", placeholder="Medieval village scene with busy streets and castle in the distance")
-            negative_prompt = gr.Textbox(label="Negative Prompt", info="Type what you don't want to see", value="low quality", elem_id="negative_prompt")
-            with gr.Accordion(label="Advanced Options", open=False):
-                guidance_scale = gr.Slider(minimum=0.0, maximum=50.0, step=0.25, value=7.5, label="Guidance Scale")
-                sampler = gr.Dropdown(choices=list(SAMPLER_MAP.keys()), value="Euler")
-                control_start = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=0, label="Start of ControlNet")
-                control_end = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="End of ControlNet")
-                strength = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, value=1, label="Strength of the upscaler")
-                seed = gr.Slider(minimum=-1, maximum=9999999999, step=1, value=-1, label="Seed", info="-1 means random seed")
-                used_seed = gr.Number(label="Last seed used",interactive=False)
-            run_btn = gr.Button("Run")
-        with gr.Column():
-            result_image = gr.Image(label="Illusion Diffusion Output", interactive=False, elem_id="output")
-            with gr.Group(elem_id="share-btn-container", visible=False) as share_group:
-                community_icon = gr.HTML(community_icon_html)
-                loading_icon = gr.HTML(loading_icon_html)
-                share_button = gr.Button("Share to community", elem_id="share-btn")
-
-    prompt.submit(
-        check_inputs,
-        inputs=[prompt, control_image],
-        queue=False
-    ).success(
-        inference,
-        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[result_image, result_image, share_group, used_seed])
-
-    run_btn.click(
-        check_inputs,
-        inputs=[prompt, control_image],
-        queue=False
-    ).success(
-        inference,
-        inputs=[control_image, prompt, negative_prompt, guidance_scale, controlnet_conditioning_scale, control_start, control_end, strength, seed, sampler],
-        outputs=[result_image, result_image, share_group, used_seed])
-
-    share_button.click(None, [], [], js=share_js)
-
-with gr.Blocks(css=css) as app_with_history:
-    with gr.Tab("Demo"):
-        app.render()
-    with gr.Tab("Past generations"):
-        user_history.render()
-
-app_with_history.queue(max_size=20,api_open=False )
-
-if __name__ == "__main__":
-    app_with_history.launch(max_threads=400)
+def inference(control_image: Image.Image, prompt: str, negative_prompt: str, guidance_scale: float = 8.0, controlnet_conditioning_scale: float = 1, upscaler_strength: float = 0.5, seed: int = -1):
+    # Inference logic remains unchanged
+    ...