taki0112 committed on
Commit
cb415f3
·
1 Parent(s): 24dc2ac
Files changed (1) hide show
  1. app.py +256 -25
app.py CHANGED
@@ -2,11 +2,17 @@ import torch
2
  from pipelines.inverted_ve_pipeline import STYLE_DESCRIPTION_DICT, create_image_grid
3
  import gradio as gr
4
  import os, json
 
 
5
 
 
6
  from pipelines.pipeline_stable_diffusion_xl import StableDiffusionXLPipeline
7
- from diffusers import AutoencoderKL
 
8
  from random import randint
9
  from utils import init_latent
 
 
10
 
11
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
12
  if device == 'cpu':
@@ -34,14 +40,30 @@ def memory_efficient(model):
34
  except AttributeError:
35
  print("enable_xformers_memory_efficient_attention is not supported.")
36
 
 
37
  vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)
 
 
 
 
 
38
  model = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch_dtype)
39
 
40
  print("vae")
41
  memory_efficient(vae)
 
 
 
 
42
  print("SDXL")
43
  memory_efficient(model)
44
 
 
 
 
 
 
 
45
 
46
  # controlnet_scale, canny thres 1, 2 (2 > 1, 2:1, 3:1)
47
 
@@ -50,6 +72,62 @@ def parse_config(config):
50
  config = json.load(f)
51
  return config
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def load_example_style():
55
  folder_path = 'assets/ref'
@@ -70,13 +148,134 @@ def load_example_style():
70
  return examples
71
 
72
  def style_fn(image_path, style_name, content_text, output_number, diffusion_step=50):
73
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- :param style_name: ์–ด๋–ค json ํŒŒ์ผ ๋ถ€๋ฅผ๊ฑฐ๋ƒ ?
76
- :param content_text: ์–ด๋–ค ์ฝ˜ํ…์ธ ๋กœ ๋ณ€ํ™”๋ฅผ ์›ํ•˜๋‹ˆ ?
77
- :param output_number: ๋ช‡๊ฐœ ์ƒ์„ฑํ• ๊ฑฐ๋‹ˆ ?
78
- :return:
79
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  config_path = './config/{}.json'.format(style_name)
81
  config = parse_config(config_path)
82
 
@@ -84,7 +283,6 @@ def style_fn(image_path, style_name, content_text, output_number, diffusion_step
84
  inf_seeds = [randint(0, 10**10) for _ in range(int(output_number))]
85
  # inf_seeds = [i for i in range(int(output_number))]
86
 
87
-
88
  activate_layer_indices_list = config['inference_info']['activate_layer_indices_list']
89
  activate_step_indices_list = config['inference_info']['activate_step_indices_list']
90
  ref_seed = config['reference_info']['ref_seeds'][0]
@@ -106,6 +304,9 @@ def style_fn(image_path, style_name, content_text, output_number, diffusion_step
106
 
107
  use_advanced_sampling = config['inference_info']['use_advanced_sampling']
108
 
 
 
 
109
  style_description_pos, style_description_neg = STYLE_DESCRIPTION_DICT[style_name][0], \
110
  STYLE_DESCRIPTION_DICT[style_name][1]
111
 
@@ -126,34 +327,39 @@ def style_fn(image_path, style_name, content_text, output_number, diffusion_step
126
 
127
  for activate_step_indices in activate_step_indices_list:
128
 
129
- str_activate_layer, str_activate_step = model.activate_layer(
130
  activate_layer_indices=activate_layer_indices,
131
  attn_map_save_steps=attn_map_save_steps,
132
- activate_step_indices=activate_step_indices, use_shared_attention=use_shared_attention,
 
133
  adain_queries=adain_queries,
134
  adain_keys=adain_keys,
135
  adain_values=adain_values,
136
  )
137
- # ref_latent = model.get_init_latent(ref_seed, precomputed_path=None)
138
- ref_latent = init_latent(model, device_name=device, dtype=torch_dtype, seed=ref_seed)
 
139
  latents = [ref_latent]
140
 
141
  for inf_seed in inf_seeds:
142
- # latents.append(model.get_init_latent(inf_seed, precomputed_path=None))
143
- inf_latent = init_latent(model, device_name=device, dtype=torch_dtype, seed=inf_seed)
144
  latents.append(inf_latent)
145
 
 
146
  latents = torch.cat(latents, dim=0)
147
  latents.to(device)
148
 
149
- images = model(
150
  prompt=ref_prompt,
151
  negative_prompt=style_description_neg,
152
  guidance_scale=guidance_scale,
153
  num_inference_steps=diffusion_step,
 
154
  latents=latents,
155
  num_images_per_prompt=len(inf_seeds) + 1,
156
  target_prompt=inf_prompt,
 
157
  use_inf_negative_prompt=use_inf_negative_prompt,
158
  use_advanced_sampling=use_advanced_sampling
159
  )[0][1:]
@@ -162,40 +368,65 @@ def style_fn(image_path, style_name, content_text, output_number, diffusion_step
162
  n_col = len(inf_seeds) # ์›๋ณธ์ถ”๊ฐ€ํ•˜๋ ค๋ฉด + 1
163
 
164
  # make grid
165
- grid = create_image_grid(images, n_row, n_col, padding=10)
166
-
167
- torch.cuda.empty_cache()
168
 
169
  return grid
170
 
 
171
  description_md = """
172
 
173
  ### We introduce `Visual Style Prompting`, which reflects the style of a reference image to the images generated by a pretrained text-to-image diffusion model without finetuning or optimization (e.g., Figure N).
174
  ### ๐Ÿ“– [[Paper](https://arxiv.org/abs/2402.12974)] | โœจ [[Project page](https://curryjung.github.io/VisualStylePrompt)] | โœจ [[Code](https://github.com/naver-ai/Visual-Style-Prompting)]
175
- ### ๐Ÿ”ฅ [[w/ Controlnet ver](https://huggingface.co/spaces/naver-ai/VisualStylePrompting_Controlnet)]
176
  ---
177
- ### To try out our vanilla demo,
178
  1. Choose a `style reference` from the collection of images below.
179
  2. Enter the `text prompt`.
180
  3. Choose the `number of outputs`.
181
-
182
- ### To achieve faster results, we recommend lowering the diffusion steps to 30.
 
 
 
 
 
 
 
 
 
183
  ### Enjoy ! ๐Ÿ˜„
184
  """
185
 
186
  iface_style = gr.Interface(
187
  fn=style_fn,
188
  inputs=[
189
- gr.components.Image(label="Style Image"),
190
  gr.components.Textbox(label='Style name', visible=False),
191
  gr.components.Textbox(label="Text prompt", placeholder="Enter Text prompt"),
192
  gr.components.Textbox(label="Number of outputs", placeholder="Enter Number of outputs"),
193
  gr.components.Slider(minimum=10, maximum=50, step=10, value=50, label="Diffusion steps")
194
  ],
195
- outputs=gr.components.Image(type="pil"),
196
  title="๐ŸŽจ Visual Style Prompting (default)",
197
  description=description_md,
198
  examples=load_example_style(),
199
  )
200
 
201
- iface_style.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from pipelines.inverted_ve_pipeline import STYLE_DESCRIPTION_DICT, create_image_grid
3
  import gradio as gr
4
  import os, json
5
+ import numpy as np
6
+ from PIL import Image
7
 
8
+ from pipelines.pipeline_controlnet_sd_xl import StableDiffusionXLControlNetPipeline
9
  from pipelines.pipeline_stable_diffusion_xl import StableDiffusionXLPipeline
10
+ from diffusers import ControlNetModel, AutoencoderKL
11
+ from transformers import DPTFeatureExtractor, DPTForDepthEstimation
12
  from random import randint
13
  from utils import init_latent
14
+ from transformers import Blip2Processor, Blip2ForConditionalGeneration
15
+ from diffusers import DDIMScheduler
16
 
17
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
18
  if device == 'cpu':
 
40
  except AttributeError:
41
  print("enable_xformers_memory_efficient_attention is not supported.")
42
 
43
# Module-level model loading. Everything below runs once at import time and
# the resulting objects are shared by every request handler in this file.

# Depth ControlNet and the fp16-fix VAE, both loaded in the module-wide
# `torch_dtype`.
controlnet = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0", torch_dtype=torch_dtype)
vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype)

# ControlNet pipeline: receives both the ControlNet and the custom VAE.
model_controlnet = StableDiffusionXLControlNetPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", controlnet=controlnet, vae=vae, torch_dtype=torch_dtype
)

# Vanilla pipeline. NOTE(review): unlike `model_controlnet`, `vae` is not
# passed here, so this pipeline loads whatever VAE ships with the checkpoint.
model = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch_dtype)

# Apply `memory_efficient` to each component; the prints label the
# per-component messages that helper may emit.
print("vae")
memory_efficient(vae)
print("control")
memory_efficient(controlnet)
print("ControlNet-SDXL")
memory_efficient(model_controlnet)
print("SDXL")
memory_efficient(model)

# Depth estimator used by `get_depth_map`. It is loaded without an explicit
# dtype, unlike the other models in this block.
depth_estimator = DPTForDepthEstimation.from_pretrained("Intel/dpt-hybrid-midas").to(device)
feature_extractor = DPTFeatureExtractor.from_pretrained("Intel/dpt-hybrid-midas")

# BLIP-2 captioner used by `blip_inf_prompt`; loaded in `torch_dtype`.
blip_processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
blip_model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b", torch_dtype=torch_dtype).to(device)
66
+
67
 
68
  # controlnet_scale, canny thres 1, 2 (2 > 1, 2:1, 3:1)
69
 
 
72
  config = json.load(f)
73
  return config
74
 
75
def get_depth_map(image):
    """Estimate a depth map for ``image`` and return it as a PIL image.

    The image is preprocessed with the module-level ``feature_extractor``,
    run through ``depth_estimator``, resized to 1024x1024, min-max
    normalised per sample, and replicated across three channels.

    :param image: input accepted by ``feature_extractor(images=...)``.
    :return: ``PIL.Image`` built from a uint8 array of the first (only)
        batch element, with three identical channels.
    """
    pixel_values = feature_extractor(images=image, return_tensors="pt").pixel_values.to(device)
    with torch.no_grad(), torch.autocast(device):
        depth_map = depth_estimator(pixel_values).predicted_depth

    # Work in float32 from here on so the normalisation and the NumPy
    # conversion below do not depend on whichever reduced-precision dtype
    # autocast produced.
    depth_map = torch.nn.functional.interpolate(
        depth_map.float().unsqueeze(1),
        size=(1024, 1024),
        mode="bicubic",
        align_corners=False,
    )
    depth_min = torch.amin(depth_map, dim=[1, 2, 3], keepdim=True)
    depth_max = torch.amax(depth_map, dim=[1, 2, 3], keepdim=True)
    # A perfectly flat depth map has max == min; clamp the range so the
    # division yields zeros instead of NaN.
    depth_range = (depth_max - depth_min).clamp_min(torch.finfo(depth_map.dtype).eps)
    depth_map = (depth_map - depth_min) / depth_range

    # Replicate the single depth channel to get a 3-channel image.
    stacked = torch.cat([depth_map] * 3, dim=1)
    pixels = stacked.permute(0, 2, 3, 1).cpu().numpy()[0]
    return Image.fromarray((pixels * 255.0).clip(0, 255).astype(np.uint8))
94
+
95
+
96
def get_depth_edge_array(depth_img_path):
    """Return the depth-map image for an array-form control image.

    Despite its name, ``depth_img_path`` is passed straight to
    ``Image.fromarray``, so it must be an array-like image, not a file path.

    :param depth_img_path: array-like image data.
    :return: whatever ``get_depth_map`` returns for the converted image.
    """
    return get_depth_map(Image.fromarray(depth_img_path))
103
+
104
def blip_inf_prompt(image):
    """Caption ``image`` with the module-level BLIP-2 model.

    :param image: image accepted by ``blip_processor(images=...)``.
    :return: the first decoded caption, stripped of surrounding whitespace.
    """
    # Cast the inputs to the dtype ``blip_model`` was loaded with
    # (``torch_dtype``) rather than a hard-coded float16, so model and inputs
    # agree whatever dtype the module selected.
    inputs = blip_processor(images=image, return_tensors="pt").to(device, torch_dtype)

    generated_ids = blip_model.generate(**inputs)
    captions = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0].strip()
111
+
112
def load_example_controlnet():
    """Build the example rows for the ControlNet tab.

    Every ``.png`` in ``assets/ref`` yields one row. The style name is the
    second ``_``-separated token of the file name, and the text prompt is the
    first entry of that style's ``inf_object_list`` config value. All rows
    share the same depth image and the defaults 1 output, scale 0.5, 50 steps.

    :return: list of ``[style image path, depth image path, style name,
        prompt, 1, 0.5, 50]`` rows.
    """
    ref_dir = 'assets/ref'
    depth_example = './assets/depth_dir/gundam.png'
    rows = []
    for entry in os.listdir(ref_dir):
        if not entry.endswith(".png"):
            continue
        ref_path = os.path.join(ref_dir, entry)
        style = os.path.basename(ref_path).split('_')[1]
        style_config = parse_config('./config/{}.json'.format(style))
        first_object = style_config["inference_info"]["inf_object_list"][0]
        rows.append([ref_path, depth_example, style, first_object, 1, 0.5, 50])
    return rows
131
 
132
  def load_example_style():
133
  folder_path = 'assets/ref'
 
148
  return examples
149
 
150
def style_fn(image_path, style_name, content_text, output_number, diffusion_step=50):
    """Generate images of ``content_text`` in the style of a reference.

    Two modes, selected by ``style_name``:

    * non-empty: a bundled example style. Settings come from
      ``./config/<style_name>.json`` and prompts are built from
      ``STYLE_DESCRIPTION_DICT``.
    * empty: a user-supplied style image at ``image_path``. Settings come
      from ``./config/default.json``, the reference prompt is a BLIP-2
      caption of the image, and the image itself is passed to the pipeline.

    :param image_path: file path of the style image (read only in user-image mode).
    :param style_name: name of a bundled style, or empty/``None`` for user-image mode.
    :param content_text: text describing what to generate.
    :param output_number: number of images to generate; converted with ``int()``.
    :param diffusion_step: number of inference steps passed to the pipeline.
    :return: the grid from ``create_image_grid`` for the last
        (layer, step) configuration, or ``None`` if the config lists are empty.
    """
    # Treat None like an empty string so a missing hidden field selects
    # user-image mode instead of raising AttributeError.
    user_image_flag = not (style_name or "").strip()

    if user_image_flag:
        # NOTE: this replaces the scheduler on the shared module-level
        # pipeline, so it also affects later calls in the other mode.
        model.scheduler = DDIMScheduler.from_config(model.scheduler.config)
        # Close the source file deterministically; resize() returns a new,
        # fully loaded image.
        with Image.open(image_path) as source_img:
            origin_real_img = source_img.resize((1024, 1024), resample=Image.BICUBIC)
        real_img = np.array(origin_real_img).astype(np.float32) / 255.0
        style_name = 'default'
    else:
        origin_real_img = None
        real_img = None

    config_path = './config/{}.json'.format(style_name)
    config = parse_config(config_path)
    inference_info = config['inference_info']

    inf_object = content_text
    inf_seeds = [randint(0, 10**10) for _ in range(int(output_number))]

    # Settings read identically in both modes.
    activate_layer_indices_list = inference_info['activate_layer_indices_list']
    activate_step_indices_list = inference_info['activate_step_indices_list']
    attn_map_save_steps = inference_info['attn_map_save_steps']
    guidance_scale = config['guidance_scale']
    use_shared_attention = inference_info['use_shared_attention']
    adain_queries = inference_info['adain_queries']
    adain_keys = inference_info['adain_keys']
    adain_values = inference_info['adain_values']

    if user_image_flag:
        ref_seed = 0
        use_inf_negative_prompt = False
        use_advanced_sampling = False
        use_prompt_as_null = True

        ref_prompt = blip_inf_prompt(origin_real_img)
        inf_prompt = inf_object
        style_description_neg = None
    else:
        reference_info = config['reference_info']
        ref_seed = reference_info['ref_seeds'][0]
        use_inf_negative_prompt = inference_info['use_negative_prompt']
        use_advanced_sampling = inference_info['use_advanced_sampling']
        use_prompt_as_null = False

        ref_object = reference_info["ref_object_list"][0]

        # The description templates are keyed by the config's own style name,
        # not by the name the caller passed in.
        style_name = config["style_name_list"][0]
        style_description_pos, style_description_neg = STYLE_DESCRIPTION_DICT[style_name][0], \
                                                       STYLE_DESCRIPTION_DICT[style_name][1]

        if reference_info['with_style_description']:
            ref_prompt = style_description_pos.replace("{object}", ref_object)
        else:
            ref_prompt = ref_object

        if inference_info['with_style_description']:
            inf_prompt = style_description_pos.replace("{object}", inf_object)
        else:
            inf_prompt = inf_object

    # Inference
    with torch.inference_mode():
        grid = None

        for activate_layer_indices in activate_layer_indices_list:

            for activate_step_indices in activate_step_indices_list:

                model.activate_layer(
                    activate_layer_indices=activate_layer_indices,
                    attn_map_save_steps=attn_map_save_steps,
                    activate_step_indices=activate_step_indices, use_shared_attention=use_shared_attention,
                    adain_queries=adain_queries,
                    adain_keys=adain_keys,
                    adain_values=adain_values,
                )

                # The reference latent goes first, followed by one latent per
                # requested output.
                latents = [init_latent(model, device_name=device, dtype=torch_dtype, seed=ref_seed)]
                latents.extend(
                    init_latent(model, device_name=device, dtype=torch_dtype, seed=inf_seed)
                    for inf_seed in inf_seeds
                )
                num_images_per_prompt = len(inf_seeds) + 1

                # Tensor.to() is not in-place; keep the returned tensor.
                latents = torch.cat(latents, dim=0).to(device)

                images = model(
                    prompt=ref_prompt,
                    negative_prompt=style_description_neg,
                    guidance_scale=guidance_scale,
                    num_inference_steps=diffusion_step,
                    latents=latents,
                    num_images_per_prompt=num_images_per_prompt,
                    target_prompt=inf_prompt,
                    use_inf_negative_prompt=use_inf_negative_prompt,
                    use_advanced_sampling=use_advanced_sampling,
                    use_prompt_as_null=use_prompt_as_null,
                    image=real_img
                )[0][1:]

                n_row = 1
                # + 1 to include the original (reference) image.
                # NOTE(review): `images` drops the first pipeline output
                # ([1:]) yet n_col counts one extra column; confirm against
                # create_image_grid.
                n_col = len(inf_seeds) + 1

                # make grid
                grid = create_image_grid(images, n_row, n_col, padding=10)

    return grid
277
+
278
+ def controlnet_fn(image_path, depth_image_path, style_name, content_text, output_number, controlnet_scale=0.5, diffusion_step=50):
279
  config_path = './config/{}.json'.format(style_name)
280
  config = parse_config(config_path)
281
 
 
283
  inf_seeds = [randint(0, 10**10) for _ in range(int(output_number))]
284
  # inf_seeds = [i for i in range(int(output_number))]
285
 
 
286
  activate_layer_indices_list = config['inference_info']['activate_layer_indices_list']
287
  activate_step_indices_list = config['inference_info']['activate_step_indices_list']
288
  ref_seed = config['reference_info']['ref_seeds'][0]
 
304
 
305
  use_advanced_sampling = config['inference_info']['use_advanced_sampling']
306
 
307
+ #get canny edge array
308
+ depth_image = get_depth_edge_array(depth_image_path)
309
+
310
  style_description_pos, style_description_neg = STYLE_DESCRIPTION_DICT[style_name][0], \
311
  STYLE_DESCRIPTION_DICT[style_name][1]
312
 
 
327
 
328
  for activate_step_indices in activate_step_indices_list:
329
 
330
+ str_activate_layer, str_activate_step = model_controlnet.activate_layer(
331
  activate_layer_indices=activate_layer_indices,
332
  attn_map_save_steps=attn_map_save_steps,
333
+ activate_step_indices=activate_step_indices,
334
+ use_shared_attention=use_shared_attention,
335
  adain_queries=adain_queries,
336
  adain_keys=adain_keys,
337
  adain_values=adain_values,
338
  )
339
+
340
+ # ref_latent = model_controlnet.get_init_latent(ref_seed, precomputed_path=None)
341
+ ref_latent = init_latent(model_controlnet, device_name=device, dtype=torch_dtype, seed=ref_seed)
342
  latents = [ref_latent]
343
 
344
  for inf_seed in inf_seeds:
345
+ # latents.append(model_controlnet.get_init_latent(inf_seed, precomputed_path=None))
346
+ inf_latent = init_latent(model_controlnet, device_name=device, dtype=torch_dtype, seed=inf_seed)
347
  latents.append(inf_latent)
348
 
349
+
350
  latents = torch.cat(latents, dim=0)
351
  latents.to(device)
352
 
353
+ images = model_controlnet.generated_ve_inference(
354
  prompt=ref_prompt,
355
  negative_prompt=style_description_neg,
356
  guidance_scale=guidance_scale,
357
  num_inference_steps=diffusion_step,
358
+ controlnet_conditioning_scale=controlnet_scale,
359
  latents=latents,
360
  num_images_per_prompt=len(inf_seeds) + 1,
361
  target_prompt=inf_prompt,
362
+ image=depth_image,
363
  use_inf_negative_prompt=use_inf_negative_prompt,
364
  use_advanced_sampling=use_advanced_sampling
365
  )[0][1:]
 
368
  n_col = len(inf_seeds) # ์›๋ณธ์ถ”๊ฐ€ํ•˜๋ ค๋ฉด + 1
369
 
370
  # make grid
371
+ grid = create_image_grid(images, n_row, n_col)
 
 
372
 
373
  return grid
374
 
375
+
376
# Markdown shown as the description of both Gradio tabs.
description_md = """

### We introduce `Visual Style Prompting`, which reflects the style of a reference image to the images generated by a pretrained text-to-image diffusion model without finetuning or optimization (e.g., Figure N).
### 📖 [[Paper](https://arxiv.org/abs/2402.12974)] | ✨ [[Project page](https://curryjung.github.io/VisualStylePrompt)] | ✨ [[Code](https://github.com/naver-ai/Visual-Style-Prompting)]
---
### 🔥 To try out our vanilla demo,
1. Choose a `style reference` from the collection of images below.
2. Enter the `text prompt`.
3. Choose the `number of outputs`.
---
### ✨ Visual Style Prompting also works on `ControlNet` which specifies the shape of the results by depthmap or keypoints.
### ‼️ w/ ControlNet ver does not support user style images.
### 🔥 To try out our demo with ControlNet,
1. Upload an `image for depth control`. An off-the-shelf model will produce the depthmap from it.
2. Choose `ControlNet scale` which determines the alignment to the depthmap.
3. Choose a `style reference` from the collection of images below.
4. Enter the `text prompt`. (`Empty text` is okay, but a depthmap description helps.)
5. Choose the `number of outputs`.

### 👉 To achieve faster results, we recommend lowering the diffusion steps to 30.
### Enjoy ! 😄
"""
398
 
399
# "Vanilla" tab: wires `style_fn` to its inputs. The input order must match
# the positional parameters of `style_fn`
# (image_path, style_name, content_text, output_number, diffusion_step).
iface_style = gr.Interface(
    fn=style_fn,
    inputs=[
        # type="filepath" because style_fn opens the image from a path.
        gr.components.Image(label="Style Image", type="filepath"),
        # Hidden field; an empty value makes style_fn use the uploaded image.
        gr.components.Textbox(label='Style name', visible=False),
        gr.components.Textbox(label="Text prompt", placeholder="Enter Text prompt"),
        gr.components.Textbox(label="Number of outputs", placeholder="Enter Number of outputs"),
        gr.components.Slider(minimum=10, maximum=50, step=10, value=50, label="Diffusion steps")
    ],
    outputs=gr.components.Image(label="Generated Image"),
    title="🎨 Visual Style Prompting (default)",
    description=description_md,
    examples=load_example_style(),
)
413
 
414
# "w/ ControlNet" tab: wires `controlnet_fn` to its inputs. The input order
# must match the positional parameters of `controlnet_fn`
# (image_path, depth_image_path, style_name, content_text, output_number,
# controlnet_scale, diffusion_step).
iface_controlnet = gr.Interface(
    fn=controlnet_fn,
    inputs=[
        gr.components.Image(label="Style image"),
        # No `type` given: the depth image is converted with Image.fromarray
        # downstream, so it is consumed as array data rather than a path.
        gr.components.Image(label="Depth image"),
        gr.components.Textbox(label='Style name', visible=False),
        gr.components.Textbox(label="Text prompt", placeholder="Enter Text prompt"),
        gr.components.Textbox(label="Number of outputs", placeholder="Enter Number of outputs"),
        gr.components.Slider(minimum=0.5, maximum=10, step=0.5, value=0.5, label="Controlnet scale"),
        gr.components.Slider(minimum=10, maximum=50, step=10, value=50, label="Diffusion steps")
    ],
    outputs=gr.components.Image(label="Generated Image"),
    title="🎨 Visual Style Prompting (w/ ControlNet)",
    description=description_md,
    examples=load_example_controlnet(),
)

# Serve both demos as tabs of a single app.
iface = gr.TabbedInterface([iface_style, iface_controlnet], ["Vanilla", "w/ ControlNet"])
iface.launch(debug=True)