1inkusFace committed
Commit cf4d4ba · verified · 1 Parent(s): d7dfa1e

Update app.py

Files changed (1):
  1. app.py +13 -47
app.py CHANGED
@@ -38,8 +38,6 @@ torch.backends.cudnn.benchmark = False
 
 hftoken = os.getenv("HF_TOKEN")
 
-#image_encoder_path = "google/siglip-so400m-patch14-384"
-#image_encoder_path_b = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
 ipadapter_path = hf_hub_download(repo_id="InstantX/SD3.5-Large-IP-Adapter", filename="ip-adapter.bin")
 model_path = 'ford442/stable-diffusion-3.5-large-bf16'
 
@@ -82,8 +80,6 @@ pipe = StableDiffusion3Pipeline.from_pretrained(
 
 pipe.to(device)
 
-#pipe.to(device=device, dtype=torch.bfloat16)
-
 upscaler_2 = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
 
 MAX_SEED = np.iinfo(np.int32).max
@@ -99,7 +95,7 @@ def infer(
     height,
     guidance_scale,
     num_inference_steps,
-    latent_file, # Add latents file input
+    latent_file,
     ip_scale,
     image_encoder_path,
     progress=gr.Progress(track_tqdm=True),
@@ -110,30 +106,20 @@ def infer(
     generator = torch.Generator(device='cuda').manual_seed(seed)
     enhanced_prompt = prompt
     enhanced_prompt_2 = prompt
-
-    if latent_file: # Check if a latent file is provided
-        # initial_latents = pipe.prepare_latents(
-        #     batch_size=1,
-        #     num_channels_latents=pipe.transformer.in_channels,
-        #     height=pipe.transformer.config.sample_size[0],
-        #     width=pipe.transformer.config.sample_size[1],
-        #     dtype=pipe.transformer.dtype,
-        #     device=pipe.device,
-        #     generator=generator,
-        # )
+    if latent_file:
         sd_image_a = Image.open(latent_file.name).convert('RGB')
         print("-- using image file and loading ip-adapter --")
+        sd_image_a.resize((height,width), Image.LANCZOS)
         pipe.init_ipadapter(
             ip_adapter_path=ipadapter_path,
             image_encoder_path=image_encoder_path,
             nb_token=64,
         )
         print('-- generating image --')
-        #with torch.no_grad():
         sd_image = pipe(
             width=width,
             height=height,
-            prompt=enhanced_prompt, # This conversion is fine
+            prompt=enhanced_prompt,
             negative_prompt=negative_prompt_1,
             num_inference_steps=num_inference_steps,
             guidance_scale=guidance_scale,
@@ -147,9 +133,8 @@ def infer(
         upload_to_ftp(rv_path)
     else:
         print('-- generating image --')
-        #with torch.no_grad():
         sd_image = pipe(
-            prompt=prompt, # This conversion is fine
+            prompt=prompt,
             prompt_2=enhanced_prompt_2,
             prompt_3=enhanced_prompt,
             negative_prompt=negative_prompt_1,
@@ -159,33 +144,14 @@ def infer(
             num_inference_steps=num_inference_steps,
             width=width,
             height=height,
-            # latents=None,
-            # output_type='latent',
             generator=generator,
             max_sequence_length=512
         ).images[0]
     print('-- got image --')
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    #sd35_image = pipe.vae.decode(sd_image / 0.18215).sample
-    # sd35_image = sd35_image.cpu().permute(0, 2, 3, 1).float().detach().numpy()
-    # sd35_image = (sd35_image * 255).round().astype("uint8")
-    # image_pil = Image.fromarray(sd35_image[0])
-    # sd35_path = f"sd35_{seed}.png"
-    # image_pil.save(sd35_path,optimize=False,compress_level=0)
-    # upload_to_ftp(sd35_path)
     sd35_path = f"sd35l_{timestamp}.png"
     sd_image.save(sd35_path,optimize=False,compress_level=0)
     upload_to_ftp(sd35_path)
-    # Convert the generated image to a tensor
-    #generated_image_tensor = torch.tensor([np.array(sd_image).transpose(2, 0, 1)]).to('cuda') / 255.0
-    # Encode the generated image into latents
-    #with torch.no_grad():
-    #    generated_latents = pipe.vae.encode(generated_image_tensor.to(torch.bfloat16)).latent_dist.sample().mul_(0.18215)
-    #latent_path = f"sd35m_{seed}.pt"
-    # Save the latents to a .pt file
-    #torch.save(generated_latents, latent_path)
-    #upload_to_ftp(latent_path)
-    # pipe.unet.to('cpu')
     upscaler_2.to(torch.device('cuda'))
     with torch.no_grad():
         upscale2 = upscaler_2(sd_image, tiling=True, tile_width=256, tile_height=256)
@@ -214,8 +180,8 @@ body{
 
 with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
     with gr.Column(elem_id="col-container"):
-        gr.Markdown(" # Text-to-Text-to-Image StableDiffusion 3.5 Large")
-        expanded_prompt_output = gr.Textbox(label="Prompt", lines=5) # Add this line
+        gr.Markdown(" # StableDiffusion 3.5 Large with IP Adapter")
+        expanded_prompt_output = gr.Textbox(label="Prompt", lines=5)
         with gr.Row():
             prompt = gr.Text(
                 label="Prompt",
@@ -227,7 +193,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             run_button = gr.Button("Run", scale=0, variant="primary")
         result = gr.Image(label="Result", show_label=False)
         with gr.Accordion("Advanced Settings", open=True):
-            latent_file = gr.File(label="Image File (optional)") # Add latents file input
+            latent_file = gr.File(label="Image File (optional)")
             image_encoder_path = gr.Dropdown(
                 ["google/siglip-so400m-patch14-384", "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"],
                 label="CLIP Model",
@@ -266,28 +232,28 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
                 minimum=256,
                 maximum=MAX_IMAGE_SIZE,
                 step=32,
-                value=768, # Replace with defaults that work for your model
+                value=768,
             )
             height = gr.Slider(
                 label="Height",
                 minimum=256,
                 maximum=MAX_IMAGE_SIZE,
                 step=32,
-                value=768, # Replace with defaults that work for your model
+                value=768,
             )
             guidance_scale = gr.Slider(
                 label="Guidance scale",
                 minimum=0.0,
                 maximum=30.0,
                 step=0.1,
-                value=4.2, # Replace with defaults that work for your model
+                value=4.2,
             )
             num_inference_steps = gr.Slider(
                 label="Number of inference steps",
                 minimum=1,
                 maximum=500,
                 step=1,
-                value=220, # Replace with defaults that work for your model
+                value=50,
            )
         gr.Examples(examples=examples, inputs=[prompt])
         gr.on(
@@ -302,7 +268,7 @@ with gr.Blocks(theme=gr.themes.Origin(),css=css) as demo:
             height,
             guidance_scale,
             num_inference_steps,
-            latent_file, # Add latent_file to the inputs
+            latent_file,
             ip_scale,
             image_encoder_path,
         ],
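For reference, a minimal sketch of how the reworked image-file branch of infer() is meant to run. It assumes the objects built earlier in app.py (pipe, ipadapter_path, the custom init_ipadapter hook, and the Gradio inputs); the keyword used to pass the reference image (clip_image) and the ipadapter_scale argument are assumptions, since this hunk of the diff does not show how sd_image_a reaches the pipeline call. It also assigns the result of resize(), which the committed line discards.

# Sketch only: not the committed implementation.
from PIL import Image

def run_with_reference_image(pipe, ipadapter_path, image_encoder_path,
                             image_path, prompt, negative_prompt,
                             width=768, height=768, steps=50, guidance=4.2,
                             ip_scale=0.5, generator=None):
    ref = Image.open(image_path).convert('RGB')
    # PIL's resize() returns a new image and takes (width, height),
    # so the return value has to be captured.
    ref = ref.resize((width, height), Image.LANCZOS)
    # Custom hook this Space patches onto StableDiffusion3Pipeline.
    pipe.init_ipadapter(
        ip_adapter_path=ipadapter_path,
        image_encoder_path=image_encoder_path,
        nb_token=64,
    )
    return pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        width=width,
        height=height,
        num_inference_steps=steps,
        guidance_scale=guidance,
        generator=generator,
        clip_image=ref,            # assumed keyword for the reference image
        ipadapter_scale=ip_scale,  # assumed mapping for the ip_scale input
    ).images[0]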