1inkusFace committed
Commit db32f90 · verified · 1 Parent(s): 0c3fea5

Update app.py

Files changed (1):
  1. app.py +23 -10

app.py CHANGED
@@ -20,8 +20,9 @@ from gradio import themes
 from image_gen_aux import UpscaleWithModel
 from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
+import gc
 import torch
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline, Phi3ForCausalLM
 
 torch.backends.cuda.matmul.allow_tf32 = False
@@ -104,6 +105,8 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
     if not negative:
         negative = ""
     return p.replace("{prompt}", positive), n + negative
+
+unetX = UNet2DConditionModel.from_pretrained("ford442/RealVisXL_V5.0_BF16", low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
 
 def load_and_prepare_model():
     #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", safety_checker=None)
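Note: unetX is instantiated at module scope without a .to(device), so the UNet parks in system RAM at startup; generate_30 promotes it to the GPU later (see the last hunk). A minimal sketch of that staged-loading pattern; the subfolder argument is an assumption about the repo layout, not part of the commit:

import torch
from diffusers import UNet2DConditionModel

# Load on CPU first: weights occupy system RAM, not VRAM, during startup.
# Assumption: the checkpoint is a full SDXL repo, so the UNet sits in the
# "unet" subfolder (the commit loads from the repo root).
unet = UNet2DConditionModel.from_pretrained(
    "ford442/RealVisXL_V5.0_BF16",
    subfolder="unet",
    low_cpu_mem_usage=False,
)

# Move to the GPU in bfloat16 only when generation actually needs it.
unet = unet.to(device="cuda", dtype=torch.bfloat16)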
@@ -165,15 +168,15 @@ checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
 captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
 #captioner_3 = pipeline(model="ford442/blip-image-to-text-large-bf16",device='cuda', task="image-to-text")
 model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')
-processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
+processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16", device_map='cuda')
 txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, device_map='cuda', add_prefix_space=False)
 txt_tokenizer.tokenizer_legacy=False
 model = Phi3ForCausalLM.from_pretrained(checkpoint).to('cuda:0')
 #model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='cuda') #.to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
-text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True).to(device=device, dtype=torch.bfloat16)
-text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True).to(device=device, dtype=torch.bfloat16)
+text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True) #.to(device=device, dtype=torch.bfloat16)
+text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True) #.to(device=device, dtype=torch.bfloat16)
 
 MAX_SEED = np.iinfo(np.int32).max
 
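Note: processor5 and model5 form the BLIP-2 captioning pair used before prompt expansion (a processor bundles the tokenizer and image transforms, so device placement matters mainly for the model). A minimal sketch of that caption step, assuming an arbitrary RGB input image:

import torch
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

processor = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
model = Blip2ForConditionalGeneration.from_pretrained(
    "ford442/blip2-image-to-text-bf16", torch_dtype=torch.bfloat16
).to("cuda")

image = Image.open("input.png").convert("RGB")  # assumption: any RGB image
inputs = processor(images=image, return_tensors="pt").to("cuda", torch.bfloat16)
with torch.no_grad():
    ids = model.generate(**inputs, max_new_tokens=50)
print(processor.batch_decode(ids, skip_special_tokens=True)[0].strip())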
@@ -291,7 +294,7 @@ def expand_prompt(prompt):
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=512,
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
@@ -304,7 +307,7 @@ def expand_prompt(prompt):
     outputs_2 = model.generate(
         input_ids=input_ids_2,
         attention_mask=attention_mask_2,
-        max_new_tokens=512,
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
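Note: both generate calls in expand_prompt now cap output at 384 new tokens instead of 512, which bounds KV-cache growth and latency during the two expansion passes. A minimal sketch of what the cap controls, reusing the module-level model and txt_tokenizer:

inputs = txt_tokenizer("Describe a castle at dusk.", return_tensors="pt").to("cuda")
out = model.generate(
    **inputs,
    max_new_tokens=384,  # hard upper bound on *generated* tokens (prompt excluded)
    temperature=0.2,
    top_p=0.9,
    do_sample=True,
)
new_tokens = out[0, inputs["input_ids"].shape[1]:]  # slice off the echoed prompt
assert new_tokens.shape[0] <= 384  # generation may also stop earlier at EOS
print(txt_tokenizer.decode(new_tokens, skip_special_tokens=True))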
@@ -346,8 +349,7 @@ def generate_30(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-    pipe.text_encoder=text_encoder
-    pipe.text_encoder_2=text_encoder_2
+    global captioner_2, model5, processor5, model, txt_tokenizer  # `del` below targets these module-level names
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
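Note: the text-encoder assignments move out of the function entry (they now happen after cleanup, in the next hunk). One subtlety the del statements below depend on: deleting a module-level name inside a function requires a global declaration, because del, like assignment, makes a name function-local. A minimal illustration with hypothetical names:

cached = object()  # module-level binding

def bad():
    del cached       # UnboundLocalError: `del` makes `cached` local to bad()

def good():
    global cached    # target the module-level binding instead
    del cached

good()               # removes the module-level `cached`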
@@ -401,11 +403,22 @@ def generate_30(
     print(caption)
     print(caption_2)
     print("-- generating further caption --")
-
+    del captioner_2
+    del model5
+    del processor5
+    gc.collect()
+    torch.cuda.empty_cache()
     expand_prompt(prompt)
     expand_prompt(caption)
     expanded = expand_prompt(caption_2)
-
+    del model
+    del txt_tokenizer
+    gc.collect()
+    torch.cuda.empty_cache()
+    pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16)
+    pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
+    pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
+
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
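Note: taken together, this hunk implements a two-phase VRAM budget: the captioning and prompt-expansion models are released before the SDXL text encoders and UNet are promoted to the GPU in bfloat16. A minimal sketch of the free-then-load sequence, reusing the module-level names from app.py, with memory probes added to make the effect visible:

import gc
import torch

def gib_allocated():
    return torch.cuda.memory_allocated() / 2**30  # GiB of live tensors

print(f"before free: {gib_allocated():.2f} GiB")
del model5, processor5, captioner_2  # drop the last references to the caption models
gc.collect()                         # collect any lingering reference cycles
torch.cuda.empty_cache()             # hand cached allocator blocks back to the driver
print(f"after free:  {gib_allocated():.2f} GiB")

# Only now promote the diffusion components to the GPU.
pipe.text_encoder = text_encoder.to(device="cuda", dtype=torch.bfloat16)
pipe.text_encoder_2 = text_encoder_2.to(device="cuda", dtype=torch.bfloat16)
pipe.unet = unetX.to(device="cuda", dtype=torch.bfloat16)
print(f"after load:  {gib_allocated():.2f} GiB")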
 