Update app.py
app.py
CHANGED
@@ -20,8 +20,9 @@ from gradio import themes
 from image_gen_aux import UpscaleWithModel
 from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
+import gc
 import torch
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline, Phi3ForCausalLM
 
 torch.backends.cuda.matmul.allow_tf32 = False
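The context above pins matmul precision: torch.backends.cuda.matmul.allow_tf32 = False forces full-FP32 matmuls on Ampere and newer GPUs. PyTorch exposes a second, independent TF32 switch for cuDNN convolutions; a minimal sketch of both flags (the trade-off comment is general PyTorch behavior, not something this commit changes):

import torch

# TF32 trades a few mantissa bits for large matmul speedups on Ampere+.
# Disabling it, as this app does, keeps matmuls in full FP32 precision.
torch.backends.cuda.matmul.allow_tf32 = False  # matmul kernels
torch.backends.cudnn.allow_tf32 = False        # cuDNN convolution kernels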
@@ -104,6 +105,8 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
     if not negative:
         negative = ""
     return p.replace("{prompt}", positive), n + negative
+
+unetX = UNet2DConditionModel.from_pretrained("ford442/RealVisXL_V5.0_BF16", low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
 
 def load_and_prepare_model():
     #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", safety_checker=None)
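The new module-level unetX load pulls the UNet out of the checkpoint so it can be attached to the pipeline later (see the final hunk). For a full SDXL pipeline repo, the UNet normally lives under a subfolder, so a conventional load would pass subfolder="unet"; that layout is an assumption here, since the commit loads from the repo root. A minimal sketch:

import torch
from diffusers import UNet2DConditionModel, StableDiffusionXLPipeline

# Conventional two-step load: fetch the UNet alone, then hand it to the
# pipeline. subfolder="unet" assumes the usual full-checkpoint layout.
unet = UNet2DConditionModel.from_pretrained(
    "ford442/RealVisXL_V5.0_BF16", subfolder="unet", low_cpu_mem_usage=False
)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "ford442/RealVisXL_V5.0_BF16", unet=unet, torch_dtype=torch.bfloat16
)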
@@ -165,15 +168,15 @@ checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
 captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
 #captioner_3 = pipeline(model="ford442/blip-image-to-text-large-bf16",device='cuda', task="image-to-text")
 model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')
-processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
+processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16", device_map='cuda')
 txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, device_map='cuda', add_prefix_space=False)
 txt_tokenizer.tokenizer_legacy=False
 model = Phi3ForCausalLM.from_pretrained(checkpoint).to('cuda:0')
 #model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='cuda') #.to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
-text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True)
-text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True)
+text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True) #.to(device=device, dtype=torch.bfloat16)
+text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True) #.to(device=device, dtype=torch.bfloat16)
 
 MAX_SEED = np.iinfo(np.int32).max
 
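For context, a minimal sketch of how a Blip2Processor/Blip2ForConditionalGeneration pair like processor5 and model5 is typically driven to caption an image (standard transformers usage; the input image and token budget are placeholders, not values from the diff):

from PIL import Image

image = Image.open("input.png")  # placeholder input image
inputs = processor5(images=image, return_tensors="pt").to('cuda')
out_ids = model5.generate(**inputs, max_new_tokens=40)
caption = processor5.batch_decode(out_ids, skip_special_tokens=True)[0].strip()
print(caption)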
@@ -291,7 +294,7 @@ def expand_prompt(prompt):
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
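For context, the full sampling call as this commit configures it, in sketch form; the decode step at the end is an assumption about how expand_prompt consumes the output, not code from the diff:

outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=384,  # hard cap on newly generated tokens
    temperature=0.2,     # low temperature keeps the expansion close to the prompt
    top_p=0.9,           # nucleus sampling
    do_sample=True,
)
# Decode only the tokens generated beyond the prompt (assumed handling).
text = txt_tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)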
@@ -304,7 +307,7 @@ def expand_prompt(prompt):
     outputs_2 = model.generate(
         input_ids=input_ids_2,
         attention_mask=attention_mask_2,
-        max_new_tokens=
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
@@ -346,8 +349,7 @@ def generate_30(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-
-    pipe.text_encoder_2=text_encoder_2
+
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
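generate_30 draws a fresh seed on every call via random.randint and feeds it to a CUDA-side torch.Generator. A sketch of the reproducible variant, with the seed as a parameter instead of a fresh draw (a common alternative, not what this commit does):

import torch

def make_generator(seed: int) -> torch.Generator:
    # A fixed seed makes a run repeatable; drawing a random one per call,
    # as the app does, gives a different image each time.
    return torch.Generator(device='cuda').manual_seed(seed)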
@@ -401,11 +403,22 @@ def generate_30(
     print(caption)
     print(caption_2)
     print("-- generating further caption --")
-
+    del captioner_2
+    del model5
+    del processor5
+    gc.collect()
+    torch.cuda.empty_cache()
     expand_prompt(prompt)
     expand_prompt(caption)
     expanded = expand_prompt(caption_2)
-
+    del model
+    del txt_tokenizer
+    gc.collect()
+    torch.cuda.empty_cache()
+    pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16)
+    pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
+    pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
+
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
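The cleanup added in this hunk follows the standard PyTorch pattern: drop the last references to the captioning models, run the garbage collector, then release cached CUDA blocks so the bfloat16 encoders and UNet can be moved in. One caveat the hunk does not show: del on a module-level name inside a function body needs a global declaration, or Python raises UnboundLocalError. A minimal sketch of the pattern as a helper:

import gc
import torch

def free_cuda_memory():
    # gc.collect() destroys objects whose last reference was just del'd;
    # empty_cache() then returns their cached CUDA blocks to the driver.
    gc.collect()
    torch.cuda.empty_cache()

# Usage inside generate_30 (module-level names need a global declaration):
#   global captioner_2, model5, processor5
#   del captioner_2, model5, processor5
#   free_cuda_memory()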