Update app.py
app.py
CHANGED
@@ -20,8 +20,9 @@ from gradio import themes
 from image_gen_aux import UpscaleWithModel
 from ip_adapter import IPAdapterXL
 from huggingface_hub import snapshot_download
+import gc
 import torch
-from diffusers import AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
+from diffusers import UNet2DConditionModel, AutoencoderKL, StableDiffusionXLPipeline, EulerAncestralDiscreteScheduler
 from transformers import AutoTokenizer, AutoModelForCausalLM, CLIPTextModelWithProjection, CLIPTextModel, Blip2Processor, Blip2ForConditionalGeneration, pipeline, Phi3ForCausalLM
 
 torch.backends.cuda.matmul.allow_tf32 = False
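The context above pins matmul precision: torch.backends.cuda.matmul.allow_tf32 = False forces full-FP32 matmuls on Ampere and newer GPUs. PyTorch exposes a second, independent TF32 switch for cuDNN convolutions; a minimal sketch of both flags (the trade-off comment is general PyTorch behavior, not something this commit changes):

import torch

# TF32 trades a few mantissa bits for large matmul speedups on Ampere+.
# Disabling it, as this app does, keeps matmuls in full FP32 precision.
torch.backends.cuda.matmul.allow_tf32 = False  # matmul kernels
torch.backends.cudnn.allow_tf32 = False        # cuDNN convolution kernels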
@@ -104,6 +105,8 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
     if not negative:
         negative = ""
     return p.replace("{prompt}", positive), n + negative
+
+unetX = UNet2DConditionModel.from_pretrained("ford442/RealVisXL_V5.0_BF16", low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
 
 def load_and_prepare_model():
     #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", safety_checker=None)
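The new module-level unetX load pulls the UNet out of the checkpoint so it can be attached to the pipeline later (see the final hunk). For a full SDXL pipeline repo, the UNet normally lives under a subfolder, so a conventional load would pass subfolder="unet"; that layout is an assumption here, since the commit loads from the repo root. A minimal sketch:

import torch
from diffusers import UNet2DConditionModel, StableDiffusionXLPipeline

# Conventional two-step load: fetch the UNet alone, then hand it to the
# pipeline. subfolder="unet" assumes the usual full-checkpoint layout.
unet = UNet2DConditionModel.from_pretrained(
    "ford442/RealVisXL_V5.0_BF16", subfolder="unet", low_cpu_mem_usage=False
)
pipe = StableDiffusionXLPipeline.from_pretrained(
    "ford442/RealVisXL_V5.0_BF16", unet=unet, torch_dtype=torch.bfloat16
)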
@@ -165,15 +168,15 @@ checkpoint = "ford442/Phi-3.5-mini-instruct-bf16"
 captioner_2 = pipeline(model="Salesforce/blip-image-captioning-base",device='cuda', task="image-to-text")
 #captioner_3 = pipeline(model="ford442/blip-image-to-text-large-bf16",device='cuda', task="image-to-text")
 model5 = Blip2ForConditionalGeneration.from_pretrained("ford442/blip2-image-to-text-bf16").to('cuda')
-processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16")
+processor5 = Blip2Processor.from_pretrained("ford442/blip2-image-to-text-bf16", device_map='cuda')
 txt_tokenizer = AutoTokenizer.from_pretrained(checkpoint, device_map='cuda', add_prefix_space=False)
 txt_tokenizer.tokenizer_legacy=False
 model = Phi3ForCausalLM.from_pretrained(checkpoint).to('cuda:0')
 #model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map='cuda') #.to('cuda')
 
 ip_model = IPAdapterXL(pipe, local_folder, ip_ckpt, device)
-text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True)
-text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True)
+text_encoder=CLIPTextModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder',token=True) #.to(device=device, dtype=torch.bfloat16)
+text_encoder_2=CLIPTextModelWithProjection.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='text_encoder_2',token=True) #.to(device=device, dtype=torch.bfloat16)
 
 MAX_SEED = np.iinfo(np.int32).max
 
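For context, a minimal sketch of how a Blip2Processor/Blip2ForConditionalGeneration pair like processor5 and model5 is typically driven to caption an image (standard transformers usage; the input image and token budget are placeholders, not values from the diff):

from PIL import Image

image = Image.open("input.png")  # placeholder input image
inputs = processor5(images=image, return_tensors="pt").to('cuda')
out_ids = model5.generate(**inputs, max_new_tokens=40)
caption = processor5.batch_decode(out_ids, skip_special_tokens=True)[0].strip()
print(caption)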
@@ -291,7 +294,7 @@ def expand_prompt(prompt):
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
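For context, the full sampling call as this commit configures it, in sketch form; the decode step at the end is an assumption about how expand_prompt consumes the output, not code from the diff:

outputs = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=384,  # hard cap on newly generated tokens
    temperature=0.2,     # low temperature keeps the expansion close to the prompt
    top_p=0.9,           # nucleus sampling
    do_sample=True,
)
# Decode only the tokens generated beyond the prompt (assumed handling).
text = txt_tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)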
@@ -304,7 +307,7 @@ def expand_prompt(prompt):
     outputs_2 = model.generate(
         input_ids=input_ids_2,
         attention_mask=attention_mask_2,
-        max_new_tokens=
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
@@ -346,8 +349,7 @@ def generate_30(
     samples=1,
     progress=gr.Progress(track_tqdm=True) # Add progress as a keyword argument
 ):
-
-    pipe.text_encoder_2=text_encoder_2
+
     seed = random.randint(0, MAX_SEED)
     generator = torch.Generator(device='cuda').manual_seed(seed)
     if latent_file is not None: # Check if a latent file is provided
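generate_30 draws a fresh seed on every call via random.randint and feeds it to a CUDA-side torch.Generator. A sketch of the reproducible variant, with the seed as a parameter instead of a fresh draw (a common alternative, not what this commit does):

import torch

def make_generator(seed: int) -> torch.Generator:
    # A fixed seed makes a run repeatable; drawing a random one per call,
    # as the app does, gives a different image each time.
    return torch.Generator(device='cuda').manual_seed(seed)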
@@ -401,11 +403,22 @@ def generate_30(
     print(caption)
     print(caption_2)
     print("-- generating further caption --")
-
+    del captioner_2
+    del model5
+    del processor5
+    gc.collect()
+    torch.cuda.empty_cache()
     expand_prompt(prompt)
     expand_prompt(caption)
     expanded = expand_prompt(caption_2)
-
+    del model
+    del txt_tokenizer
+    gc.collect()
+    torch.cuda.empty_cache()
+    pipe.text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16)
+    pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
+    pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
+
     print('-- generating image --')
     sd_image = ip_model.generate(
         pil_image_1=sd_image_a,
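The cleanup added in this hunk follows the standard PyTorch pattern: drop the last references to the captioning models, run the garbage collector, then release cached CUDA blocks so the bfloat16 encoders and UNet can be moved in. One caveat the hunk does not show: del on a module-level name inside a function body needs a global declaration, or Python raises UnboundLocalError. A minimal sketch of the pattern as a helper:

import gc
import torch

def free_cuda_memory():
    # gc.collect() destroys objects whose last reference was just del'd;
    # empty_cache() then returns their cached CUDA blocks to the driver.
    gc.collect()
    torch.cuda.empty_cache()

# Usage inside generate_30 (module-level names need a global declaration):
#   global captioner_2, model5, processor5
#   del captioner_2, model5, processor5
#   free_cuda_memory()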