Spaces: Running on Zero

Update app.py

app.py CHANGED
@@ -106,8 +106,9 @@ def apply_style(style_name: str, positive: str, negative: str = "") -> Tuple[str
         negative = ""
     return p.replace("{prompt}", positive), n + negative
 
+unetX = UNet2DConditionModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='unet', low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
+
 def load_and_prepare_model():
-    unetX = UNet2DConditionModel.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='unet', low_cpu_mem_usage=False, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     vaeX = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False, low_cpu_mem_usage=False, torch_dtype=torch.float32, token=True) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
     pipe = StableDiffusionXLPipeline.from_pretrained(
         'ford442/RealVisXL_V5.0_BF16',
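This hunk hoists the UNet load out of load_and_prepare_model() to module scope, so it runs once at import time; the later hunks add global unetX in the generate functions and move the weights to the GPU there. That matches the usual ZeroGPU pattern: load on CPU at import, migrate to CUDA only inside a @spaces.GPU-decorated call. A minimal sketch of the pattern (the function body and names are illustrative, not this repo's code):

    import torch
    import spaces
    from diffusers import UNet2DConditionModel

    # Runs once at import time, on CPU; ZeroGPU has not attached a GPU yet.
    unetX = UNet2DConditionModel.from_pretrained(
        'ford442/RealVisXL_V5.0_BF16', subfolder='unet', low_cpu_mem_usage=False
    )

    @spaces.GPU(duration=40)
    def generate(prompt):
        # A GPU is attached only for the duration of this call.
        unetX.to(device='cuda', dtype=torch.bfloat16)
        ...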
@@ -248,14 +249,14 @@ def captioning(img):
     output_prompt=[]
     # Initial caption generation without a prompt:
     inputsa = processor5(images=img, return_tensors="pt").to('cuda')
-    generated_ids = model5.generate(**inputsa, min_length=42, max_length=
+    generated_ids = model5.generate(**inputsa, min_length=42, max_length=64)
     generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
     output_prompt.append(generated_text)
     print(generated_text)
     # Loop through prompts array:
     for prompt in prompts_array:
         inputs = processor5(images=img, text=prompt, return_tensors="pt").to('cuda')
-        generated_ids = model5.generate(**inputs, min_length=32, max_length=
+        generated_ids = model5.generate(**inputs, min_length=32, max_length=42) # Adjust max_length if needed
         generated_text = processor5.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
         response_text = generated_text.replace(prompt, "").strip() #Or could try .split(prompt, 1)[-1].strip()
         output_prompt.append(response_text)
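min_length and max_length are measured in tokens of the generated sequence, so the first call forces the unconditional caption into a 42-64 token window and the prompted calls into 32-42. A standalone sketch of the same call pattern, assuming a BLIP-style captioner behind processor5/model5 (the checkpoint below is an assumption, not taken from this Space):

    import torch
    from PIL import Image
    from transformers import BlipProcessor, BlipForConditionalGeneration

    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large").to("cuda")

    img = Image.open("example.png").convert("RGB")

    # Unconditional caption, forced into the 42-64 token window.
    inputs = processor(images=img, return_tensors="pt").to("cuda")
    ids = model.generate(**inputs, min_length=42, max_length=64)
    print(processor.batch_decode(ids, skip_special_tokens=True)[0].strip())

    # Prompted caption; the prompt text is echoed in the output and stripped off.
    inputs = processor(images=img, text="a photograph of", return_tensors="pt").to("cuda")
    ids = model.generate(**inputs, min_length=32, max_length=42)
    text = processor.batch_decode(ids, skip_special_tokens=True)[0].strip()
    print(text.replace("a photograph of", "").strip())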
@@ -296,7 +297,7 @@ def expand_prompt(prompt):
     outputs = model.generate(
         input_ids=input_ids,
         attention_mask=attention_mask,
-        max_new_tokens=
+        max_new_tokens=384,
         temperature=0.2,
         top_p=0.9,
         do_sample=True,
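Unlike max_length, which counts the prompt tokens too, max_new_tokens=384 budgets only freshly generated tokens; with do_sample=True, the low temperature (0.2) plus top_p=0.9 keeps the rewrite close to greedy decoding. A self-contained sketch of the same decode settings (gpt2 stands in for the Space's rewrite model, which this diff does not name):

    from transformers import AutoModelForCausalLM, AutoTokenizer

    tok = AutoTokenizer.from_pretrained("gpt2")
    lm = AutoModelForCausalLM.from_pretrained("gpt2")

    enc = tok("Rewrite with more visual detail: a red fox at dusk",
              return_tensors="pt", return_attention_mask=True)
    out = lm.generate(
        input_ids=enc["input_ids"],
        attention_mask=enc["attention_mask"],
        max_new_tokens=384,              # budget for new tokens only
        temperature=0.2,                 # near-greedy sampling
        top_p=0.9,
        do_sample=True,
        pad_token_id=tok.eos_token_id,   # gpt2 defines no pad token
    )
    print(tok.decode(out[0], skip_special_tokens=True))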
@@ -304,12 +305,12 @@ def expand_prompt(prompt):
     enhanced_prompt = txt_tokenizer.decode(outputs[0], skip_special_tokens=True)
     print('-- generated prompt 1 --')
     print(enhanced_prompt)
-
     enhanced_prompt = filter_text(enhanced_prompt,prompt)
     enhanced_prompt = filter_text(enhanced_prompt,user_prompt_rewrite)
     enhanced_prompt = filter_text(enhanced_prompt,system_prompt_rewrite)
     print('-- filtered prompt --')
     print(enhanced_prompt)
+    '''
     input_text_2 = f"{system_prompt_rewrite} {user_prompt_rewrite_2} {enhanced_prompt}"
     encoded_inputs_2 = txt_tokenizer(input_text_2, return_tensors="pt", return_attention_mask=True).to("cuda:0")
     input_ids_2 = encoded_inputs_2["input_ids"].to("cuda:0")
@@ -332,7 +333,8 @@ def expand_prompt(prompt):
     print('-- filtered prompt 2 --')
     print(enhanced_prompt_2)
     enh_prompt=[enhanced_prompt,enhanced_prompt_2]
-
+    '''
+    return enhanced_prompt
 
 @spaces.GPU(duration=40)
 def generate_30(
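Together these two hunks wrap the second rewrite pass in a triple-quoted string (inert text, so it no longer executes) and change expand_prompt's contract from a two-element list to a single string. The caller hunks in generate_30/60/90 below make the matching change; schematically:

    # before: expand_prompt returned [enhanced_prompt, enhanced_prompt_2]
    expanded = expand_prompt(caption)
    new_prompt = prompt + ' ' + expanded_1 + ' ' + expanded_2

    # after: expand_prompt returns one string
    expanded = expand_prompt(caption)
    new_prompt = prompt + ' ' + expanded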
@@ -416,16 +418,10 @@ def generate_30(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
-
-
-
-
-    caption_2 = flatten_and_stringify(caption_2)
-    caption_2 = " ".join(caption_2)
-
-    print(caption)
-    print(caption_2)
-
+    captions = caption+caption_2
+    captions = flatten_and_stringify(captions)
+    captions = " ".join(captions)
+    print(captions)
     print("-- generating further caption --")
     global model5
     global processor5
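The rewritten block concatenates both caption lists before flattening, so flatten_and_stringify must turn arbitrarily nested lists into a flat list of strings for the " ".join(captions) that follows. Its definition is not part of this diff; a plausible sketch consistent with the call sites (hypothetical, not the Space's actual helper):

    def flatten_and_stringify(data):
        """Flatten arbitrarily nested lists/tuples into a flat list of strings."""
        if isinstance(data, (list, tuple)):
            # Recurse into each element and splice the pieces together.
            return [s for item in data for s in flatten_and_stringify(item)]
        return [str(data)]

    # Example: mixed strings and nested lists, as caption+caption_2 may produce.
    print(" ".join(flatten_and_stringify(["a cat", ["on a mat", ["indoors"]]])))
    # -> a cat on a mat indoors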
@@ -435,9 +431,7 @@ def generate_30(
     gc.collect()
     torch.cuda.empty_cache()
     expanded = expand_prompt(caption)
-
-    expanded_2 = expanded[1]
-    new_prompt = prompt+' '+expanded_1+' '+expanded_2
+    new_prompt = prompt+' '+expanded
     print("-- ------------ --")
     print("-- FINAL PROMPT --")
     print(new_prompt)
@@ -451,6 +445,7 @@ def generate_30(
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
+    global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
     pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
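Since unetX now lives at module scope (first hunk), it is visible here; note that Python only requires a global declaration when a function assigns to the name, so this line matters only if unetX is ever rebound inside generate_30 and otherwise just documents intent. For illustration:

    unetX = object()          # module level

    def read_only():
        return unetX          # reading works without a global statement

    def rebind():
        global unetX          # required: this function assigns to the name
        unetX = object()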
@@ -573,17 +568,10 @@ def generate_60(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
-
-
-
-
-
-    caption_2 = flatten_and_stringify(caption_2)
-    caption_2 = " ".join(caption_2)
-
-    print(caption)
-    print(caption_2)
-
+    captions = caption+caption_2
+    captions = flatten_and_stringify(captions)
+    captions = " ".join(captions)
+    print(captions)
     print("-- generating further caption --")
     global model5
     global processor5
@@ -593,9 +581,7 @@ def generate_60(
     gc.collect()
     torch.cuda.empty_cache()
     expanded = expand_prompt(caption)
-
-    expanded_2 = expanded[1]
-    new_prompt = prompt+' '+expanded_1+' '+expanded_2
+    new_prompt = prompt+' '+expanded
     print("-- ------------ --")
     print("-- FINAL PROMPT --")
     print(new_prompt)
@@ -609,6 +595,7 @@ def generate_60(
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
+    global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
     pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)
@@ -731,17 +718,10 @@ def generate_90(
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
     filename= f'rv_IP_{timestamp}.png'
     print("-- using image file --")
-
-
-
-
-
-    caption_2 = flatten_and_stringify(caption_2)
-    caption_2 = " ".join(caption_2)
-
-    print(caption)
-    print(caption_2)
-
+    captions = caption+caption_2
+    captions = flatten_and_stringify(captions)
+    captions = " ".join(captions)
+    print(captions)
     print("-- generating further caption --")
     global model5
     global processor5
@@ -751,9 +731,7 @@ def generate_90(
     gc.collect()
     torch.cuda.empty_cache()
     expanded = expand_prompt(caption)
-
-    expanded_2 = expanded[1]
-    new_prompt = prompt+' '+expanded_1+' '+expanded_2
+    new_prompt = prompt+' '+expanded
     print("-- ------------ --")
     print("-- FINAL PROMPT --")
     print(new_prompt)
@@ -767,6 +745,7 @@ def generate_90(
     torch.cuda.empty_cache()
     global text_encoder_1
     global text_encoder_2
+    global unetX
     pipe.text_encoder=text_encoder_1.to(device=device, dtype=torch.bfloat16)
     pipe.text_encoder_2=text_encoder_2.to(device=device, dtype=torch.bfloat16)
     pipe.unet=unetX.to(device=device, dtype=torch.bfloat16)