RealVis_v5.0_BF16_IP_B

Running on Zero

App Files Files Community

1inkusFace committed 12 days ago

Commit

10ce5fa

verified ·

1 Parent(s): 837ba80

Update ip_adapter/ip_adapter.py

Browse files

Files changed (1) hide show

ip_adapter/ip_adapter.py +59 -2

ip_adapter/ip_adapter.py CHANGED Viewed

@@ -125,6 +125,10 @@ class IPAdapter:
         self,
         pil_image,
         prompt=None,
         negative_prompt=None,
         scale=1.0,
         num_samples=4,
@@ -163,11 +167,29 @@ class IPAdapter:
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.repeat(1, num_samples, 1)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * num_samples, seq_len, -1)
         with torch.inference_mode():
             prompt_embeds = self.pipe._encode_prompt(
                 prompt, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
             negative_prompt_embeds_, prompt_embeds_ = prompt_embeds.chunk(2)
             prompt_embeds = torch.cat([prompt_embeds_, image_prompt_embeds], dim=1)
             negative_prompt_embeds = torch.cat([negative_prompt_embeds_, uncond_image_prompt_embeds], dim=1)
@@ -204,6 +226,10 @@ class IPAdapterXL(IPAdapter):
         pil_image_4=None,
         pil_image_5=None,
         prompt=None,
         negative_prompt=None,
         text_scale=1.0,
         ip_scale=1.0,
@@ -280,11 +306,42 @@ class IPAdapterXL(IPAdapter):
         uncond_image_prompt_embeds = torch.cat(uncond_image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.repeat(1, num_samples, 1)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * num_samples, seq_len, -1)
         with torch.inference_mode():
             prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = self.pipe.encode_prompt(
                 prompt, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
             prompt_embeds = prompt_embeds * text_scale
             prompt_embeds = torch.cat([prompt_embeds, image_prompt_embeds], dim=1)
             negative_prompt_embeds = torch.cat([negative_prompt_embeds, uncond_image_prompt_embeds], dim=1)

         self,
         pil_image,
         prompt=None,
+        prompt2=None,
+        prompt3=None,
+        prompt4=None,
+        prompt5=None,
         negative_prompt=None,
         scale=1.0,
         num_samples=4,
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.repeat(1, num_samples, 1)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * num_samples, seq_len, -1)
+        prompt_embeds_list=[]
         with torch.inference_mode():
             prompt_embeds = self.pipe._encode_prompt(
                 prompt, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
             negative_prompt_embeds_, prompt_embeds_ = prompt_embeds.chunk(2)
+            prompt_embeds_list.append(prompt_embeds)
+            if prompt2 is not None:
+                prompt_embeds = self.pipe._encode_prompt(
+                    prompt2, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_list.append(prompt_embeds)
+            if prompt3 is not None:
+                prompt_embeds = self.pipe._encode_prompt(
+                    prompt3, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_list.append(prompt_embeds)
+            if prompt4 is not None:
+                prompt_embeds = self.pipe._encode_prompt(
+                    prompt4, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_list.append(prompt_embeds)
+            if prompt5 is not None:
+                prompt_embeds = self.pipe._encode_prompt(
+                    prompt5, device=self.device, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_list.append(prompt_embeds)
             prompt_embeds = torch.cat([prompt_embeds_, image_prompt_embeds], dim=1)
             negative_prompt_embeds = torch.cat([negative_prompt_embeds_, uncond_image_prompt_embeds], dim=1)
         pil_image_4=None,
         pil_image_5=None,
         prompt=None,
+        prompt2=None,
+        prompt3=None,
+        prompt4=None,
+        prompt5=None,
         negative_prompt=None,
         text_scale=1.0,
         ip_scale=1.0,
         uncond_image_prompt_embeds = torch.cat(uncond_image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.repeat(1, num_samples, 1)
         uncond_image_prompt_embeds = uncond_image_prompt_embeds.view(bs_embed * num_samples, seq_len, -1)
+        prompt_embeds_list=[]
+        pooled_prompt_embeds_list=[]
         with torch.inference_mode():
             prompt_embeds, negative_prompt_embeds, pooled_prompt_embeds, negative_pooled_prompt_embeds = self.pipe.encode_prompt(
                 prompt, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+            prompt_embeds_list.append(prompt_embeds)
+            pooled_prompt_embeds_list.append(pooled_prompt_embeds)
+            if prompt2 is not None:
+                prompt_embeds, negative_prompt_embeds_, pooled_prompt_embeds, negative_pooled_prompt_embeds_ = self.pipe.encode_prompt(
+                    prompt2, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+                prompt_embeds_list.append(prompt_embeds)
+                pooled_prompt_embeds_list.append(pooled_prompt_embeds)
+            if prompt3 is not None:
+                prompt_embeds, negative_prompt_embeds_, pooled_prompt_embeds, negative_pooled_prompt_embeds_ = self.pipe.encode_prompt(
+                    prompt3, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+                prompt_embeds_list.append(prompt_embeds)
+                pooled_prompt_embeds_list.append(pooled_prompt_embeds)
+            if prompt4 is not None:
+                prompt_embeds, negative_prompt_embeds_, pooled_prompt_embeds, negative_pooled_prompt_embeds_ = self.pipe.encode_prompt(
+                    prompt4, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+                prompt_embeds_list.append(prompt_embeds)
+                pooled_prompt_embeds_list.append(pooled_prompt_embeds)
+            if prompt5 is not None:
+                prompt_embeds, negative_prompt_embeds_, pooled_prompt_embeds, negative_pooled_prompt_embeds_ = self.pipe.encode_prompt(
+                    prompt5, num_images_per_prompt=num_samples, do_classifier_free_guidance=True, negative_prompt=negative_prompt)
+                prompt_embeds_list.append(prompt_embeds)
+                pooled_prompt_embeds_list.append(pooled_prompt_embeds)
+            prompt_embeds = torch.cat(prompt_embeds_list)
+            prompt_embeds = torch.mean(prompt_embeds,dim=0,keepdim=True)
+            pooled_prompt_embeds = torch.cat(pooled_prompt_embeds_list)
+            pooled_prompt_embeds = torch.mean(pooled_prompt_embeds,dim=0,keepdim=True)
             prompt_embeds = prompt_embeds * text_scale
+            pooled_prompt_embeds = pooled_prompt_embeds * text_scale
             prompt_embeds = torch.cat([prompt_embeds, image_prompt_embeds], dim=1)
             negative_prompt_embeds = torch.cat([negative_prompt_embeds, uncond_image_prompt_embeds], dim=1)