StableDiffusion-3.5-Large-IP-B

Running on Zero

1inkusFace committed 24 days ago

Commit

4843f86

verified ·

1 Parent(s): b810973

Update pipeline_stable_diffusion_3_ipa.py

Files changed (1) hide show

pipeline_stable_diffusion_3_ipa.py CHANGED Viewed

@@ -1170,8 +1170,20 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
             image_prompt_embeds_5 = self.encode_clip_image_emb(clip_image, device, dtype)
             image_prompt_embeds_5 = image_prompt_embeds_5 * scale_5
             image_prompt_embeds_list.append(image_prompt_embeds_5)
-        clip_image_embeds = torch.cat(image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)

             image_prompt_embeds_5 = self.encode_clip_image_emb(clip_image, device, dtype)
             image_prompt_embeds_5 = image_prompt_embeds_5 * scale_5
             image_prompt_embeds_list.append(image_prompt_embeds_5)
+        # Concatenate the image embeddings
+        concatenated_embeds = torch.cat(image_prompt_embeds_list, dim=1)  # Concatenate along dimension 1
+        # Create a linear layer
+        embedding_dim = concatenated_embeds.shape[-1]  # Get the embedding dimension
+        linear_layer = nn.Linear(embedding_dim * len(image_prompt_embeds_list), embedding_dim)
+        # Pass the concatenated embeddings through the linear layer
+        combined_embeds = linear_layer(concatenated_embeds)
+        # Add a ReLU activation for non-linearity (optional)
+        combined_embeds = torch.relu(combined_embeds)
+        clip_image_embeds = clip_image_embeds #torch.cat(image_prompt_embeds_list).mean(dim=0).unsqueeze(0)
         # 4. Prepare timesteps
         timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps)