1inkusFace committed
Commit 58bcabd · verified · 1 parent: 3de1417

Update pipeline_stable_diffusion_3_ipa.py

Files changed (1): pipeline_stable_diffusion_3_ipa.py (+22 −22)
pipeline_stable_diffusion_3_ipa.py CHANGED
@@ -1148,48 +1148,48 @@ class StableDiffusion3Pipeline(DiffusionPipeline, SD3LoraLoaderMixin, FromSingle
             print('Using primary image.')
             clip_image = clip_image.resize((max(clip_image.size), max(clip_image.size)))
             #clip_image_embeds_1 = self.encode_clip_image_emb(clip_image, device, dtype)
-            with torch.no_grad():
-                clip_image_embeds_1 = self.clip_image_processor(images=clip_image, return_tensors="pt").pixel_values
-                print('clip output shape: ', clip_image_embeds_1.shape)
-                clip_image_embeds_1 = clip_image_embeds_1.to(device, dtype=dtype)
-                clip_image_embeds_1 = self.image_encoder(clip_image_embeds_1, output_hidden_states=True).hidden_states[-2]
-                print('encoder output shape: ', clip_image_embeds_1.shape)
+            #with torch.no_grad():
+            clip_image_embeds_1 = self.clip_image_processor(images=clip_image, return_tensors="pt").pixel_values
+            print('clip output shape: ', clip_image_embeds_1.shape)
+            clip_image_embeds_1 = clip_image_embeds_1.to(device, dtype=dtype)
+            clip_image_embeds_1 = self.image_encoder(clip_image_embeds_1, output_hidden_states=True).hidden_states[-2]
+            print('encoder output shape: ', clip_image_embeds_1.shape)
             clip_image_embeds_1 = clip_image_embeds_1 * scale_1
             image_prompt_embeds_list.append(clip_image_embeds_1)
         if clip_image_2 != None:
             print('Using secondary image.')
             clip_image_2 = clip_image_2.resize((max(clip_image_2.size), max(clip_image_2.size)))
-            with torch.no_grad():
-                clip_image_embeds_2 = self.clip_image_processor(images=clip_image_2, return_tensors="pt").pixel_values
-                clip_image_embeds_2 = clip_image_embeds_2.to(device, dtype=dtype)
-                clip_image_embeds_2 = self.image_encoder(clip_image_embeds_2, output_hidden_states=True).hidden_states[-2]
+            #with torch.no_grad():
+            clip_image_embeds_2 = self.clip_image_processor(images=clip_image_2, return_tensors="pt").pixel_values
+            clip_image_embeds_2 = clip_image_embeds_2.to(device, dtype=dtype)
+            clip_image_embeds_2 = self.image_encoder(clip_image_embeds_2, output_hidden_states=True).hidden_states[-2]
             clip_image_embeds_2 = clip_image_embeds_2 * scale_2
             image_prompt_embeds_list.append(clip_image_embeds_2)
         if clip_image_3 != None:
             print('Using tertiary image.')
             clip_image_3 = clip_image_3.resize((max(clip_image_3.size), max(clip_image_3.size)))
-            with torch.no_grad():
-                clip_image_embeds_3 = self.clip_image_processor(images=clip_image_3, return_tensors="pt").pixel_values
-                clip_image_embeds_3 = clip_image_embeds_3.to(device, dtype=dtype)
-                clip_image_embeds_3 = self.image_encoder(clip_image_embeds_3, output_hidden_states=True).hidden_states[-2]
+            #with torch.no_grad():
+            clip_image_embeds_3 = self.clip_image_processor(images=clip_image_3, return_tensors="pt").pixel_values
+            clip_image_embeds_3 = clip_image_embeds_3.to(device, dtype=dtype)
+            clip_image_embeds_3 = self.image_encoder(clip_image_embeds_3, output_hidden_states=True).hidden_states[-2]
             clip_image_embeds_3 = clip_image_embeds_3 * scale_3
             image_prompt_embeds_list.append(clip_image_embeds_3)
         if clip_image_4 != None:
             print('Using quaternary image.')
             clip_image_4 = clip_image_4.resize((max(clip_image_4.size), max(clip_image_4.size)))
-            with torch.no_grad():
-                clip_image_embeds_4 = self.clip_image_processor(images=clip_image_4, return_tensors="pt").pixel_values
-                clip_image_embeds_4 = clip_image_embeds_4.to(device, dtype=dtype)
-                clip_image_embeds_4 = self.image_encoder(clip_image_embeds_4, output_hidden_states=True).hidden_states[-2]
+            #with torch.no_grad():
+            clip_image_embeds_4 = self.clip_image_processor(images=clip_image_4, return_tensors="pt").pixel_values
+            clip_image_embeds_4 = clip_image_embeds_4.to(device, dtype=dtype)
+            clip_image_embeds_4 = self.image_encoder(clip_image_embeds_4, output_hidden_states=True).hidden_states[-2]
             clip_image_embeds_4 = clip_image_embeds_4 * scale_4
             image_prompt_embeds_list.append(clip_image_embeds_4)
         if clip_image_5 != None:
             print('Using quinary image.')
             clip_image_5 = clip_image_5.resize((max(clip_image_5.size), max(clip_image_5.size)))
-            with torch.no_grad():
-                clip_image_embeds_5 = self.clip_image_processor(images=clip_image_5, return_tensors="pt").pixel_values
-                clip_image_embeds_5 = clip_image_embeds_5.to(device, dtype=dtype)
-                clip_image_embeds_5 = self.image_encoder(clip_image_embeds_5, output_hidden_states=True).hidden_states[-2]
+            #with torch.no_grad():
+            clip_image_embeds_5 = self.clip_image_processor(images=clip_image_5, return_tensors="pt").pixel_values
+            clip_image_embeds_5 = clip_image_embeds_5.to(device, dtype=dtype)
+            clip_image_embeds_5 = self.image_encoder(clip_image_embeds_5, output_hidden_states=True).hidden_states[-2]
             clip_image_embeds_5 = clip_image_embeds_5 * scale_5
             image_prompt_embeds_list.append(clip_image_embeds_5)
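
The net effect of the hunk is that all five CLIP image encodings now run with autograd enabled: each `with torch.no_grad():` is commented out and its body de-indented by one level. Below is a minimal sketch, not part of the commit, of how the five near-identical blocks could be collapsed into one helper. `clip_image_processor` and `image_encoder` are the attribute names used in the diff; `encode_ip_images` and the `use_grad` flag are hypothetical names standing in for the removed wrappers.

import contextlib

import torch


def encode_ip_images(self, images_and_scales, device, dtype, use_grad=True):
    """Sketch of a drop-in method replacing the five repeated blocks above.

    `images_and_scales` is a list of (PIL image or None, scale) pairs,
    e.g. [(clip_image, scale_1), (clip_image_2, scale_2), ...].
    """
    image_prompt_embeds_list = []
    # Mirror the commit: autograd stays enabled unless the caller opts out.
    ctx = contextlib.nullcontext() if use_grad else torch.no_grad()
    with ctx:
        for clip_image, scale in images_and_scales:
            if clip_image is None:
                continue
            # Square the image by resizing both sides to the longer edge,
            # exactly as each block in the diff does.
            side = max(clip_image.size)
            clip_image = clip_image.resize((side, side))
            pixel_values = self.clip_image_processor(
                images=clip_image, return_tensors="pt"
            ).pixel_values.to(device, dtype=dtype)
            # Penultimate hidden state of the CLIP vision tower, per the diff.
            embeds = self.image_encoder(
                pixel_values, output_hidden_states=True
            ).hidden_states[-2]
            image_prompt_embeds_list.append(embeds * scale)
    return image_prompt_embeds_list

One consequence worth noting: without `torch.no_grad()`, each encoder forward pass records the autograd graph and retains activations for a potential backward pass, which raises peak memory during inference. That is presumably intentional here (e.g., if gradients through the image embeddings are needed); the `use_grad` flag in the sketch keeps the behavior switchable.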