## How to use
# Load the pretrained DC-AE decoder, read one stored latent from the HDF5
# file, decode it back to an image, and write the result to disk.
import torch
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers import AutoencoderDC
from torchvision.utils import save_image
import numpy as np
import h5py

dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')

# SWMR read mode lets us open the file even while a writer holds it.
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Rescale the stored latent (factor 35 presumably undoes the normalisation
# applied at encoding time — TODO confirm against the encoding script),
# add a batch dimension, and move it to the GPU as float32.
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Decode to pixel space and map the [-1, 1] output into [0, 1] for saving.
y = dc_encoder.decode(latents).sample
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
# Load the CLIP ViT-L/14 encoder (preprocessing transform is unused here)
# and move it onto the GPU.
import clip

model, _ = clip.load("ViT-L/14")
model.to('cuda')
def encode_text(label, model, device):
    """Encode a text prompt with CLIP and return the embedding on the CPU.

    Args:
        label: String (or list of strings) to tokenize; inputs longer than
            CLIP's context length are truncated (``truncate=True``).
        model: Loaded CLIP model exposing ``encode_text``.
        device: Device the token tensor is moved to before encoding
            (e.g. ``'cuda'``).

    Returns:
        The text-embedding tensor moved to the CPU.
    """
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    # Inference only: disable autograd so no computation graph is retained,
    # saving GPU memory when encoding many prompts. Values are unchanged.
    with torch.no_grad():
        text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
# Load the precomputed CLIP text encodings from the HDF5 file.
import h5py

# A context manager replaces the manual open()/close() pair so the file
# handle is released even if reading raises.
with h5py.File('latent_folder/text_encodings.hdf5', 'r') as f:
    # View the contents.
    print(list(f.keys()))
    # Access the specific dataset; [:] copies it fully into memory, so the
    # array remains usable after the file is closed.
    dataset = f['text_encodings'][:]
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Both inputs are flattened to 1-D first, so shapes like ``(1, 768)``
    and ``(768,)`` compare transparently. (The original flattened only
    ``v1``; flattening ``v2`` as well is a backward-compatible
    generalization.)

    Args:
        v1: First vector (array-like, any shape; flattened to 1-D).
        v2: Second vector (array-like, any shape; flattened to 1-D).

    Returns:
        Scalar ``v1 . v2 / (|v1| * |v2|)``. A zero-norm input propagates
        as NaN/inf, matching the original behavior.
    """
    v1 = np.asarray(v1).reshape(-1)  # e.g. (1, 768) -> (768,)
    v2 = np.asarray(v2).reshape(-1)

    # Calculate cosine similarity.
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    return dot_product / (norm_v1 * norm_v2)
# Embed a sample caption, then compare it against the stored text encoding
# at index 5 via cosine similarity.
caption = """The double-cut pork chop at The Ainsworth in"""
textembed = encode_text(caption, model, 'cuda')
cosine_similarity(textembed.cpu().detach().numpy(), dataset[5])
Dataset: https://huggingface.co/datasets/zzliang/GRIT

Downloads last month: 6