## How to use
# Load the pretrained DC-AE decoder, read one stored latent from the HDF5
# file, decode it back to an image, and write the result to disk.
import torch
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers import AutoencoderDC
from torchvision.utils import save_image
import numpy as np
import h5py

dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')

# SWMR read mode lets us open the file even while a writer holds it.
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Rescale the stored latent (factor 35 presumably undoes the normalisation
# applied at encoding time — TODO confirm against the encoding script),
# add a batch dimension, and move it to the GPU as float32.
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Decode to pixel space and map the [-1, 1] output into [0, 1] for saving.
y = dc_encoder.decode(latents).sample
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
# Load the CLIP ViT-L/14 encoder (preprocessing transform is unused here)
# and move it onto the GPU.
import clip

model, _ = clip.load("ViT-L/14")
model.to('cuda')
def encode_text(label, model, device):
    """Encode a text prompt with CLIP and return the embedding on the CPU.

    Args:
        label: String (or list of strings) to tokenize; inputs longer than
            CLIP's context length are truncated (``truncate=True``).
        model: Loaded CLIP model exposing ``encode_text``.
        device: Device the token tensor is moved to before encoding
            (e.g. ``'cuda'``).

    Returns:
        The text-embedding tensor moved to the CPU.
    """
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    # Inference only: disable autograd so no computation graph is retained,
    # saving GPU memory when encoding many prompts. Values are unchanged.
    with torch.no_grad():
        text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
# Load the precomputed CLIP text encodings from the HDF5 file.
import h5py

# A context manager replaces the manual open()/close() pair so the file
# handle is released even if reading raises.
with h5py.File('latent_folder/text_encodings.hdf5', 'r') as f:
    # View the contents.
    print(list(f.keys()))
    # Access the specific dataset; [:] copies it fully into memory, so the
    # array remains usable after the file is closed.
    dataset = f['text_encodings'][:]
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Both inputs are flattened to 1-D first, so shapes like ``(1, 768)``
    and ``(768,)`` compare transparently. (The original flattened only
    ``v1``; flattening ``v2`` as well is a backward-compatible
    generalization.)

    Args:
        v1: First vector (array-like, any shape; flattened to 1-D).
        v2: Second vector (array-like, any shape; flattened to 1-D).

    Returns:
        Scalar ``v1 . v2 / (|v1| * |v2|)``. A zero-norm input propagates
        as NaN/inf, matching the original behavior.
    """
    v1 = np.asarray(v1).reshape(-1)  # e.g. (1, 768) -> (768,)
    v2 = np.asarray(v2).reshape(-1)

    # Calculate cosine similarity.
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    return dot_product / (norm_v1 * norm_v2)
# Embed a sample caption, then compare it against the stored text encoding
# at index 5 via cosine similarity.
caption = """The double-cut pork chop at The Ainsworth in"""
textembed = encode_text(caption, model, 'cuda')
cosine_similarity(textembed.cpu().detach().numpy(), dataset[5])
Dataset: https://huggingface.co/datasets/zzliang/GRIT

Downloads last month: 6