rishabh063
committed on
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
How to use
|
3 |
+
|
4 |
+
import torch
|
5 |
+
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
|
6 |
+
from diffusers import AutoencoderDC
|
7 |
+
from torchvision.utils import save_image
|
8 |
+
import numpy as np
|
9 |
+
import h5py
|
10 |
+
|
11 |
+
|
12 |
+
|
13 |
+
# Load the pretrained DC-AE autoencoder in float32 and place it on the GPU.
dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')

# Read every stored image latent into memory; the file is closed as soon as
# the `with` block exits, so `dataset` is a plain in-memory array afterwards.
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Take sample 5 and add a leading batch axis.
# NOTE(review): the factor 35 presumably undoes a scaling applied when the
# latents were written -- confirm against the script that produced the file.
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Inference only: skip autograd bookkeeping so no graph is kept on the GPU.
with torch.no_grad():
    y = dc_encoder.decode(latents).sample

# NOTE(review): `y * 0.5 + 0.5` presumably maps decoder output from [-1, 1]
# to the [0, 1] range expected by save_image -- confirm.
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
|
33 |
+
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
import clip
|
38 |
+
# Load the CLIP ViT-L/14 checkpoint; the second value returned by clip.load
# is not used in this example.
model, _ = clip.load("ViT-L/14")

# Place the model on the GPU.
model = model.to('cuda')
|
41 |
+
|
42 |
+
|
43 |
+
def encode_text(label, model, device):
    """Encode text with a CLIP model and return the embedding on the CPU.

    Args:
        label: Text (or texts) passed to ``clip.tokenize``; input longer than
            the tokenizer's context is truncated (``truncate=True``).
        model: Object exposing ``encode_text(tokens)``.
        device: Device the token tensor is moved to before encoding.

    Returns:
        The result of ``model.encode_text`` moved to the CPU. It is computed
        under ``torch.no_grad()``, so it carries no autograd graph.
    """
    text_tokens = clip.tokenize(label, truncate=True).to(device)

    # Inference only: without no_grad the forward pass records a graph that
    # wastes memory and forces callers to .detach() before converting to NumPy.
    with torch.no_grad():
        text_encoding = model.encode_text(text_tokens)

    return text_encoding.cpu()
|
47 |
+
|
48 |
+
|
49 |
+
import h5py
|
50 |
+
|
51 |
+
# Open the file read-only; the context manager closes it even if a read fails
with h5py.File('latent_folder/text_encodings.hdf5', 'r') as f:
    # View the contents
    print(list(f.keys()))

    # Access specific datasets ([:] copies the data into memory, so `dataset`
    # stays usable after the file is closed)
    dataset = f['text_encodings'][:]
|
62 |
+
|
63 |
+
|
64 |
+
|
65 |
+
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Both inputs are converted to arrays and flattened to 1-D, so a row vector
    such as ``(1, 768)`` and a flat ``(768,)`` vector (or a plain list) can be
    mixed in either argument position.

    Args:
        v1: Array-like vector.
        v2: Array-like vector with the same number of elements as ``v1``.

    Returns:
        ``dot(v1, v2) / (||v1|| * ||v2||)`` as a NumPy scalar. If either
        vector has zero norm the division yields ``nan``.
    """
    # Flatten both sides; previously only v1 was flattened, so a (1, D) v2
    # raised a shape error in np.dot.
    v1 = np.asarray(v1).reshape(-1)
    v2 = np.asarray(v2).reshape(-1)

    dot_product = np.dot(v1, v2)
    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)

    return dot_product / norm_product
|
75 |
+
|
76 |
+
|
77 |
+
|
78 |
+
# Embed an example caption with the CLIP text encoder on the GPU.
prompt = """The double-cut pork chop at The Ainsworth in"""
textembed = encode_text(prompt, model, 'cuda')

# Convert the embedding to NumPy and compare it with stored encoding 5.
text_vector = textembed.cpu().detach().numpy()
cosine_similarity(text_vector, dataset[5])
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
Dataset link: https://huggingface.co/datasets/zzliang/GRIT
|
86 |
+
|