rishabh063 committed on
Commit
407f62e
·
verified ·
1 Parent(s): 3c8a0de

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +86 -0
README.md ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ How to use
3
+
4
+ import torch
5
+ from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
6
+ from diffusers import AutoencoderDC
7
+ from torchvision.utils import save_image
8
+ import numpy as np
9
+ import h5py
10
+
11
+
12
+
13
# Load the pretrained DC-AE autoencoder with float32 weights onto the GPU.
dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers",
    torch_dtype=torch.float32,
).to('cuda')

# Read every stored image latent into memory; the file is closed on exit.
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    dataset = f['image_latents'][:]

# Take sample 5, rescale it and add a leading batch axis.
# NOTE(review): the factor 35 presumably undoes a scaling applied when the
# latents were written -- confirm against the script that produced the file.
latents = np.expand_dims(dataset[5] * 35, axis=0)
latents = torch.from_numpy(latents).float().to('cuda')

# Decoding is inference only, so skip autograd bookkeeping to save GPU memory.
with torch.no_grad():
    y = dc_encoder.decode(latents).sample

# NOTE(review): the 0.5 * y + 0.5 mapping assumes decoder output in [-1, 1]
# -- confirm against the model card.
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
33
+
34
+
35
+
36
+
37
+ import clip
38
# Load the CLIP ViT-L/14 checkpoint; the second return value is discarded.
model, _ = clip.load("ViT-L/14")

# Move the model to the GPU (Module.to returns the same module object).
model = model.to('cuda')
41
+
42
+
43
def encode_text(label, model, device):
    """Encode text with a CLIP model and return the embedding on the CPU.

    Args:
        label: Text passed to ``clip.tokenize``; tokenization is called with
            ``truncate=True``.
        model: Object exposing ``encode_text`` (for example the model
            returned by ``clip.load``).
        device: Device the token tensor is moved to before encoding.
            NOTE(review): presumably must match the device ``model`` lives
            on -- confirm at the call site.

    Returns:
        The result of ``model.encode_text`` moved to the CPU. It is computed
        under ``torch.no_grad()``, so it carries no autograd graph.
    """
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    # Inference only: avoid building an autograd graph for the text encoder.
    with torch.no_grad():
        text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
47
+
48
+
49
+ import h5py
50
+
51
# Open the text-encoding file read-only. The context manager closes it even
# if listing the keys or reading the dataset raises.
with h5py.File('latent_folder/text_encodings.hdf5', 'r') as f:
    # View the contents
    print(list(f.keys()))

    # Read the whole 'text_encodings' dataset into memory so it remains
    # usable after the file has been closed.
    dataset = f['text_encodings'][:]
62
+
63
+
64
+
65
def cosine_similarity(v1, v2):
    """Return the cosine similarity of two vectors.

    Both inputs are flattened to 1-D first, so a ``(1, 768)`` row and a
    ``(768,)`` vector can be compared directly, in either argument position.

    Args:
        v1: Array-like vector.
        v2: Array-like vector with the same number of elements as ``v1``.

    Returns:
        ``dot(v1, v2) / (norm(v1) * norm(v2))``, or ``0.0`` when either
        vector has zero norm (instead of dividing by zero).
    """
    v1 = np.asarray(v1).reshape(-1)
    v2 = np.asarray(v2).reshape(-1)

    norm_product = np.linalg.norm(v1) * np.linalg.norm(v2)
    if norm_product == 0:
        # Cosine similarity is undefined for a zero vector; report no similarity.
        return 0.0
    return np.dot(v1, v2) / norm_product
75
+
76
+
77
+
78
# Embed a sample caption with the CLIP model on the GPU.
textembed = encode_text("""The double-cut pork chop at The Ainsworth in""", model, 'cuda')

# Compare the caption embedding with stored text encoding number 5.
cosine_similarity(textembed.detach().cpu().numpy(), dataset[5])
82
+
83
+
84
+
85
+ Dataset link: https://huggingface.co/datasets/zzliang/GRIT
86
+