Update README.md
README.md
````diff
@@ -11,21 +11,27 @@ Load from HuggingFace:
 import torch
 from PIL import Image
 from transformers import AutoModel, CLIPImageProcessor
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model = AutoModel.from_pretrained(
     'jienengchen/ViTamin-XL-384px',
-
-    low_cpu_mem_usage=True,
-    trust_remote_code=True).cuda().eval()
+    trust_remote_code=True).to(device).eval()
 
 image = Image.open('./image.png').convert('RGB')
-
 image_processor = CLIPImageProcessor.from_pretrained('jienengchen/ViTamin-XL-384px')
 
 pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
 pixel_values = pixel_values.to(torch.bfloat16).cuda()
 
-
+tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K')
+text = tokenizer(["a photo of vitamin", "a dog", "a cat"]).to(device)
+
+with torch.no_grad(), torch.cuda.amp.autocast():
+    image_features, text_features, logit_scale = model(pixel_values, text)
+    text_probs = (100.0 * image_features @ text_features.to(torch.float).T).softmax(dim=-1)
+
+print("Label probs:", text_probs)
+
 ```
 
 ## Main Results with CLIP Pre-training on DataComp-1B
````
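As committed, the added lines are not self-contained: `open_clip` is called but never imported, `pixel_values` is still moved with `.cuda()` so the new `device` fallback never applies to the inputs, and `torch.cuda.amp.autocast()` assumes a GPU. Below is a minimal runnable sketch of the updated snippet with those three points patched; the `import open_clip`, the `.to(device)` move for the inputs, and the device-aware `torch.autocast` are assumptions layered on top of the diff, not part of the commit:

```python
import open_clip  # assumed: the committed snippet uses open_clip without importing it
import torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor

# CPU fallback, as the diff intends.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModel.from_pretrained(
    'jienengchen/ViTamin-XL-384px',
    trust_remote_code=True).to(device).eval()

image = Image.open('./image.png').convert('RGB')
image_processor = CLIPImageProcessor.from_pretrained('jienengchen/ViTamin-XL-384px')

pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
pixel_values = pixel_values.to(torch.bfloat16).to(device)  # .to(device), not .cuda()

tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K')
text = tokenizer(["a photo of vitamin", "a dog", "a cat"]).to(device)

# torch.autocast works on both 'cuda' and 'cpu'; torch.cuda.amp.autocast() is CUDA-only.
with torch.no_grad(), torch.autocast(device_type=device, dtype=torch.bfloat16):
    image_features, text_features, logit_scale = model(pixel_values, text)
    text_probs = (100.0 * image_features @ text_features.to(torch.float).T).softmax(dim=-1)

print("Label probs:", text_probs)
```

Everything else (the model id, the tokenizer hub id, the prompts, and the `(image_features, text_features, logit_scale)` unpacking) is taken verbatim from the diff.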