Update
app.py CHANGED
@@ -1,4 +1,5 @@
 import torch
+import torch.nn.functional as F
 import gradio as gr
 from transformers import CLIPProcessor, CLIPModel
 import spaces
@@ -34,18 +35,28 @@ def calculate_score(image, text, model_name):
     inputs = processor(text=labels, images=[image], return_tensors="pt", padding=True)
     inputs = {k: v.to("cuda") for k, v in inputs.items()}

-    # Calculate
+    # Calculate embeddings
     with torch.no_grad():
         outputs = model(**inputs)
+        image_embeds = outputs.image_embeds
+        text_embeds = outputs.text_embeds

-    …
+    # Normalize embeddings
+    image_embeds = F.normalize(image_embeds, p=2, dim=1)
+    text_embeds = F.normalize(text_embeds, p=2, dim=1)

-    …
+    # Calculate cosine similarity
+    cosine_similarities = torch.mm(text_embeds, image_embeds.t()).squeeze(1)
+
+    # Convert to percentages
+    percentages = ((cosine_similarities + 1) / 2 * 100).cpu().numpy()
+
+    results_dict = {label: float(score) for label, score in zip(labels, percentages)}
     return results_dict

 with gr.Blocks() as demo:
     gr.Markdown("# Multi-Model CLIP Score")
-    gr.Markdown("Calculate the …
+    gr.Markdown("Calculate the CLIP score (cosine similarity) between the given image and text descriptions using different CLIP model variants")

     with gr.Row():
         image_input = gr.Image(type="pil")
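
For context, here is a minimal sketch of how calculate_score reads after this commit. The diff shows only part of the function, so the model/processor loading, the @spaces.GPU decorator, the default model name, and the semicolon-separated label parsing are assumptions for illustration, not code from this Space:

import torch
import torch.nn.functional as F
from transformers import CLIPProcessor, CLIPModel
import spaces

@spaces.GPU  # assumed: `import spaces` plus .to("cuda") suggests a ZeroGPU Space
def calculate_score(image, text, model_name="openai/clip-vit-base-patch32"):
    # Hypothetical setup; the real Space presumably selects among several variants
    model = CLIPModel.from_pretrained(model_name).to("cuda")
    processor = CLIPProcessor.from_pretrained(model_name)
    labels = [t.strip() for t in text.split(";") if t.strip()]  # assumed label format

    inputs = processor(text=labels, images=[image], return_tensors="pt", padding=True)
    inputs = {k: v.to("cuda") for k, v in inputs.items()}

    # Calculate embeddings
    with torch.no_grad():
        outputs = model(**inputs)
        image_embeds = outputs.image_embeds
        text_embeds = outputs.text_embeds

    # Normalize embeddings (current transformers releases already return
    # L2-normalized CLIP embeddings, so this is a defensive no-op)
    image_embeds = F.normalize(image_embeds, p=2, dim=1)
    text_embeds = F.normalize(text_embeds, p=2, dim=1)

    # Cosine similarity: (num_labels, dim) @ (dim, 1) -> (num_labels, 1) -> (num_labels,)
    cosine_similarities = torch.mm(text_embeds, image_embeds.t()).squeeze(1)

    # Convert to percentages: map [-1, 1] linearly onto [0, 100]
    percentages = ((cosine_similarities + 1) / 2 * 100).cpu().numpy()

    return {label: float(score) for label, score in zip(labels, percentages)}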
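One property of the new scoring worth noting: the percentage is a linear rescaling of cosine similarity, score = (cos + 1) / 2 * 100, so -1 maps to 0, 0 to 50, and 1 to 100. Unlike a softmax over labels, the per-label scores are independent and do not sum to 100. A quick check with hypothetical similarity values:

import torch

cos = torch.tensor([0.31, 0.04, -0.12])  # hypothetical cosine similarities
pct = (cos + 1) / 2 * 100
print(pct)  # tensor([65.5000, 52.0000, 44.0000])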