Update app.py
app.py CHANGED
@@ -31,16 +31,19 @@ text_embedding_cache = {}
 
 def get_image_embedding(image):
     try:
+        # Process image and add dummy text input
         inputs = processor(
             images=image,
-            text=
+            text="Describe this image",  # Adding a default text prompt
+            padding=True,
             return_tensors="pt"
         ).to(device, torch_dtype)
 
         with torch.no_grad():
+            # Get model outputs
             outputs = model(**inputs)
-            #
-            image_embeddings = outputs.last_hidden_state
+            # Extract image features from the cross-attention layers
+            image_embeddings = outputs.last_hidden_state.mean(dim=1)
         return image_embeddings.cpu().numpy()
     except Exception as e:
         print(f"Error in get_image_embedding: {str(e)}")
@@ -51,15 +54,25 @@ def get_text_embedding(text):
     if text in text_embedding_cache:
         return text_embedding_cache[text]
 
+    # Process text with proper input formatting
     inputs = processor(
         text=text,
-
+        padding=True,
         return_tensors="pt"
     ).to(device, torch_dtype)
 
+    # Add required decoder input ids
+    inputs['decoder_input_ids'] = model.generate(
+        **inputs,
+        max_length=1,
+        return_dict_in_generate=True,
+        output_hidden_states=True,
+        early_stopping=True
+    ).sequences
+
     with torch.no_grad():
         outputs = model(**inputs)
-        text_embeddings = outputs.last_hidden_state
+        text_embeddings = outputs.last_hidden_state.mean(dim=1)
 
     embedding = text_embeddings.cpu().numpy()
     text_embedding_cache[text] = embedding
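For context, the diff suggests that `processor` and `model` form a multimodal encoder-decoder pipeline defined earlier in app.py, and that after this commit both helpers return a mean-pooled numpy embedding of shape (1, hidden_size). Below is a minimal usage sketch under exactly those assumptions; `cosine_similarity` and `rank_texts_for_image` are illustrative helper names, not part of app.py.

# Minimal sketch: compare one image embedding against several candidate texts.
# Assumes get_image_embedding / get_text_embedding behave as in the diff above.
import numpy as np
from PIL import Image

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    # Flatten the (1, hidden_size) embeddings and compare direction only.
    a, b = a.ravel(), b.ravel()
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-8))

def rank_texts_for_image(image_path: str, candidate_texts: list[str]):
    # Embed the image once, then score every candidate caption against it.
    image = Image.open(image_path).convert("RGB")
    image_emb = get_image_embedding(image)
    scores = [(text, cosine_similarity(image_emb, get_text_embedding(text)))
              for text in candidate_texts]
    # Highest similarity first.
    return sorted(scores, key=lambda pair: pair[1], reverse=True)

# Example call (paths and captions are illustrative):
# print(rank_texts_for_image("photo.jpg", ["a cat on a sofa", "a city skyline"]))

Cosine similarity is used here because both embeddings come from the same model and mean pooling changes vector magnitude; comparing direction only is the more common choice for retrieval-style ranking.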