Update README.md
README.md CHANGED
````diff
@@ -12,30 +12,33 @@ This model focuses on retrieval tasks while also performing well on various task
 from transformers import AutoTokenizer, AutoModel
 import torch
 # Sentences we want sentence embeddings for
+token=""
 sentences = ["this is a test sentence", "this is another test sentence"]

 # Prefixing for retrieval tasks
 instruction = "Represent this sentence for searching relevant passages: "

 # Load model from HuggingFace Hub
-tokenizer = AutoTokenizer.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base')
-model = AutoModel.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base')
+tokenizer = AutoTokenizer.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base', token=token)
+model = AutoModel.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base', token=token)
 model.eval()

 # Tokenize sentences
 encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
-encoded_input_with_prefixing = tokenizer([instruction + q for q in
+encoded_input_with_prefixing = tokenizer([instruction + q for q in sentences], padding=True, truncation=True, return_tensors='pt')

 # Compute token embeddings
 with torch.no_grad():
     model_output = model(**encoded_input)
     model_output_with_prefixing = model(**encoded_input_with_prefixing)
-
-
-
+    sentence_embeddings = model_output[0][:, 0]
+    sentence_embeddings_with_prefixing = model_output_with_prefixing[0][:, 0]
+
+sentence_embeddings_avg = (sentence_embeddings + sentence_embeddings_with_prefixing) / 2
+
 # normalize embeddings
-
-print("Sentence embeddings:",
+sentence_embeddings_avg = torch.nn.functional.normalize(sentence_embeddings_avg, p=2, dim=1)
+print("Sentence embeddings:", sentence_embeddings_avg)
 ```
 ## Evaluation
 <img src="slerp.png" alt="109M models retrieval benchmarks" width="650" style="margin-left:'auto' margin-right:'auto' display:'block'"/>
````
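For retrieval, the updated snippet can be wrapped in a small helper. The sketch below is ours, not from the model card: the `embed` function and the sample query/passages are illustrative, and it assumes the prefix-then-average recipe shown in the diff is the intended inference path (queries and passages are embedded the same way here). It reuses `tokenizer`, `model`, and `instruction` from the snippet above.

```python
import torch

# Hypothetical convenience wrapper around the updated README snippet.
def embed(texts):
    enc = tokenizer(texts, padding=True, truncation=True, return_tensors='pt')
    enc_prefixed = tokenizer([instruction + t for t in texts],
                             padding=True, truncation=True, return_tensors='pt')
    with torch.no_grad():
        # CLS-token pooling: first token of the last hidden state.
        emb = model(**enc)[0][:, 0]
        emb_prefixed = model(**enc_prefixed)[0][:, 0]
    # Average the unprefixed and prefixed embeddings, then L2-normalize.
    return torch.nn.functional.normalize((emb + emb_prefixed) / 2, p=2, dim=1)

query_emb = embed(["what is vector search?"])
passage_embs = embed(["Vector search ranks documents by embedding similarity.",
                      "Bananas are rich in potassium."])
# With unit-norm vectors, the dot product equals cosine similarity;
# higher scores mean more relevant passages.
print((query_emb @ passage_embs.T).squeeze(0))
```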
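The figure's filename (`slerp.png`) suggests the merged checkpoint was produced with SLERP (spherical linear interpolation) of the parent models' weights. As a reference for that technique only, here is a minimal SLERP sketch; it is our illustration, not the actual merge script, and it assumes a simple two-model, per-tensor merge.

```python
import torch

def slerp(w_a: torch.Tensor, w_b: torch.Tensor, t: float = 0.5,
          eps: float = 1e-8) -> torch.Tensor:
    """Spherical linear interpolation between two weight tensors."""
    a, b = w_a.flatten().float(), w_b.flatten().float()
    # Angle between the two flattened weight vectors.
    cos = torch.clamp(torch.dot(a, b) / (a.norm() * b.norm() + eps), -1.0, 1.0)
    theta = torch.acos(cos)
    if theta.abs() < eps:
        # Nearly parallel weights: fall back to plain linear interpolation.
        out = (1 - t) * a + t * b
    else:
        out = (torch.sin((1 - t) * theta) * a
               + torch.sin(t * theta) * b) / torch.sin(theta)
    return out.view_as(w_a)

# Hypothetical usage: sd_a and sd_b would be the parent models' state dicts.
# merged = {name: slerp(sd_a[name], sd_b[name]) for name in sd_a}
```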