t0b1as91 committed on
Commit
da623e0
·
verified ·
1 Parent(s): 6caf95a

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +11 -8
README.md CHANGED
@@ -12,30 +12,33 @@ This model focuses on retrieval tasks while also performing well on various task
12
  from transformers import AutoTokenizer, AutoModel
13
  import torch
14
  # Sentences we want sentence embeddings for
 
15
  sentences = ["this is a test sentence", "this is another test sentence"]
16
 
17
  # Prefixing for retrieval tasks
18
  instruction = "Represent this sentence for searching relevant passages: "
19
 
20
  # Load model from HuggingFace Hub
21
- tokenizer = AutoTokenizer.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base')
22
- model = AutoModel.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base')
23
  model.eval()
24
 
25
  # Tokenize sentences
26
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
27
- encoded_input_with_prefixing = tokenizer([instruction + q for q in queries], padding=True, truncation=True, return_tensors='pt')
28
 
29
  # Compute token embeddings
30
  with torch.no_grad():
31
  model_output = model(**encoded_input)
32
  model_output_with_prefixing = model(**encoded_input_with_prefixing)
33
- model_output_avg = (model_output + model_output_with_prefixing) / 2
34
- # Perform pooling. In this case, cls pooling.
35
- sentence_embeddings = model_output_avg[0][:, 0]
 
 
36
  # normalize embeddings
37
- sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
38
- print("Sentence embeddings:", sentence_embeddings)
39
  ```
40
  ## Evaluation
41
  <img src="slerp.png" alt="109M models retrieval benchmarks" width="650" style="margin-left: auto; margin-right: auto; display: block;"/>
 
12
  from transformers import AutoTokenizer, AutoModel
13
  import torch
14
  # Sentences we want sentence embeddings for
15
+ token=""
16
  sentences = ["this is a test sentence", "this is another test sentence"]
17
 
18
  # Prefixing for retrieval tasks
19
  instruction = "Represent this sentence for searching relevant passages: "
20
 
21
  # Load model from HuggingFace Hub
22
+ tokenizer = AutoTokenizer.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base', token=token)
23
+ model = AutoModel.from_pretrained('Marqo/marqo-merged-bge-gist-gte-base', token=token)
24
  model.eval()
25
 
26
  # Tokenize sentences
27
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
28
+ encoded_input_with_prefixing = tokenizer([instruction + q for q in sentences], padding=True, truncation=True, return_tensors='pt')
29
 
30
  # Compute token embeddings
31
  with torch.no_grad():
32
  model_output = model(**encoded_input)
33
  model_output_with_prefixing = model(**encoded_input_with_prefixing)
34
+ sentence_embeddings = model_output[0][:, 0]
35
+ sentence_embeddings_with_prefixing = model_output_with_prefixing[0][:, 0]
36
+
37
+ sentence_embeddings_avg = (sentence_embeddings + sentence_embeddings_with_prefixing) / 2
38
+
39
  # normalize embeddings
40
+ sentence_embeddings_avg = torch.nn.functional.normalize(sentence_embeddings_avg, p=2, dim=1)
41
+ print("Sentence embeddings:", sentence_embeddings_avg)
42
  ```
43
  ## Evaluation
44
  <img src="slerp.png" alt="109M models retrieval benchmarks" width="650" style="margin-left: auto; margin-right: auto; display: block;"/>