jpohhhh
/

msmarco-MiniLM-L-6-v3_onnx

sentence-embeddings

endpoints-template

Inference Endpoints

Model card Files Files and versions Community

jpohhhh committed on Jul 5, 2023

Commit

8af4da5

·

1 Parent(s): 87fd374

Try GPT4 suggestions

Files changed (1) hide show

handler.py +20 -7

handler.py CHANGED Viewed

@@ -3,11 +3,22 @@ from transformers import AutoTokenizer, AutoModel
 from optimum.pipelines import pipeline
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from pathlib import Path
 import time
 import os
 import torch
 def mean_pooling(model_output):
     # Get dimensions
     Z, Y = len(model_output[0]), len(model_output[0][0])
@@ -34,6 +45,12 @@ class EndpointHandler():
         self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
         model_regular = ORTModelForFeatureExtraction.from_pretrained("jpohhhh/msmarco-MiniLM-L-6-v3_onnx", from_transformers=False)
         self.onnx_extractor = pipeline(task, model=model_regular, tokenizer=self.tokenizer)
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -44,10 +61,6 @@ class EndpointHandler():
             A :obj:`list` | `dict`: will be serialized and returned
         """
         sentences = data.pop("inputs",data)
-        sentence_embeddings = []
-        for sentence in sentences:
-            # Compute token embeddings
-            with torch.no_grad():
-                model_output = self.onnx_extractor(sentence)
-            sentence_embeddings.append(mean_pooling(model_output))
-        return sentence_embeddings

 from optimum.pipelines import pipeline
 from optimum.onnxruntime import ORTModelForFeatureExtraction
 from pathlib import Path
+from multiprocessing import Pool
 import time
 import os
 import torch
+def mean_pooling2(model_output):
+    """Perform mean pooling on tensor T
+    Args:
+        model_output: tensor T (elements are 2-dimensional float arrays).
+    Returns:
+        array of mean values.
+    """
+    return torch.mean(model_output[0], dim=1)
 def mean_pooling(model_output):
     # Get dimensions
     Z, Y = len(model_output[0]), len(model_output[0][0])
         self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
         model_regular = ORTModelForFeatureExtraction.from_pretrained("jpohhhh/msmarco-MiniLM-L-6-v3_onnx", from_transformers=False)
         self.onnx_extractor = pipeline(task, model=model_regular, tokenizer=self.tokenizer)
+        self.pool = Pool(4)
+    def process_sentence(self, sentence):  # Factored out for parallelization
+        with torch.no_grad():
+            model_output = self.onnx_extractor(sentence)
+        return mean_pooling2(model_output)
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
             A :obj:`list` | `dict`: will be serialized and returned
         """
         sentences = data.pop("inputs",data)
+        # Compute embeddings in parallel
+        sentence_embeddings = self.pool.map(self.process_sentence, sentences)
+        return sentence_embeddings