jpohhhh committed
Commit 59199c3 · 1 Parent(s): 4016518

Update handler.py

Files changed (1)
  1. handler.py +8 -9
handler.py CHANGED
@@ -1,5 +1,7 @@
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModel
+from optimum.onnxruntime.modeling_ort import ORTModelForCustomTasks
+
 import torch
 
 # Mean Pooling - Take attention mask into account for correct averaging
@@ -11,10 +13,10 @@ def mean_pooling(model_output, attention_mask):
 class EndpointHandler():
     def __init__(self, path=""):
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
-        self.model = AutoModel.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
-        self.model.to(self.device)
-        print("model will run on ", self.device)
+        self.model = ORTModelForCustomTasks.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
+        self.tokenizer = AutoTokenizer.from_pretrained("optimum/sbert-all-MiniLM-L6-with-pooler")
+        # self.model.to(self.device)
+        # print("model will run on ", self.device)
 
     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -25,11 +27,8 @@ class EndpointHandler():
         A :obj:`list` | `dict`: will be serialized and returned
         """
         sentences = data.pop("inputs", data)
-        encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
-        encoded_input = {key: value.to(self.device) for key, value in encoded_input.items()}
-        # Compute token embeddings
-        with torch.no_grad():
-            model_output = self.model(**encoded_input)
+        encoded_input = self.tokenizer(sentences, padding=True, truncation=True, return_tensors="pt")
+        model_output = self.model(**encoded_input)
 
         # Perform pooling. In this case, mean pooling.
         sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
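The diff context cuts off the body of mean_pooling; only its signature appears in the second hunk header. For reference, a minimal sketch of the usual sentence-transformers mean-pooling recipe (an assumption; the exact lines in this repo fall outside the diff context):

    import torch

    # Average the token embeddings, weighting by the attention mask so that
    # padding tokens do not contribute to the sentence embedding.
    def mean_pooling(model_output, attention_mask):
        token_embeddings = model_output[0]  # token-level embeddings (last hidden state)
        mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
        return torch.sum(token_embeddings * mask, 1) / torch.clamp(mask.sum(1), min=1e-9)

And a quick local smoke test for the updated handler. This is a sketch, not part of the commit: it assumes optimum[onnxruntime] is installed, the file above is saved as handler.py, and the tail of __call__ (truncated in the diff) returns the pooled embeddings.

    from handler import EndpointHandler

    # Inference Endpoints pass payloads of the shape {"inputs": ...}.
    handler = EndpointHandler()
    embeddings = handler({"inputs": ["I love burritos!", "ONNX Runtime keeps CPU inference fast."]})
    print(len(embeddings))  # expect one embedding per input sentence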