File size: 1,844 Bytes
613d0b6
 
 
a61e58e
613d0b6
 
 
 
4016518
 
613d0b6
 
 
 
 
 
a17e571
613d0b6
 
 
 
 
 
 
 
 
 
 
 
c5ac78c
4016518
 
613d0b6
 
 
 
4016518
613d0b6
4016518
613d0b6
 
 
 
 
4016518
613d0b6
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModel
from optimum.pipelines import pipeline
from optimum.onnxruntime import ORTModelForFeatureExtraction
from pathlib import Path
import time

import os
import torch

def mean_pooling(model_output):
    """Average per-token embeddings into a single sentence embedding.

    Args:
        model_output: nested sequence from the feature-extraction pipeline;
            ``model_output[0]`` is a list of Z token embeddings, each a
            sequence of Y floats (Y is 384 for MiniLM-L-6 — assumed, not
            enforced here).

    Returns:
        list[float]: the element-wise mean of the Z token embeddings
        (length Y), or ``[]`` when there are no tokens.
    """
    token_embeddings = model_output[0]
    num_tokens = len(token_embeddings)
    # Guard the empty case: the naive version would raise IndexError /
    # ZeroDivisionError on an input with zero tokens.
    if num_tokens == 0:
        return []
    # zip(*...) transposes (Z tokens x Y dims) -> (Y dims x Z values),
    # so each column can be averaged in one pass at C speed.
    return [sum(column) / num_tokens for column in zip(*token_embeddings)]
    

class EndpointHandler():
    """Inference-endpoint handler: embeds sentences with an ONNX MiniLM model.

    Loads the tokenizer from the original sentence-transformers checkpoint
    and the weights from a pre-exported ONNX copy, then serves pooled
    sentence embeddings via ``__call__``.
    """

    def __init__(self, path=""):
        # Tokenizer from the source checkpoint; weights from the ONNX export.
        self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
        onnx_model = ORTModelForFeatureExtraction.from_pretrained("jpohhhh/msmarco-MiniLM-L-6-v3_onnx", from_transformers=False)
        self.onnx_extractor = pipeline("feature-extraction", model=onnx_model, tokenizer=self.tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Embed each sentence in the request and return the pooled vectors.

        data args:
            inputs (:obj: `str` | `PIL.Image` | `np.array`)
            kwargs
        Return:
            A :obj:`list` | `dict`: will be serialized and returned
        """
        # Standard HF handler pattern: use "inputs" if present, else the
        # payload itself is treated as the sentence list.
        sentences = data.pop("inputs", data)
        # no_grad skips autograd bookkeeping while extracting features.
        with torch.no_grad():
            return [mean_pooling(self.onnx_extractor(sentence)) for sentence in sentences]