# jpohhhh's picture
# Try GPT4 suggestions
# 8af4da5
# raw
# history blame
# 2.21 kB
from typing import Dict, List, Any
from transformers import AutoTokenizer, AutoModel
from optimum.pipelines import pipeline
from optimum.onnxruntime import ORTModelForFeatureExtraction
from pathlib import Path
from multiprocessing import Pool
import time
import os
import torch
def mean_pooling2(model_output):
    """Mean-pool token embeddings over the token axis using torch.

    Only ``model_output[0]`` is read. It may be a tensor or anything
    ``torch.as_tensor`` accepts (for example nested Python lists of floats).

    The mean is taken over the second-to-last axis:

    * ``(tokens, hidden)`` input yields ``(hidden,)`` -- the same axis the
      pure-Python ``mean_pooling`` in this file averages over;
    * ``(batch, tokens, hidden)`` input yields ``(batch, hidden)`` -- this is
      ``dim=1``, so results for 3-D input are unchanged.

    Args:
        model_output: indexable whose first element holds the token
            embeddings.

    Returns:
        torch.Tensor with the token axis averaged away.
    """
    # torch.mean rejects plain Python lists, so coerce first; as_tensor
    # returns an existing tensor unchanged instead of copying it.
    token_embeddings = torch.as_tensor(model_output[0])
    # torch.mean is only defined for floating/complex dtypes; integer data
    # (e.g. a list of ints) is promoted so it does not raise.
    if not (token_embeddings.is_floating_point() or token_embeddings.is_complex()):
        token_embeddings = token_embeddings.float()
    # dim=-2 is the token axis for both 2-D and 3-D layouts described above.
    return torch.mean(token_embeddings, dim=-2)
def mean_pooling(model_output):
    """Average the rows of ``model_output[0]`` column by column.

    ``model_output[0]`` is treated as a sequence of equally long rows; the
    width is taken from the first row. The result is a plain Python list
    holding one mean per column.

    Args:
        model_output: indexable whose first element is a non-empty sequence
            of rows of numbers.

    Returns:
        list of per-column means, as long as the first row.
    """
    rows = model_output[0]
    row_count = len(rows)
    width = len(rows[0])

    means = []
    for col in range(width):
        # Start from 0.0 and add rows in order so the floating-point result
        # matches a straightforward running total.
        running_total = 0.0
        for row in rows:
            running_total += row[col]
        means.append(running_total / row_count)
    return means
class EndpointHandler():
    """Endpoint handler that returns one pooled embedding per input sentence.

    Each sentence is run through an ONNX feature-extraction pipeline and the
    result is reduced with ``mean_pooling2``. Sentences are processed
    concurrently on a small pool of worker threads.
    """

    def __init__(self, path=""):
        """Load the tokenizer and ONNX model and create the worker pool.

        Args:
            path: accepted for compatibility with the caller; it is not read
                here because the tokenizer and model ids are hard-coded.
        """
        # Function-scope import so the file's import block stays untouched.
        from multiprocessing.pool import ThreadPool

        task = "feature-extraction"
        self.tokenizer = AutoTokenizer.from_pretrained('sentence-transformers/msmarco-MiniLM-L-6-v3')
        model_regular = ORTModelForFeatureExtraction.from_pretrained("jpohhhh/msmarco-MiniLM-L-6-v3_onnx", from_transformers=False)
        self.onnx_extractor = pipeline(task, model=model_regular, tokenizer=self.tokenizer)
        # Threads rather than processes: mapping a bound method over a
        # process pool has to pickle ``self``, and ``self`` holds the pool,
        # which refuses to be pickled -- so every request would fail.
        # NOTE(review): this assumes the pipeline can be called from several
        # threads at once -- confirm for the tokenizer/runtime in use.
        self.pool = ThreadPool(4)

    def process_sentence(self, sentence):  # Factored out for parallelization
        """Embed a single sentence and mean-pool the result.

        Args:
            sentence: the text handed to the feature-extraction pipeline.

        Returns:
            The pooled embedding; converted to plain Python lists when the
            pooling result offers ``tolist()`` so it can be serialized.
        """
        with torch.no_grad():
            model_output = self.onnx_extractor(sentence)
            pooled = mean_pooling2(model_output)
        # Tensors are not JSON-serializable; hand back plain lists instead.
        return pooled.tolist() if hasattr(pooled, "tolist") else pooled

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Compute embeddings for the sentences in a request payload.

        Args:
            data: request payload. ``data["inputs"]`` is either one sentence
                (``str``) or an iterable of sentences. When the key is
                missing, ``data`` itself is used as the iterable.

        Returns:
            A list with one pooled embedding per sentence, in input order.
            (The annotation is kept as-is for interface compatibility; the
            elements are embeddings, not dicts.)
        """
        sentences = data.pop("inputs", data)
        # A bare string would otherwise be iterated character by character.
        if isinstance(sentences, str):
            sentences = [sentences]
        # Compute embeddings in parallel
        sentence_embeddings = self.pool.map(self.process_sentence, sentences)
        return sentence_embeddings