|
from typing import Any, Dict

from transformers import (
    AutoModelForSpeechSeq2Seq,
    AutoProcessor,
    AutoTokenizer,
    pipeline,
)
|
|
|
class EndpointHandler:
    """Inference Endpoints handler serving Whisper large-v3 speech recognition.

    Loads the model once at startup and exposes ``__call__`` as the request
    entry point, returning timestamped transcription chunks.
    """

    def __init__(self, path: str = "") -> None:
        """Load the Whisper model and build the ASR pipeline.

        Args:
            path: Local model directory provided by the endpoint runtime
                (unused here; the model is pulled from the Hub by id).
        """
        model_id = "openai/whisper-large-v3"
        task = "automatic-speech-recognition"
        self.model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # When a model *object* is passed, `pipeline` cannot infer the audio
        # preprocessor from a repo id, so the feature extractor (raw audio ->
        # model inputs) must be supplied explicitly; the tokenizer alone is
        # not sufficient for ASR.
        processor = AutoProcessor.from_pretrained(model_id)
        self.pipeline = pipeline(
            task,
            model=self.model,
            tokenizer=self.tokenizer,
            feature_extractor=processor.feature_extractor,
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Transcribe one request payload.

        Args:
            data: Request body; ``data["inputs"]`` holds the audio (raw bytes
                or a pipeline-accepted format) and optional ``data["parameters"]``
                holds extra keyword arguments forwarded to the pipeline.

        Returns:
            ``{"chunks": [...]}`` — timestamped transcription segments.
        """
        # Fall back to the whole payload when no "inputs" key is present
        # (standard Inference Endpoints handler convention).
        inputs = data.pop("inputs", data)
        # Treat a missing/None parameters entry as "no extra kwargs".
        parameters = data.pop("parameters", None) or {}
        result = self.pipeline(inputs, return_timestamps=True, **parameters)
        return {"chunks": result["chunks"]}