from typing import Any, Dict

from transformers import (
    AutoFeatureExtractor,
    AutoModelForSpeechSeq2Seq,
    AutoTokenizer,
    pipeline,
)


class EndpointHandler:
    """Callable request handler wrapping a Whisper speech-recognition pipeline.

    The constructor loads the model and its pre-processing components once;
    each call then runs the pipeline on one request payload and returns the
    ``"chunks"`` entry of the pipeline result.
    """

    def __init__(self, path: str = "") -> None:
        """Load the model, tokenizer and feature extractor and build the pipeline.

        Args:
            path: Accepted for interface compatibility; currently unused.
        """
        # NOTE(review): `path` is ignored and the checkpoint is always resolved
        # by this hard-coded id -- confirm that is intended for deployment.
        model_id = "openai/whisper-large-v3"
        task = "automatic-speech-recognition"

        self.model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id)
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # When `pipeline` is handed an instantiated model rather than a model
        # id string, it cannot infer the audio feature extractor by itself, so
        # it is loaded and passed explicitly.
        self.feature_extractor = AutoFeatureExtractor.from_pretrained(model_id)
        self.pipeline = pipeline(
            task,
            model=self.model,
            tokenizer=self.tokenizer,
            feature_extractor=self.feature_extractor,
        )

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Run the pipeline on one request payload.

        Args:
            data: Request payload. ``"inputs"`` is removed from it and passed
                to the pipeline (the payload itself is used when the key is
                absent); ``"parameters"``, if present, is removed and forwarded
                to the pipeline as keyword arguments. Note that ``data`` is
                mutated by these removals.

        Returns:
            A dict with the single key ``"chunks"`` holding
            ``result["chunks"]`` from the pipeline output.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # Copy so the caller's parameter mapping is never modified.
        call_kwargs = dict(parameters) if parameters is not None else {}
        # Timestamps must stay enabled because the response is built from
        # result["chunks"]. A truthy caller-supplied value (e.g. "word") is
        # honoured; a missing or falsy one is forced to True. Merging here also
        # avoids the "multiple values for keyword argument" TypeError that
        # passing return_timestamps both explicitly and via ** would raise.
        if not call_kwargs.get("return_timestamps"):
            call_kwargs["return_timestamps"] = True

        result = self.pipeline(inputs, **call_kwargs)
        return {"chunks": result["chunks"]}