from typing import Any, Dict

from transformers import GPT2Tokenizer, pipeline

from model import GPT


class EndpointHandler:
    def __init__(self, path=""):
        # Preload everything needed at inference: the custom GPT weights and the
        # stock GPT-2 tokenizer, wrapped in a text-generation pipeline.
        model = GPT.from_pretrained(path)
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Args:
            data (:obj:`dict`):
                inputs (:obj:`str`): the prompt to generate text from.
        Return:
            A :obj:`dict` that will be serialized and returned as the response.
        """
        inputs = data.pop("inputs", data)
        output = self.pipeline(inputs)
        return {"Answer": output}
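

# Minimal local smoke test, as a sketch: it assumes `path` points at a directory
# containing weights that `GPT.from_pretrained` can load (the "." below is only a
# placeholder). The payload shape mirrors the Inference Endpoints convention of a
# JSON body with an "inputs" key, matching how `__call__` reads the request above.
if __name__ == "__main__":
    handler = EndpointHandler(path=".")
    response = handler({"inputs": "Once upon a time"})
    print(response["Answer"])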