"""FastAPI service exposing a fine-tuned causal language model over HTTP.

The model and tokenizer are loaded once at import time and shared by all
requests. ``POST /predict/`` takes a question, runs text generation on it and
returns the decoded output.
"""

import threading

from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Single source of truth for the checkpoint so model and tokenizer cannot drift.
MODEL_NAME = "Hadeel11/fine-tuned-model"

app = FastAPI()

# Load your fine-tuned model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Inference runs in worker threads (see ``predict``). The shared model and
# tokenizer are not assumed to be safe for concurrent use, so generation is
# serialised: one request is processed at a time, as before, but the event
# loop stays free to accept connections while it runs.
_inference_lock = threading.Lock()


class Query(BaseModel):
    """Request body for ``POST /predict/``."""

    # The text passed to the tokenizer as the generation prompt.
    question: str
    # Required by the request schema but not used when building the prompt.
    # NOTE(review): presumably meant as retrieval context -- confirm whether
    # it should be folded into the prompt.
    contexts: list


def _generate_answer(question: str) -> str:
    """Tokenize *question*, run ``model.generate`` and decode the first output.

    This is synchronous and compute-bound; call it from a worker thread, not
    directly from the event loop.
    """
    with _inference_lock:
        inputs = tokenizer(question, return_tensors="pt")
        outputs = model.generate(**inputs)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)


@app.post("/predict/")
async def predict(query: Query):
    """Generate an answer for ``query.question``.

    Args:
        query: Validated request body. Only ``question`` is used.

    Returns:
        A dict ``{"answer": <decoded text>}``.

    Raises:
        HTTPException: 400 if ``question`` is empty or whitespace-only.
    """
    if not query.question.strip():
        raise HTTPException(status_code=400, detail="question must not be empty")

    # model.generate blocks for the whole generation; running it inline in an
    # async handler would stall every other request on the event loop.
    answer = await run_in_threadpool(_generate_answer, query.question)
    return {"answer": answer}