from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()

# Single identifier passed to both from_pretrained calls so the model and
# tokenizer cannot drift apart.
MODEL_NAME = "Hadeel11/fine-tuned-model"

# Load your fine-tuned model and tokenizer
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# NOTE(review): query_qdrant and generate_answer are neither defined nor
# imported in the visible source -- confirm they are provided elsewhere in
# this module before deploying.


@app.post("/query/")
async def query_rag(request: Request):
    """Answer a question posted as JSON.

    Reads the JSON request body, takes its "question" field (an empty
    string when the field is absent), passes it to ``query_qdrant`` with
    ``top_k=2`` and hands the question plus the returned contexts to
    ``generate_answer``.

    Returns:
        A dict with the keys "question", "contexts" and "answer".

    Raises:
        HTTPException: 400 when the body is not valid JSON or is not a
            JSON object.
    """
    try:
        data = await request.json()
    except ValueError as exc:
        # json.JSONDecodeError (empty or malformed body) is a ValueError;
        # report it as a client error instead of an unhandled 500.
        raise HTTPException(
            status_code=400, detail="Request body must be valid JSON."
        ) from exc

    if not isinstance(data, dict):
        # A JSON array or scalar has no .get(); reject it explicitly.
        raise HTTPException(
            status_code=400, detail="Request body must be a JSON object."
        )

    question = data.get("question", "")
    contexts = query_qdrant(question, top_k=2)
    answer = generate_answer(question, contexts)
    return {"question": question, "contexts": contexts, "answer": answer}