# Spaces: Running
from fastapi.responses import StreamingResponse | |
from fastapi import FastAPI, HTTPException | |
from llm_backend import chat_with_model | |
from schema import ChatRequest | |
"""
Run the API locally with either command:

    uvicorn api:app --reload
    fastapi dev api.py --port 5723
"""
# ASGI application object; the launch commands in this file refer to it as `api:app`.
app = FastAPI()
def chat_stream(request: ChatRequest):
    """Stream a model's chat reply back to the client as plain text.

    Gathers the generation settings carried on ``request`` into a dict, hands
    them to ``chat_with_model`` together with the chat history and model
    identifier, and wraps whatever that call returns in a ``StreamingResponse``.

    Args:
        request: Incoming chat request. Supplies ``chat_history``, ``model``
            and every generation setting copied into the dict below.

    Returns:
        A ``StreamingResponse`` with media type ``text/plain`` over the value
        returned by ``chat_with_model``.

    Raises:
        HTTPException: Status 500, with the error text as ``detail``, when
            setting up the stream raises.
    """
    # NOTE(review): no route decorator is visible on this function in this
    # view -- confirm it is registered on `app` somewhere.

    # Generation settings forwarded to the backend unchanged, as one dict.
    kwargs = {
        "max_tokens": request.max_tokens,
        "temperature": request.temperature,
        "top_p": request.top_p,
        "min_p": request.min_p,
        "typical_p": request.typical_p,
        "frequency_penalty": request.frequency_penalty,
        "presence_penalty": request.presence_penalty,
        "repeat_penalty": request.repeat_penalty,
        "top_k": request.top_k,
        "seed": request.seed,
        "tfs_z": request.tfs_z,
        "mirostat_mode": request.mirostat_mode,
        "mirostat_tau": request.mirostat_tau,
        "mirostat_eta": request.mirostat_eta,
    }
    try:
        # NOTE(review): if chat_with_model returns a lazy generator, errors
        # raised while tokens are being produced would surface after this
        # function has returned and would not reach the handler below --
        # confirm against llm_backend.
        token_generator = chat_with_model(request.chat_history, request.model, kwargs)
        return StreamingResponse(token_generator, media_type="text/plain")
    except Exception as e:
        # Chain the cause so the original traceback stays attached to the
        # HTTP error instead of being reported as an unrelated failure.
        raise HTTPException(status_code=500, detail=str(e)) from e