from fastapi import FastAPI, Request, Body
from fastapi.responses import HTMLResponse, JSONResponse
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()
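
# Run the app with uvicorn, e.g. `uvicorn main:app --reload` (the module name
# "main" assumes this file is saved as main.py; adjust it to match the file name).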

templates = Jinja2Templates(directory="templates")

# Default character and a seed exchange (not referenced by the endpoints below).
personaje = "rias"
user = "user"
chat = {
    personaje: f"hi, I'm {personaje}, I wasn't expecting to see you here",
    user: "hi",
}

app.mount("/static", StaticFiles(directory="static"), name="static")
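
# Expected project layout (inferred from the paths above): a templates/ directory
# with listapersonajes.html and chat.html, and a static/ directory for assets such
# as the character images.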


@app.get("/", response_class=HTMLResponse)
async def read_html(request: Request):
    # Landing page: list of available characters.
    return templates.TemplateResponse("listapersonajes.html", {"request": request})


@app.get("/personajes/{personaje}", response_class=HTMLResponse)
async def personaje_detalle(request: Request, personaje: str):
    # Chat page for a single character; the template receives the image file name
    # and the display name.
    context = {
        "character_image": f"{personaje}.jpg",
        "character_name": personaje.capitalize(),
    }
    return templates.TemplateResponse("chat.html", {"request": request, **context})


# Load the chat model and tokenizer once at startup.
model_name = "allura-org/MoE-Girl_400MA_1BT"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",        # place weights on the GPU automatically when available
    torch_dtype=torch.float16,
)
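
# Note: device_map="auto" requires the accelerate package. Loading in float16 halves
# memory use versus float32; a ~1B-parameter model needs roughly 2 GB of VRAM.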


@app.post("/personajes/{personaje}/chat")
async def chat_with_character(request: Request, personaje: str, user_input: str = Body(...)):
    if not user_input:
        return JSONResponse(status_code=422, content={"message": "user_input is required"})

    # Build a ChatML-style prompt: system persona, the user's message, then the
    # assistant turn the model is asked to complete.
    prompt = f"""<|im_start|>system
You are {personaje}, a sexy girl who has been dating the user for 2 months.<|im_end|>
<|im_start|>user
{user_input}<|im_end|>
<|im_start|>assistant
"""

    # Tokenize and move the prompt to the device the model is running on.
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda" if torch.cuda.is_available() else "cpu")

    outputs = model.generate(
        **inputs,
        max_new_tokens=500,
        pad_token_id=tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
    )

    # Decode only the newly generated tokens (everything after the prompt) rather than
    # splitting the full decoded sequence, which breaks when the ChatML markers are
    # stripped as special tokens. Keep only the text up to the end-of-turn marker.
    generated_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    response_text = generated_text.split("<|im_end|>")[0].strip()

    return JSONResponse(content={"response": response_text})
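
# A hypothetical way to exercise the chat endpoint from the command line (with a
# single scalar Body parameter, FastAPI typically expects the raw JSON value as
# the request body); the interactive docs at /docs work as well:
#   curl -X POST http://127.0.0.1:8000/personajes/rias/chat \
#        -H "Content-Type: application/json" \
#        -d '"hi, how are you?"'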