raduqus committed
Commit 27465f4 · verified · Parent: aa850c7

Update app.py

Files changed (1):
  1. app.py +56 -5
app.py CHANGED
@@ -1,7 +1,58 @@
- from vllm import serve
-
- # Define your model name
- model_name = "raduqus/reco_1b_4bit"
-
- # Serve the model
- serve(model_name=model_name, host="0.0.0.0", port=8000)
+ import torch
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Initialize the FastAPI app
+ app = FastAPI()
+
+ # Hugging Face model ID
+ model_id = "raduqus/reco_1b_16bit"
+
+ # Load the tokenizer and model once at startup
+ try:
+     print("Loading tokenizer...")
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     print("Loading 16-bit model...")
+     model = AutoModelForCausalLM.from_pretrained(
+         model_id,
+         torch_dtype=torch.float16,  # 16-bit floating-point precision
+         device_map="auto"  # automatically map layers to available devices
+     )
+     print("Model loaded successfully.")
+ except Exception as e:
+     raise RuntimeError(f"Failed to load the model: {e}")
+
+ # Input schema for task recommendations
+ class RecommendationRequest(BaseModel):
+     prompt: str
+     max_length: int = 100  # maximum number of tokens to generate
+     temperature: float = 0.7
+     top_p: float = 0.9
+
+ @app.post("/recommend")
+ async def recommend_task(request: RecommendationRequest):
+     """Generate task recommendations based on the input prompt."""
+     try:
+         # Tokenize the prompt and move it to the model's device
+         inputs = tokenizer(request.prompt, return_tensors="pt", truncation=True)
+         inputs = inputs.to(model.device)
+         outputs = model.generate(
+             **inputs,  # pass input_ids and attention_mask together
+             max_new_tokens=request.max_length,  # budget for generated tokens only
+             temperature=request.temperature,
+             top_p=request.top_p,
+             do_sample=True
+         )
+         # Decode the generated text (note: includes the prompt)
+         generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return {"recommendation": generated_text}
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error during generation: {e}")
+
+ @app.get("/")
+ async def root():
+     """Health check endpoint."""
+     return {"message": "Task recommender is running!"}
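With the server launched (for example via uvicorn app:app --host 0.0.0.0 --port 8000; uvicorn and the host/port are assumptions here, not part of the commit), the endpoints above can be exercised with a minimal Python client sketch like the following. The payload fields mirror the RecommendationRequest schema.

import requests  # assumed client-side dependency

BASE_URL = "http://localhost:8000"  # hypothetical host/port; adjust to your deployment

# Health check against the root endpoint
print(requests.get(f"{BASE_URL}/").json())
# expected: {"message": "Task recommender is running!"}

# Ask for a recommendation; fields mirror RecommendationRequest
payload = {
    "prompt": "Suggest a quick task for a rainy afternoon:",
    "max_length": 80,
    "temperature": 0.7,
    "top_p": 0.9,
}
resp = requests.post(f"{BASE_URL}/recommend", json=payload)
resp.raise_for_status()
print(resp.json()["recommendation"])

Because the server decodes the full output sequence, the returned text includes the original prompt; a caller that wants only the newly generated text can strip the prompt prefix client-side.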