Hadeel11 committed
Commit 1695fdf
1 Parent(s): ff6ffb5

Update Dockerfile and FastAPI app

Files changed (2)
  1. Dockerfile +6 -4
  2. app.py +4 -3
Dockerfile CHANGED
@@ -1,21 +1,23 @@
+ # Use the official Hugging Face TGI Docker image as the base
  FROM huggingface/transformers-pytorch-gpu:latest

+
  RUN pip install --upgrade pip
  RUN pip install transformers torch fastapi uvicorn


- ENV TRANSFORMERS_CACHE=/tmp/huggingface_cache
-
-
- ENV MODEL_NAME="your-username/your-finetuned-model"
+ ENV MODEL_NAME="Hadeel11/fine-tuned-model"
  ENV USE_FP16=True

+
  COPY app.py /app/app.py

  WORKDIR /app

+
  EXPOSE 8080

+
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8080"]
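The Dockerfile now sets the repo id in the MODEL_NAME environment variable alongside USE_FP16, while the committed app.py (below) hardcodes the same id as a string. A minimal sketch of how the application could read those variables at startup instead; the default values and the dtype handling here are assumptions, not part of this commit:

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Assumption: fall back to the committed repo id if MODEL_NAME is unset.
model_name = os.environ.get("MODEL_NAME", "Hadeel11/fine-tuned-model")
# Assumption: USE_FP16 arrives as the string "True"/"False" set in the Dockerfile.
use_fp16 = os.environ.get("USE_FP16", "False").lower() == "true"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if use_fp16 else torch.float32,
)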
app.py CHANGED
@@ -4,9 +4,9 @@ from transformers import AutoModelForCausalLM, AutoTokenizer

  app = FastAPI()

- model_name = "Hadeel11/fine-tuned-model"
- model = AutoModelForCausalLM.from_pretrained(model_name)
- tokenizer = AutoTokenizer.from_pretrained(model_name)
+ # Load your fine-tuned model and tokenizer
+ model = AutoModelForCausalLM.from_pretrained("Hadeel11/fine-tuned-model")
+ tokenizer = AutoTokenizer.from_pretrained("Hadeel11/fine-tuned-model")

  class Query(BaseModel):
      question: str
@@ -18,3 +18,4 @@ async def predict(query: Query):
      outputs = model.generate(**inputs)
      answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
      return {"answer": answer}
+
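Once the container is running on port 8080 (per the EXPOSE and uvicorn CMD above), the endpoint can be exercised with a small client. The /predict path is an assumption inferred from the predict handler name; the route decorator sits outside the hunks shown:

import requests

# Assumed route: POST /predict on the port exposed in the Dockerfile.
resp = requests.post(
    "http://localhost:8080/predict",
    json={"question": "What is this model fine-tuned for?"},
)
resp.raise_for_status()
print(resp.json()["answer"])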