yusufs committed on
Commit
ded2af7
·
1 Parent(s): d2e0be1

feat(run.sh): add script for running openai server

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -22
  2. openai/run.sh +15 -0
Dockerfile CHANGED
@@ -15,25 +15,5 @@ EXPOSE 7860
15
 
16
  #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
17
 
18
- CMD [
19
- "python",
20
- "-u",
21
- "/app/openai/api_server.py",
22
- "--model",
23
- "meta-llama/Llama-3.2-3B-Instruct",
24
- "--revision",
25
- "0cb88a4f764b7a12671c53f0838cd831a0843b95",
26
- "--host",
27
- "0.0.0.0",
28
- "--port",
29
- "7860",
30
- "--max-num-batched-tokens",
31
- "32768",
32
- "--max-model-len",
33
- "32768",
34
- "--dtype",
35
- "half",
36
- "--enforce-eager",
37
- "--gpu-memory-utilization",
38
- "0.85"
39
- ]
 
15
 
16
  #CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
17
 
18
+ RUN chmod +x /app/run.sh
19
+ CMD ["/app/run.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
openai/run.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+
4
+ printf "Running vLLM OpenAI compatible API Server at port %s\n" "7860"
5
+
6
+ python -u /app/openai/api_server.py \
7
+ --model meta-llama/Llama-3.2-3B-Instruct \
8
+ --revision 0cb88a4f764b7a12671c53f0838cd831a0843b95 \
9
+ --host 0.0.0.0 \
10
+ --port 7860 \
11
+ --max-num-batched-tokens 32768 \
12
+ --max-model-len 32768 \
13
+ --dtype half \
14
+ --enforce-eager \
15
+ --gpu-memory-utilization 0.85