yusufs committed on
Commit
ae7cfbb
·
1 Parent(s): 1a7087e

feat(first-commit): follow examples and tutorials

Browse files
Files changed (8) hide show
  1. .gitattributes +0 -35
  2. .gitignore +1 -0
  3. Dockerfile +13 -0
  4. README.md +8 -6
  5. main.py +61 -0
  6. poetry.lock +0 -0
  7. pyproject.toml +19 -0
  8. requirements.txt +119 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,14 @@
1
  ---
2
- title: Vllm Inference
3
- emoji: 🚀
4
  colorFrom: blue
5
- colorTo: green
6
  sdk: docker
7
  pinned: false
8
- license: apache-2.0
9
- short_description: VLLM Inference Engine Demo
10
  ---
11
-
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
1
  ---
2
+ title: Deploy VLLM
3
+ emoji: 🐢
4
  colorFrom: blue
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
 
 
8
  ---
 
9
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
10
+
11
+
12
+ ```shell
13
+ poetry export -f requirements.txt --output requirements.txt --without-hashes
14
+ ```
main.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Optional
2
+ from fastapi import FastAPI
3
+ from pydantic import BaseModel
4
+ from vllm import LLM, SamplingParams, RequestOutput
5
+
6
+
7
+ # Remember to set HF_TOKEN in the environment when running this app
8
+
9
+ app = FastAPI()
10
+
11
+ # Initialize the LLM engine
12
+ # The model name and revision below are pinned; change them to serve a different model
13
+
14
+ engine = LLM(
15
+ model='meta-llama/Llama-3.2-3B-Instruct',
16
+ revision="0cb88a4f764b7a12671c53f0838cd831a0843b95",
17
+ max_num_batched_tokens=512, # Reduced for T4
18
+ max_num_seqs=16, # Reduced for T4
19
+ gpu_memory_utilization=0.85, # Slightly increased, adjust if needed
20
+ max_model_len=131072, # Llama-3.2-3B-Instruct context length
21
+ enforce_eager=True, # Disable CUDA graph
22
+ dtype='half', # Use half precision
23
+ )
24
+
25
+
26
+ @app.get("/")
27
+ def greet_json():
28
+ return {"Hello": "World!"}
29
+
30
+
31
+ class GenerationRequest(BaseModel):
32
+ prompt: str
33
+ max_tokens: int = 100
34
+ temperature: float = 0.7
35
+ logit_bias: Optional[dict[int, float]] = None
36
+
37
+
38
+ class GenerationResponse(BaseModel):
39
+ text: Optional[str]
40
+ error: Optional[str]
41
+
42
+
43
+ @app.post("/generate-llama3-2", response_model=None)
44
+ def generate_text(request: GenerationRequest) -> list[RequestOutput] | dict[str, str]:
45
+     """Run the prompt through the engine; return {"error": message} if anything raises."""
46
+     # response_model=None stops FastAPI from deriving a response model from the
47
+     # return annotation, which fails for the non-Pydantic RequestOutput type.
48
+     try:
49
+         sampling_params: SamplingParams = SamplingParams(
50
+             temperature=request.temperature,
51
+             max_tokens=request.max_tokens,
52
+             logit_bias=request.logit_bias,
53
+         )
54
+         return engine.generate(
55
+             prompts=request.prompt,
56
+             sampling_params=sampling_params
57
+         )
58
+
59
+     except Exception as e:
60
+         # Surface the failure to the client as a JSON error payload instead of raising.
61
+         return {"error": str(e)}
poetry.lock ADDED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ package-mode = false
3
+ name = "gradio-qa"
4
+ version = "0.1.0"
5
+ description = ""
6
+ authors = ["Yusuf <[email protected]>"]
7
+ readme = "README.md"
8
+
9
+ [tool.poetry.dependencies]
10
+ python = ">=3.12,<3.13"
11
+ vllm = "^0.6.4.post1"
12
+ fastapi = "^0.115.5"
13
+ pydantic = "^2.10.2"
14
+ uvicorn = "^0.32.1"
15
+
16
+
17
+ [build-system]
18
+ requires = ["poetry-core"]
19
+ build-backend = "poetry.core.masonry.api"
requirements.txt ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohappyeyeballs==2.4.3 ; python_version >= "3.12" and python_version < "3.13"
2
+ aiohttp==3.11.7 ; python_version >= "3.12" and python_version < "3.13"
3
+ aiosignal==1.3.1 ; python_version >= "3.12" and python_version < "3.13"
4
+ annotated-types==0.7.0 ; python_version >= "3.12" and python_version < "3.13"
5
+ anyio==4.6.2.post1 ; python_version >= "3.12" and python_version < "3.13"
6
+ attrs==24.2.0 ; python_version >= "3.12" and python_version < "3.13"
7
+ certifi==2024.8.30 ; python_version >= "3.12" and python_version < "3.13"
8
+ cffi==1.17.1 ; python_version >= "3.12" and python_version < "3.13" and implementation_name == "pypy"
9
+ charset-normalizer==3.4.0 ; python_version >= "3.12" and python_version < "3.13"
10
+ click==8.1.7 ; python_version >= "3.12" and python_version < "3.13"
11
+ cloudpickle==3.1.0 ; python_version >= "3.12" and python_version < "3.13"
12
+ colorama==0.4.6 ; python_version >= "3.12" and python_version < "3.13" and (platform_system == "Windows" or sys_platform == "win32")
13
+ compressed-tensors==0.8.0 ; python_version >= "3.12" and python_version < "3.13"
14
+ datasets==2.14.4 ; python_version >= "3.12" and python_version < "3.13"
15
+ dill==0.3.7 ; python_version >= "3.12" and python_version < "3.13"
16
+ diskcache==5.6.3 ; python_version >= "3.12" and python_version < "3.13"
17
+ distro==1.9.0 ; python_version >= "3.12" and python_version < "3.13"
18
+ einops==0.8.0 ; python_version >= "3.12" and python_version < "3.13"
19
+ fastapi==0.115.5 ; python_version >= "3.12" and python_version < "3.13"
20
+ filelock==3.16.1 ; python_version >= "3.12" and python_version < "3.13"
21
+ frozenlist==1.5.0 ; python_version >= "3.12" and python_version < "3.13"
22
+ fsspec==2024.10.0 ; python_version >= "3.12" and python_version < "3.13"
23
+ fsspec[http]==2024.10.0 ; python_version >= "3.12" and python_version < "3.13"
24
+ gguf==0.10.0 ; python_version >= "3.12" and python_version < "3.13"
25
+ h11==0.14.0 ; python_version >= "3.12" and python_version < "3.13"
26
+ httpcore==1.0.7 ; python_version >= "3.12" and python_version < "3.13"
27
+ httptools==0.6.4 ; python_version >= "3.12" and python_version < "3.13"
28
+ httpx==0.27.2 ; python_version >= "3.12" and python_version < "3.13"
29
+ huggingface-hub==0.26.2 ; python_version >= "3.12" and python_version < "3.13"
30
+ idna==3.10 ; python_version >= "3.12" and python_version < "3.13"
31
+ importlib-metadata==8.5.0 ; python_version >= "3.12" and python_version < "3.13"
32
+ interegular==0.3.3 ; python_version >= "3.12" and python_version < "3.13"
33
+ jinja2==3.1.4 ; python_version >= "3.12" and python_version < "3.13"
34
+ jiter==0.8.0 ; python_version >= "3.12" and python_version < "3.13"
35
+ jsonschema-specifications==2024.10.1 ; python_version >= "3.12" and python_version < "3.13"
36
+ jsonschema==4.23.0 ; python_version >= "3.12" and python_version < "3.13"
37
+ lark==1.2.2 ; python_version >= "3.12" and python_version < "3.13"
38
+ llvmlite==0.43.0 ; python_version >= "3.12" and python_version < "3.13"
39
+ lm-format-enforcer==0.10.9 ; python_version >= "3.12" and python_version < "3.13"
40
+ markupsafe==3.0.2 ; python_version >= "3.12" and python_version < "3.13"
41
+ mistral-common[opencv]==1.5.1 ; python_version >= "3.12" and python_version < "3.13"
42
+ mpmath==1.3.0 ; python_version >= "3.12" and python_version < "3.13"
43
+ msgpack==1.1.0 ; python_version >= "3.12" and python_version < "3.13"
44
+ msgspec==0.18.6 ; python_version >= "3.12" and python_version < "3.13"
45
+ multidict==6.1.0 ; python_version >= "3.12" and python_version < "3.13"
46
+ multiprocess==0.70.15 ; python_version >= "3.12" and python_version < "3.13"
47
+ nest-asyncio==1.6.0 ; python_version >= "3.12" and python_version < "3.13"
48
+ networkx==3.4.2 ; python_version >= "3.12" and python_version < "3.13"
49
+ numba==0.60.0 ; python_version >= "3.12" and python_version < "3.13"
50
+ numpy==1.26.4 ; python_version >= "3.12" and python_version < "3.13"
51
+ nvidia-cublas-cu12==12.4.5.8 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
52
+ nvidia-cuda-cupti-cu12==12.4.127 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
53
+ nvidia-cuda-nvrtc-cu12==12.4.127 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
54
+ nvidia-cuda-runtime-cu12==12.4.127 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
55
+ nvidia-cudnn-cu12==9.1.0.70 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
56
+ nvidia-cufft-cu12==11.2.1.3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
57
+ nvidia-curand-cu12==10.3.5.147 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
58
+ nvidia-cusolver-cu12==11.6.1.9 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
59
+ nvidia-cusparse-cu12==12.3.1.170 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
60
+ nvidia-ml-py==12.560.30 ; python_version >= "3.12" and python_version < "3.13"
61
+ nvidia-nccl-cu12==2.21.5 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
62
+ nvidia-nvjitlink-cu12==12.4.127 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
63
+ nvidia-nvtx-cu12==12.4.127 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
64
+ openai==1.55.1 ; python_version >= "3.12" and python_version < "3.13"
65
+ opencv-python-headless==4.10.0.84 ; python_version >= "3.12" and python_version < "3.13"
66
+ outlines==0.0.46 ; python_version >= "3.12" and python_version < "3.13"
67
+ packaging==24.2 ; python_version >= "3.12" and python_version < "3.13"
68
+ pandas==2.2.3 ; python_version >= "3.12" and python_version < "3.13"
69
+ partial-json-parser==0.2.1.1.post4 ; python_version >= "3.12" and python_version < "3.13"
70
+ pillow==10.4.0 ; python_version >= "3.12" and python_version < "3.13"
71
+ prometheus-client==0.21.0 ; python_version >= "3.12" and python_version < "3.13"
72
+ prometheus-fastapi-instrumentator==7.0.0 ; python_version >= "3.12" and python_version < "3.13"
73
+ propcache==0.2.0 ; python_version >= "3.12" and python_version < "3.13"
74
+ protobuf==5.28.3 ; python_version >= "3.12" and python_version < "3.13"
75
+ psutil==6.1.0 ; python_version >= "3.12" and python_version < "3.13"
76
+ py-cpuinfo==9.0.0 ; python_version >= "3.12" and python_version < "3.13"
77
+ pyairports==2.1.1 ; python_version >= "3.12" and python_version < "3.13"
78
+ pyarrow==18.1.0 ; python_version >= "3.12" and python_version < "3.13"
79
+ pycountry==24.6.1 ; python_version >= "3.12" and python_version < "3.13"
80
+ pycparser==2.22 ; python_version >= "3.12" and python_version < "3.13" and implementation_name == "pypy"
81
+ pydantic-core==2.27.1 ; python_version >= "3.12" and python_version < "3.13"
82
+ pydantic==2.10.2 ; python_version >= "3.12" and python_version < "3.13"
83
+ python-dateutil==2.9.0.post0 ; python_version >= "3.12" and python_version < "3.13"
84
+ python-dotenv==1.0.1 ; python_version >= "3.12" and python_version < "3.13"
85
+ pytz==2024.2 ; python_version >= "3.12" and python_version < "3.13"
86
+ pyyaml==6.0.2 ; python_version >= "3.12" and python_version < "3.13"
87
+ pyzmq==26.2.0 ; python_version >= "3.12" and python_version < "3.13"
88
+ ray==2.39.0 ; python_version >= "3.12" and python_version < "3.13"
89
+ referencing==0.35.1 ; python_version >= "3.12" and python_version < "3.13"
90
+ regex==2024.11.6 ; python_version >= "3.12" and python_version < "3.13"
91
+ requests==2.32.3 ; python_version >= "3.12" and python_version < "3.13"
92
+ rpds-py==0.21.0 ; python_version >= "3.12" and python_version < "3.13"
93
+ safetensors==0.4.5 ; python_version >= "3.12" and python_version < "3.13"
94
+ sentencepiece==0.2.0 ; python_version >= "3.12" and python_version < "3.13"
95
+ setuptools==75.6.0 ; python_version >= "3.12" and python_version < "3.13"
96
+ six==1.16.0 ; python_version >= "3.12" and python_version < "3.13"
97
+ sniffio==1.3.1 ; python_version >= "3.12" and python_version < "3.13"
98
+ starlette==0.41.3 ; python_version >= "3.12" and python_version < "3.13"
99
+ sympy==1.13.1 ; python_version >= "3.12" and python_version < "3.13"
100
+ tiktoken==0.7.0 ; python_version >= "3.12" and python_version < "3.13"
101
+ tokenizers==0.20.4 ; python_version >= "3.12" and python_version < "3.13"
102
+ torch==2.5.1 ; python_version >= "3.12" and python_version < "3.13"
103
+ torchvision==0.20.1 ; python_version >= "3.12" and python_version < "3.13"
104
+ tqdm==4.67.1 ; python_version >= "3.12" and python_version < "3.13"
105
+ transformers==4.46.3 ; python_version >= "3.12" and python_version < "3.13"
106
+ triton==3.1.0 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version < "3.13" and python_version >= "3.12"
107
+ typing-extensions==4.12.2 ; python_version >= "3.12" and python_version < "3.13"
108
+ tzdata==2024.2 ; python_version >= "3.12" and python_version < "3.13"
109
+ urllib3==2.2.3 ; python_version >= "3.12" and python_version < "3.13"
110
+ uvicorn==0.32.1 ; python_version >= "3.12" and python_version < "3.13"
111
+ uvicorn[standard]==0.32.1 ; python_version >= "3.12" and python_version < "3.13"
112
+ uvloop==0.21.0 ; (sys_platform != "win32" and sys_platform != "cygwin") and platform_python_implementation != "PyPy" and python_version >= "3.12" and python_version < "3.13"
113
+ vllm==0.6.4.post1 ; python_version >= "3.12" and python_version < "3.13"
114
+ watchfiles==1.0.0 ; python_version >= "3.12" and python_version < "3.13"
115
+ websockets==14.1 ; python_version >= "3.12" and python_version < "3.13"
116
+ xformers==0.0.28.post3 ; platform_system == "Linux" and platform_machine == "x86_64" and python_version >= "3.12" and python_version < "3.13"
117
+ xxhash==3.5.0 ; python_version >= "3.12" and python_version < "3.13"
118
+ yarl==1.18.0 ; python_version >= "3.12" and python_version < "3.13"
119
+ zipp==3.21.0 ; python_version >= "3.12" and python_version < "3.13"