Commit 8dfc799
Parent: 2d84a88
add vllm deployment info
README.md CHANGED
@@ -89,6 +89,8 @@ Optionally, you can use different API providers and models.
 - `OPENAI_BASE_URL`: The base URL for any OpenAI compatible API, e.g. `https://api.openai.com/v1/`.
 - `OLLAMA_BASE_URL`: The base URL for any Ollama compatible API, e.g. `http://127.0.0.1:11434/`.
 - `HUGGINGFACE_BASE_URL`: The base URL for any Hugging Face compatible API, e.g. TGI server or Dedicated Inference Endpoints. If you want to use serverless inference, only set the `MODEL`.
+- `VLLM_BASE_URL`: The base URL for any VLLM compatible API, e.g. `http://localhost:8000/`.
+
 
 SFT and Chat Data generation is only supported with Hugging Face Inference Endpoints, and you can set the following environment variables to use it with models other than Llama3 and Qwen2.
 
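The provider base URLs are mutually exclusive (enforced in the constants.py change below). As a quick sanity check before pointing the generator at a vLLM server, one option is to hit the model listing route of vLLM's OpenAI-compatible API. A minimal sketch, assuming a server is already running on localhost; this probe is illustrative and not part of the commit:

# Illustrative probe, not part of this commit: vLLM's OpenAI-compatible
# server exposes GET /v1/models, which lists the model ids it is serving.
import json
import urllib.request

base_url = "http://localhost:8000/"  # the value you would put in VLLM_BASE_URL
with urllib.request.urlopen(base_url.rstrip("/") + "/v1/models") as response:
    print(json.dumps(json.load(response), indent=2))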
examples/vllm_deployment.py ADDED
@@ -0,0 +1,16 @@
+# pip install synthetic-dataset-generator
+# vllm serve Qwen/Qwen2.5-1.5B-Instruct
+import os
+
+from synthetic_dataset_generator import launch
+
+# os.environ["HF_TOKEN"] = "hf_..."  # push the data to huggingface
+os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"  # vllm base url
+os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"  # model id
+os.environ["TOKENIZER_ID"] = "Qwen/Qwen2.5-1.5B-Instruct"  # tokenizer id
+os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "qwen2"
+os.environ["MAX_NUM_ROWS"] = "10000"
+os.environ["DEFAULT_BATCH_SIZE"] = "2"
+os.environ["MAX_NUM_TOKENS"] = "1024"
+
+launch()
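The example wires a locally served Qwen2.5 model into the generator, with `MAGPIE_PRE_QUERY_TEMPLATE` set to "qwen2" to match the model family. Assuming the "llama3" template name is also available (the README mentions Llama3 and Qwen2 support), a variant for a Llama 3 checkpoint would presumably look like the following; the model id is illustrative, not from this commit:

# Hypothetical variant of the example above for a Llama 3 model
# (model id and "llama3" template name are assumptions, not from this commit).
import os

from synthetic_dataset_generator import launch

os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"
os.environ["MODEL"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["TOKENIZER_ID"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"

launch()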
src/synthetic_dataset_generator/constants.py CHANGED
@@ -18,23 +18,28 @@ TOKENIZER_ID = os.getenv(key="TOKENIZER_ID", default=None)
 OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
 HUGGINGFACE_BASE_URL = os.getenv("HUGGINGFACE_BASE_URL")
+VLLM_BASE_URL = os.getenv("VLLM_BASE_URL")
+
+# check if model is set correctly
 if HUGGINGFACE_BASE_URL and MODEL:
     raise ValueError(
         "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
     )
 if not MODEL:
-    if OPENAI_BASE_URL or OLLAMA_BASE_URL:
+    if OPENAI_BASE_URL or OLLAMA_BASE_URL or VLLM_BASE_URL:
         raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
 
 # Check if multiple base URLs are provided
 base_urls = [
-    url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
+    url
+    for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL, VLLM_BASE_URL]
+    if url
 ]
 if len(base_urls) > 1:
     raise ValueError(
         f"Multiple base URLs provided: {', '.join(base_urls)}. Only one base URL can be set at a time."
     )
-BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL
+BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL or VLLM_BASE_URL
 
 
 # API Keys
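The net effect of these module-level checks: at most one provider base URL may be set, and every provider except serverless Hugging Face inference also needs `MODEL`. A minimal sketch of the failure mode, with illustrative values; since the checks run at import time, importing the module with two base URLs set should raise:

# Illustrative only: with two base URLs set, importing the constants module
# should raise the "Multiple base URLs provided" ValueError defined above.
import os

os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
os.environ["VLLM_BASE_URL"] = "http://localhost:8000/"
os.environ["MODEL"] = "gpt-4o-mini"  # hypothetical model id

import synthetic_dataset_generator.constants  # noqa: E402,F401  # raises ValueError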
src/synthetic_dataset_generator/pipelines/base.py CHANGED
@@ -2,7 +2,7 @@ import math
 import random
 
 import gradio as gr
-from distilabel.llms import InferenceEndpointsLLM, OllamaLLM, OpenAILLM
+from distilabel.llms import ClientvLLM, InferenceEndpointsLLM, OllamaLLM, OpenAILLM
 from distilabel.steps.tasks import TextGeneration
 
 from synthetic_dataset_generator.constants import (
@@ -14,6 +14,7 @@ from synthetic_dataset_generator.constants import (
     OLLAMA_BASE_URL,
     OPENAI_BASE_URL,
     TOKENIZER_ID,
+    VLLM_BASE_URL,
 )
 
 TOKEN_INDEX = 0
@@ -109,6 +110,17 @@ def _get_llm(use_magpie_template=False, **kwargs):
             tokenizer_id=TOKENIZER_ID or MODEL,
             **kwargs,
         )
+    elif VLLM_BASE_URL:
+        if "generation_kwargs" in kwargs:
+            if "do_sample" in kwargs["generation_kwargs"]:
+                del kwargs["generation_kwargs"]["do_sample"]
+        llm = ClientvLLM(
+            base_url=VLLM_BASE_URL,
+            model=MODEL,
+            tokenizer=TOKENIZER_ID or MODEL,
+            api_key=_get_next_api_key(),
+            **kwargs,
+        )
     else:
         llm = InferenceEndpointsLLM(
             api_key=_get_next_api_key(),