davidberenstein1957 committed
Commit 8dfc799 · 1 Parent(s): 2d84a88

add vllm deployment info

README.md CHANGED
@@ -89,6 +89,8 @@ Optionally, you can use different API providers and models.
 - `OPENAI_BASE_URL`: The base URL for any OpenAI compatible API, e.g. `https://api.openai.com/v1/`.
 - `OLLAMA_BASE_URL`: The base URL for any Ollama compatible API, e.g. `http://127.0.0.1:11434/`.
 - `HUGGINGFACE_BASE_URL`: The base URL for any Hugging Face compatible API, e.g. a TGI server or Dedicated Inference Endpoints. If you want to use serverless inference, only set the `MODEL`.
+- `VLLM_BASE_URL`: The base URL for any vLLM compatible API, e.g. `http://localhost:8000/`.
+
 
 SFT and Chat Data generation is only supported with Hugging Face Inference Endpoints, and you can set the following environment variables to use it with models other than Llama3 and Qwen2.
 
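The new `examples/vllm_deployment.py` file added below shows the variable in use end to end, from serving a model with vLLM to launching the generator against it.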
examples/vllm_deployment.py ADDED
@@ -0,0 +1,16 @@
+# pip install synthetic-dataset-generator
+# vllm serve Qwen/Qwen2.5-1.5B-Instruct
+import os
+
+from synthetic_dataset_generator import launch
+
+# os.environ["HF_TOKEN"] = "hf_..."  # push the data to huggingface
+os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"  # vllm base url
+os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"  # model id
+os.environ["TOKENIZER_ID"] = "Qwen/Qwen2.5-1.5B-Instruct"  # tokenizer id
+os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "qwen2"
+os.environ["MAX_NUM_ROWS"] = "10000"
+os.environ["DEFAULT_BATCH_SIZE"] = "2"
+os.environ["MAX_NUM_TOKENS"] = "1024"
+
+launch()
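For anyone trying the new example, a quick way to confirm that the `vllm serve` process from the top comment is actually reachable before launching the UI is to query its OpenAI-compatible model listing. A minimal standard-library sketch, assuming the default port used in the example script:

import json
import urllib.request

# vLLM's OpenAI-compatible server exposes GET /v1/models; the address
# below assumes the default port used in the example above.
with urllib.request.urlopen("http://127.0.0.1:8000/v1/models") as resp:
    served = json.load(resp)

print([m["id"] for m in served["data"]])  # expect ['Qwen/Qwen2.5-1.5B-Instruct']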
src/synthetic_dataset_generator/constants.py CHANGED
@@ -18,23 +18,28 @@ TOKENIZER_ID = os.getenv(key="TOKENIZER_ID", default=None)
 OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
 HUGGINGFACE_BASE_URL = os.getenv("HUGGINGFACE_BASE_URL")
+VLLM_BASE_URL = os.getenv("VLLM_BASE_URL")
+
+# check if model is set correctly
 if HUGGINGFACE_BASE_URL and MODEL:
     raise ValueError(
         "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
     )
 if not MODEL:
-    if OPENAI_BASE_URL or OLLAMA_BASE_URL:
+    if OPENAI_BASE_URL or OLLAMA_BASE_URL or VLLM_BASE_URL:
         raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
 
 # Check if multiple base URLs are provided
 base_urls = [
-    url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
+    url
+    for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL, VLLM_BASE_URL]
+    if url
 ]
 if len(base_urls) > 1:
     raise ValueError(
         f"Multiple base URLs provided: {', '.join(base_urls)}. Only one base URL can be set at a time."
     )
-BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL
+BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL or VLLM_BASE_URL
 
 
 # API Keys
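The net effect of the updated checks: at most one provider base URL may be set, and any provider other than serverless Hugging Face inference also requires `MODEL`. Because these checks run at module import time, a misconfiguration fails fast. A minimal sketch of the failure mode (run in a fresh interpreter with the package installed; the values are illustrative):

import os

# Setting two provider URLs at once should trip the "multiple base URLs"
# check added above in constants.py.
os.environ["VLLM_BASE_URL"] = "http://localhost:8000/"
os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"

try:
    import synthetic_dataset_generator.constants  # noqa: F401
except ValueError as err:
    print(err)  # Multiple base URLs provided: ...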
src/synthetic_dataset_generator/pipelines/base.py CHANGED
@@ -2,7 +2,7 @@ import math
 import random
 
 import gradio as gr
-from distilabel.llms import InferenceEndpointsLLM, OllamaLLM, OpenAILLM
+from distilabel.llms import ClientvLLM, InferenceEndpointsLLM, OllamaLLM, OpenAILLM
 from distilabel.steps.tasks import TextGeneration
 
 from synthetic_dataset_generator.constants import (
@@ -14,6 +14,7 @@ from synthetic_dataset_generator.constants import (
     OLLAMA_BASE_URL,
     OPENAI_BASE_URL,
     TOKENIZER_ID,
+    VLLM_BASE_URL,
 )
 
 TOKEN_INDEX = 0
@@ -109,6 +110,17 @@ def _get_llm(use_magpie_template=False, **kwargs):
             tokenizer_id=TOKENIZER_ID or MODEL,
             **kwargs,
         )
+    elif VLLM_BASE_URL:
+        if "generation_kwargs" in kwargs:
+            if "do_sample" in kwargs["generation_kwargs"]:
+                del kwargs["generation_kwargs"]["do_sample"]
+        llm = ClientvLLM(
+            base_url=VLLM_BASE_URL,
+            model=MODEL,
+            tokenizer=TOKENIZER_ID or MODEL,
+            api_key=_get_next_api_key(),
+            **kwargs,
+        )
     else:
         llm = InferenceEndpointsLLM(
             api_key=_get_next_api_key(),
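Two details in the new branch are worth calling out: `do_sample` is stripped from `generation_kwargs`, presumably because the OpenAI-style completion call that `ClientvLLM` makes does not accept that argument, and a tokenizer id is passed so the chat template can be applied on the client side. For reference, a minimal standalone sketch that mirrors the arguments used in `_get_llm` (the server address and generation kwargs are illustrative, and a vLLM server is assumed to be running):

from distilabel.llms import ClientvLLM

# Mirrors the new elif branch in _get_llm: ClientvLLM talks to a vLLM
# server over its OpenAI-compatible API.
llm = ClientvLLM(
    base_url="http://127.0.0.1:8000/",  # assumed local vLLM server
    model="Qwen/Qwen2.5-1.5B-Instruct",
    tokenizer="Qwen/Qwen2.5-1.5B-Instruct",  # applies the chat template client-side
    api_key="unused",  # vLLM ignores the key unless started with --api-key
    generation_kwargs={"temperature": 0.7, "max_new_tokens": 256},
)
llm.load()
print(llm.generate(inputs=[[{"role": "user", "content": "Hello!"}]]))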