Commit 8dfc799
Parent: 2d84a88
add vllm deployment info
README.md CHANGED
@@ -89,6 +89,8 @@ Optionally, you can use different API providers and models.
 - `OPENAI_BASE_URL`: The base URL for any OpenAI compatible API, e.g. `https://api.openai.com/v1/`.
 - `OLLAMA_BASE_URL`: The base URL for any Ollama compatible API, e.g. `http://127.0.0.1:11434/`.
 - `HUGGINGFACE_BASE_URL`: The base URL for any Hugging Face compatible API, e.g. TGI server or Dedicated Inference Endpoints. If you want to use serverless inference, only set the `MODEL`.
+- `VLLM_BASE_URL`: The base URL for any VLLM compatible API, e.g. `http://localhost:8000/`.
+
 
 SFT and Chat Data generation is only supported with Hugging Face Inference Endpoints, and you can set the following environment variables to use it with models other than Llama3 and Qwen2.
 
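The provider base URLs are mutually exclusive (enforced in the constants.py change below). As a quick sanity check before pointing the generator at a vLLM server, one option is to hit the model listing route of vLLM's OpenAI-compatible API. A minimal sketch, assuming a server is already running on localhost; this probe is illustrative and not part of the commit:

# Illustrative probe, not part of this commit: vLLM's OpenAI-compatible
# server exposes GET /v1/models, which lists the model ids it is serving.
import json
import urllib.request

base_url = "http://localhost:8000/"  # the value you would put in VLLM_BASE_URL
with urllib.request.urlopen(base_url.rstrip("/") + "/v1/models") as response:
    print(json.dumps(json.load(response), indent=2))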
examples/vllm_deployment.py ADDED
@@ -0,0 +1,16 @@
+# pip install synthetic-dataset-generator
+# vllm serve Qwen/Qwen2.5-1.5B-Instruct
+import os
+
+from synthetic_dataset_generator import launch
+
+# os.environ["HF_TOKEN"] = "hf_..."  # push the data to huggingface
+os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"  # vllm base url
+os.environ["MODEL"] = "Qwen/Qwen2.5-1.5B-Instruct"  # model id
+os.environ["TOKENIZER_ID"] = "Qwen/Qwen2.5-1.5B-Instruct"  # tokenizer id
+os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "qwen2"
+os.environ["MAX_NUM_ROWS"] = "10000"
+os.environ["DEFAULT_BATCH_SIZE"] = "2"
+os.environ["MAX_NUM_TOKENS"] = "1024"
+
+launch()
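The example wires a locally served Qwen2.5 model into the generator, with `MAGPIE_PRE_QUERY_TEMPLATE` set to "qwen2" to match the model family. Assuming the "llama3" template name is also available (the README mentions Llama3 and Qwen2 support), a variant for a Llama 3 checkpoint would presumably look like the following; the model id is illustrative, not from this commit:

# Hypothetical variant of the example above for a Llama 3 model
# (model id and "llama3" template name are assumptions, not from this commit).
import os

from synthetic_dataset_generator import launch

os.environ["VLLM_BASE_URL"] = "http://127.0.0.1:8000/"
os.environ["MODEL"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["TOKENIZER_ID"] = "meta-llama/Meta-Llama-3.1-8B-Instruct"
os.environ["MAGPIE_PRE_QUERY_TEMPLATE"] = "llama3"

launch()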
src/synthetic_dataset_generator/constants.py CHANGED
@@ -18,23 +18,28 @@ TOKENIZER_ID = os.getenv(key="TOKENIZER_ID", default=None)
 OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL")
 OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL")
 HUGGINGFACE_BASE_URL = os.getenv("HUGGINGFACE_BASE_URL")
+VLLM_BASE_URL = os.getenv("VLLM_BASE_URL")
+
+# check if model is set correctly
 if HUGGINGFACE_BASE_URL and MODEL:
     raise ValueError(
         "`HUGGINGFACE_BASE_URL` and `MODEL` cannot be set at the same time. Use a model id for serverless inference and a base URL dedicated to Hugging Face Inference Endpoints."
     )
 if not MODEL:
-    if OPENAI_BASE_URL or OLLAMA_BASE_URL:
+    if OPENAI_BASE_URL or OLLAMA_BASE_URL or VLLM_BASE_URL:
         raise ValueError("`MODEL` is not set. Please provide a model id for inference.")
 
 # Check if multiple base URLs are provided
 base_urls = [
-    url for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL] if url
+    url
+    for url in [OPENAI_BASE_URL, OLLAMA_BASE_URL, HUGGINGFACE_BASE_URL, VLLM_BASE_URL]
+    if url
 ]
 if len(base_urls) > 1:
     raise ValueError(
         f"Multiple base URLs provided: {', '.join(base_urls)}. Only one base URL can be set at a time."
     )
-BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL
+BASE_URL = OPENAI_BASE_URL or OLLAMA_BASE_URL or HUGGINGFACE_BASE_URL or VLLM_BASE_URL
 
 
 # API Keys
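The net effect of these module-level checks: at most one provider base URL may be set, and every provider except serverless Hugging Face inference also needs `MODEL`. A minimal sketch of the failure mode, with illustrative values; since the checks run at import time, importing the module with two base URLs set should raise:

# Illustrative only: with two base URLs set, importing the constants module
# should raise the "Multiple base URLs provided" ValueError defined above.
import os

os.environ["OPENAI_BASE_URL"] = "https://api.openai.com/v1/"
os.environ["VLLM_BASE_URL"] = "http://localhost:8000/"
os.environ["MODEL"] = "gpt-4o-mini"  # hypothetical model id

import synthetic_dataset_generator.constants  # noqa: E402,F401  # raises ValueError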
src/synthetic_dataset_generator/pipelines/base.py CHANGED
@@ -2,7 +2,7 @@ import math
 import random
 
 import gradio as gr
-from distilabel.llms import InferenceEndpointsLLM, OllamaLLM, OpenAILLM
+from distilabel.llms import ClientvLLM, InferenceEndpointsLLM, OllamaLLM, OpenAILLM
 from distilabel.steps.tasks import TextGeneration
 
 from synthetic_dataset_generator.constants import (
@@ -14,6 +14,7 @@ from synthetic_dataset_generator.constants import (
     OLLAMA_BASE_URL,
     OPENAI_BASE_URL,
     TOKENIZER_ID,
+    VLLM_BASE_URL,
 )
 
 TOKEN_INDEX = 0
@@ -109,6 +110,17 @@ def _get_llm(use_magpie_template=False, **kwargs):
             tokenizer_id=TOKENIZER_ID or MODEL,
             **kwargs,
         )
+    elif VLLM_BASE_URL:
+        if "generation_kwargs" in kwargs:
+            if "do_sample" in kwargs["generation_kwargs"]:
+                del kwargs["generation_kwargs"]["do_sample"]
+        llm = ClientvLLM(
+            base_url=VLLM_BASE_URL,
+            model=MODEL,
+            tokenizer=TOKENIZER_ID or MODEL,
+            api_key=_get_next_api_key(),
+            **kwargs,
+        )
     else:
         llm = InferenceEndpointsLLM(
             api_key=_get_next_api_key(),