daqc committed
Commit 9cfb192 · 1 Parent(s): 4691368

Refactor environment templates for improved configuration flexibility

.env.template → .env.local.template RENAMED
@@ -1,103 +1,54 @@
  # =============================================================================
- # REQUIRED CONFIGURATION
  # =============================================================================
- # Hugging Face token with read/write permissions for repositories and inference API
- # Get it from: https://huggingface.co/settings/tokens
- HF_TOKEN=hg_...

  # -----------------------------------------------------------------------------
- # GENERATION SETTINGS
  # -----------------------------------------------------------------------------
  MAX_NUM_TOKENS=2048
  MAX_NUM_ROWS=1000
  DEFAULT_BATCH_SIZE=5

  # Required for chat data generation with Llama or Qwen models
  # Options: "llama3", "qwen2", or custom template string
- #MAGPIE_PRE_QUERY_TEMPLATE=qwen2
-
-
-
- # =============================================================================
- # MODEL & SERVICES CONFIGURATION
- # =============================================================================

  # -----------------------------------------------------------------------------
- # A. STANDALONE SETUP (No additional installation required)
  # -----------------------------------------------------------------------------

- # 1. HUGGING FACE SERVERLESS (Recommended default)
- # Just requires HF_TOKEN
- # MODEL=meta-llama/Llama-3.1-8B-Instruct
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct

- # 2. ARGILLA ON HUGGING FACE SPACES (Recommended for data annotation)
- # ARGILLA_API_URL=https://daqc-my-argilla.hf.space/
- #ARGILLA_API_KEY=
-
- # 3. OPENAI API
- # Requires OpenAI API key
  # OPENAI_BASE_URL=https://api.openai.com/v1/
  # MODEL=gpt-4
- # API_KEY=

  # -----------------------------------------------------------------------------
- # B. LOCAL SETUP (Requires local installation)
  # -----------------------------------------------------------------------------

  # 1. LOCAL OLLAMA
- # Requires: Ollama installed (https://ollama.ai)
- #OLLAMA_BASE_URL=http://127.0.0.1:11434/
- #MODEL=qwen2.5:32b-instruct-q5_K_S
- #TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
-
- # MODEL=deepseek-r1:1.5b
- # TOKENIZER_ID=deepseek-r1:1.5b
-
-

  # 2. LOCAL VLLM
- # Requires: VLLM installed
  # VLLM_BASE_URL=http://127.0.0.1:8000/
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct
  # TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

- # 3. LOCAL TGI/ENDPOINTS
- # Requires: Text Generation Inference installed
  # HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
  # TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
-
-
- # -----------------------------------------------------------------------------
- # C. DOCKER SETUP (Ready to use with docker-compose, recommended for full setup)
- # -----------------------------------------------------------------------------
-
- # 1. DOCKER OLLAMA
- OLLAMA_BASE_URL=http://ollama:11434
- # Options for OLLAMA_HARDWARE: latest (for CPU/NVIDIA), rocm (for AMD)
- OLLAMA_HARDWARE=latest
-
-
- # DEEPSEEK R1
- #MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- #TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- #MAGPIE_PRE_QUERY_TEMPLATE= "<|begin▁of▁sentence|>User: " # use the custom template for the model
-
- #LLAMA3.2
- MODEL=llama3.2:1b # model for instruction generation
- TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct # tokenizer for instruction generation
- MAGPIE_PRE_QUERY_TEMPLATE=llama3 # magpie template required for instruction generation
-
-
- # 2. DOCKER ARGILLA (persistent data)
- ARGILLA_API_URL=http://argilla:6900
- ARGILLA_USERNAME=admin
- ARGILLA_PASSWORD=admin1234
- ARGILLA_API_KEY=admin.1234
- ARGILLA_REINDEX_DATASET=1
-
- # Usage:
- #docker-compose --profile with-ollama --profile with-argilla build
- #(open new terminal) docker-compose --profile with-ollama up -d
- # docker-compose exec ollama ollama run llama3.2:1b
- #docker-compose --profile with-ollama --profile with-argilla up -d
 
  # =============================================================================
+ # LOCAL/API CONFIGURATION
  # =============================================================================

  # -----------------------------------------------------------------------------
+ # REQUIRED CONFIGURATION
  # -----------------------------------------------------------------------------
+ # Hugging Face token (required for all setups)
+ HF_TOKEN=hf_...
+
+ # Generation Settings
  MAX_NUM_TOKENS=2048
  MAX_NUM_ROWS=1000
  DEFAULT_BATCH_SIZE=5

  # Required for chat data generation with Llama or Qwen models
  # Options: "llama3", "qwen2", or custom template string
+ MAGPIE_PRE_QUERY_TEMPLATE=llama3

  # -----------------------------------------------------------------------------
+ # A. CLOUD API SERVICES
  # -----------------------------------------------------------------------------

+ # 1. HUGGING FACE INFERENCE API (Default, Recommended)
+ MODEL=meta-llama/Llama-3.1-8B-Instruct
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct

+ # 2. OPENAI API
  # OPENAI_BASE_URL=https://api.openai.com/v1/
  # MODEL=gpt-4
+ # API_KEY=sk-...
+
+ # 3. HUGGING FACE SPACE FOR ARGILLA (optional)
+ # ARGILLA_API_URL=https://your-space.hf.space/
+ # ARGILLA_API_KEY=your_key

  # -----------------------------------------------------------------------------
+ # B. LOCAL SERVICES (Requires Installation)
  # -----------------------------------------------------------------------------

  # 1. LOCAL OLLAMA
+ # OLLAMA_BASE_URL=http://127.0.0.1:11434/
+ # MODEL=llama3.2:1b
+ # TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct

  # 2. LOCAL VLLM
  # VLLM_BASE_URL=http://127.0.0.1:8000/
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct
  # TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

+ # 3. LOCAL TGI
  # HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
+ # MODEL=meta-llama/Llama-3.1-8B-Instruct
  # TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
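With the rename, this template is now scoped to local/API runs: copy it, set the token, and enable exactly one backend. A minimal sketch of that workflow (the .env.local target filename is inferred from the template's new name and is not stated in this diff):

cp .env.local.template .env.local
# In .env.local: set HF_TOKEN=hf_..., then uncomment one backend section
# (Hugging Face Inference API, OpenAI, Ollama, vLLM, or TGI)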
 
docker/.env.docker.template ADDED
@@ -0,0 +1,43 @@
+ # =============================================================================
+ # DOCKER CONFIGURATION ONLY - FULL SETUP (APP + OLLAMA + ARGILLA)
+ # =============================================================================
+
+ # Note: Before building:
+ # 1. Copy this template to the root directory: cp docker/.env.docker.template .env
+ # 2. Comment/uncomment the sections you want to use (OLLAMA and/or ARGILLA)
+ # 3. Then build and run with the appropriate docker compose command
+
+ # Hugging Face token with read/write permissions
+ HF_TOKEN=your_token_here
+
+ # -----------------------------------------------------------------------------
+ # GENERATION SETTINGS
+ # -----------------------------------------------------------------------------
+ MAX_NUM_TOKENS=2048
+ MAX_NUM_ROWS=1000
+ DEFAULT_BATCH_SIZE=5
+
+ # -----------------------------------------------------------------------------
+ # OLLAMA DOCKER CONFIGURATION
+ # -----------------------------------------------------------------------------
+ OLLAMA_BASE_URL=http://ollama:11434
+ OLLAMA_HARDWARE=latest
+
+ # LLAMA 3.2
+ MODEL=llama3.2:1b
+ TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct
+ MAGPIE_PRE_QUERY_TEMPLATE=llama3
+
+ # DEEPSEEK R1
+ #MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ #TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ #MAGPIE_PRE_QUERY_TEMPLATE= "<|begin▁of▁sentence|>User: "
+
+ # -----------------------------------------------------------------------------
+ # ARGILLA DOCKER CONFIGURATION (persistent data)
+ # -----------------------------------------------------------------------------
+ ARGILLA_API_URL=http://argilla:6900
+ ARGILLA_USERNAME=admin
+ ARGILLA_PASSWORD=admin1234
+ ARGILLA_API_KEY=admin.1234
+ ARGILLA_REINDEX_DATASET=1
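
Read together with the usage comments removed from the old template, the full Docker workflow this refactor implies looks like the following sketch (the with-ollama/with-argilla profile names come from those removed comments; adjust to your compose file):

cp docker/.env.docker.template .env
docker-compose --profile with-ollama --profile with-argilla build
docker-compose --profile with-ollama up -d
docker-compose exec ollama ollama run llama3.2:1b   # pulls the model on first run
docker-compose --profile with-ollama --profile with-argilla up -d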