daqc committed
Commit 9cfb192 · 1 Parent(s): 4691368

Refactor environment templates for improved configuration flexibility

.env.template → .env.local.template RENAMED
@@ -1,103 +1,54 @@
  # =============================================================================
- # REQUIRED CONFIGURATION
  # =============================================================================
- # Hugging Face token with read/write permissions for repositories and inference API
- # Get it from: https://huggingface.co/settings/tokens
- HF_TOKEN=hg_...

  # -----------------------------------------------------------------------------
- # GENERATION SETTINGS
  # -----------------------------------------------------------------------------
  MAX_NUM_TOKENS=2048
  MAX_NUM_ROWS=1000
  DEFAULT_BATCH_SIZE=5

  # Required for chat data generation with Llama or Qwen models
  # Options: "llama3", "qwen2", or custom template string
- #MAGPIE_PRE_QUERY_TEMPLATE=qwen2
-
-
-
- # =============================================================================
- # MODEL & SERVICES CONFIGURATION
- # =============================================================================

  # -----------------------------------------------------------------------------
- # A. STANDALONE SETUP (No additional installation required)
  # -----------------------------------------------------------------------------

- # 1. HUGGING FACE SERVERLESS (Recommended default)
- # Just requires HF_TOKEN
- # MODEL=meta-llama/Llama-3.1-8B-Instruct
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct

- # 2. ARGILLA ON HUGGING FACE SPACES (Recommended for data annotation)
- # ARGILLA_API_URL=https://daqc-my-argilla.hf.space/
- #ARGILLA_API_KEY=
-
- # 3. OPENAI API
- # Requires OpenAI API key
  # OPENAI_BASE_URL=https://api.openai.com/v1/
  # MODEL=gpt-4
- # API_KEY=

  # -----------------------------------------------------------------------------
- # B. LOCAL SETUP (Requires local installation)
  # -----------------------------------------------------------------------------

  # 1. LOCAL OLLAMA
- # Requires: Ollama installed (https://ollama.ai)
- #OLLAMA_BASE_URL=http://127.0.0.1:11434/
- #MODEL=qwen2.5:32b-instruct-q5_K_S
- #TOKENIZER_ID=Qwen/Qwen2.5-32B-Instruct
-
- # MODEL=deepseek-r1:1.5b
- # TOKENIZER_ID=deepseek-r1:1.5b
-
-

  # 2. LOCAL VLLM
- # Requires: VLLM installed
  # VLLM_BASE_URL=http://127.0.0.1:8000/
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct
  # TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

- # 3. LOCAL TGI/ENDPOINTS
- # Requires: Text Generation Inference installed
  # HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
  # TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
-
-
- # -----------------------------------------------------------------------------
- # C. DOCKER SETUP (Ready to use with docker-compose, recommended for full setup)
- # -----------------------------------------------------------------------------
-
- # 1. DOCKER OLLAMA
- OLLAMA_BASE_URL=http://ollama:11434
- # Options for OLLAMA_HARDWARE: latest (for CPU/NVIDIA), rocm (for AMD)
- OLLAMA_HARDWARE=latest
-
-
- # DEEPSEEK R1
- #MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- #TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
- #MAGPIE_PRE_QUERY_TEMPLATE= "<|begin▁of▁sentence|>User: " # use the custom template for the model
-
- #LLAMA3.2
- MODEL=llama3.2:1b # model for instruction generation
- TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct # tokenizer for instruction generation
- MAGPIE_PRE_QUERY_TEMPLATE=llama3 # magpie template required for instruction generation
-
-
- # 2. DOCKER ARGILLA (persistent data)
- ARGILLA_API_URL=http://argilla:6900
- ARGILLA_USERNAME=admin
- ARGILLA_PASSWORD=admin1234
- ARGILLA_API_KEY=admin.1234
- ARGILLA_REINDEX_DATASET=1
-
- # Usage:
- #docker-compose --profile with-ollama --profile with-argilla build
- #(open new terminal) docker-compose --profile with-ollama up -d
- # docker-compose exec ollama ollama run llama3.2:1b
- #docker-compose --profile with-ollama --profile with-argilla up -d
 
  # =============================================================================
+ # LOCAL/API CONFIGURATION
  # =============================================================================

  # -----------------------------------------------------------------------------
+ # REQUIRED CONFIGURATION
  # -----------------------------------------------------------------------------
+ # Hugging Face token (required for all setups)
+ HF_TOKEN=hf_...
+
+ # Generation Settings
  MAX_NUM_TOKENS=2048
  MAX_NUM_ROWS=1000
  DEFAULT_BATCH_SIZE=5

  # Required for chat data generation with Llama or Qwen models
  # Options: "llama3", "qwen2", or custom template string
+ MAGPIE_PRE_QUERY_TEMPLATE=llama3

  # -----------------------------------------------------------------------------
+ # A. CLOUD API SERVICES
  # -----------------------------------------------------------------------------

+ # 1. HUGGING FACE INFERENCE API (Default, Recommended)
+ MODEL=meta-llama/Llama-3.1-8B-Instruct
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct

+ # 2. OPENAI API
  # OPENAI_BASE_URL=https://api.openai.com/v1/
  # MODEL=gpt-4
+ # API_KEY=sk-...
+
+ # 3. HUGGING FACE SPACE FOR ARGILLA (optional)
+ # ARGILLA_API_URL=https://your-space.hf.space/
+ # ARGILLA_API_KEY=your_key

  # -----------------------------------------------------------------------------
+ # B. LOCAL SERVICES (Requires Installation)
  # -----------------------------------------------------------------------------

  # 1. LOCAL OLLAMA
+ # OLLAMA_BASE_URL=http://127.0.0.1:11434/
+ # MODEL=llama3.2:1b
+ # TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct

  # 2. LOCAL VLLM
  # VLLM_BASE_URL=http://127.0.0.1:8000/
  # MODEL=Qwen/Qwen2.5-1.5B-Instruct
  # TOKENIZER_ID=Qwen/Qwen2.5-1.5B-Instruct

+ # 3. LOCAL TGI
  # HUGGINGFACE_BASE_URL=http://127.0.0.1:3000/
+ # MODEL=meta-llama/Llama-3.1-8B-Instruct
  # TOKENIZER_ID=meta-llama/Llama-3.1-8B-Instruct
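With the rename, this template is now scoped to local/API runs: copy it, set the token, and enable exactly one backend. A minimal sketch of that workflow (the .env.local target filename is inferred from the template's new name and is not stated in this diff):

cp .env.local.template .env.local
# In .env.local: set HF_TOKEN=hf_..., then uncomment one backend section
# (Hugging Face Inference API, OpenAI, Ollama, vLLM, or TGI)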
 
docker/.env.docker.template ADDED
@@ -0,0 +1,43 @@
+ # =============================================================================
+ # DOCKER CONFIGURATION ONLY - FULL SETUP (APP + OLLAMA + ARGILLA)
+ # =============================================================================
+
+ # Note: Before building:
+ # 1. Copy this template to the root directory: cp docker/.env.docker.template .env
+ # 2. Comment/uncomment the sections you want to use (OLLAMA and/or ARGILLA)
+ # 3. Then build and run with the appropriate docker compose command
+
+ # Hugging Face token with read/write permissions
+ HF_TOKEN=your_token_here
+
+ # -----------------------------------------------------------------------------
+ # GENERATION SETTINGS
+ # -----------------------------------------------------------------------------
+ MAX_NUM_TOKENS=2048
+ MAX_NUM_ROWS=1000
+ DEFAULT_BATCH_SIZE=5
+
+ # -----------------------------------------------------------------------------
+ # OLLAMA DOCKER CONFIGURATION
+ # -----------------------------------------------------------------------------
+ OLLAMA_BASE_URL=http://ollama:11434
+ OLLAMA_HARDWARE=latest
+
+ # LLAMA 3.2
+ MODEL=llama3.2:1b
+ TOKENIZER_ID=meta-llama/Llama-3.2-1B-Instruct
+ MAGPIE_PRE_QUERY_TEMPLATE=llama3
+
+ # DEEPSEEK R1
+ #MODEL=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ #TOKENIZER_ID=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
+ #MAGPIE_PRE_QUERY_TEMPLATE= "<|begin▁of▁sentence|>User: "
+
+ # -----------------------------------------------------------------------------
+ # ARGILLA DOCKER CONFIGURATION (persistent data)
+ # -----------------------------------------------------------------------------
+ ARGILLA_API_URL=http://argilla:6900
+ ARGILLA_USERNAME=admin
+ ARGILLA_PASSWORD=admin1234
+ ARGILLA_API_KEY=admin.1234
+ ARGILLA_REINDEX_DATASET=1
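
Read together with the usage comments removed from the old template, the full Docker workflow this refactor implies looks like the following sketch (the with-ollama/with-argilla profile names come from those removed comments; adjust to your compose file):

cp docker/.env.docker.template .env
docker-compose --profile with-ollama --profile with-argilla build
docker-compose --profile with-ollama up -d
docker-compose exec ollama ollama run llama3.2:1b   # pulls the model on first run
docker-compose --profile with-ollama --profile with-argilla up -d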