Spaces:
Running
Running
Default Values
Browse files
app.py
CHANGED
@@ -28,8 +28,6 @@ def generate_synthetic_dataset(
|
|
28 |
temperature,
|
29 |
top_p,
|
30 |
max_tokens,
|
31 |
-
api_base,
|
32 |
-
api_key,
|
33 |
dataset_type,
|
34 |
topic,
|
35 |
domains,
|
@@ -48,8 +46,6 @@ def generate_synthetic_dataset(
|
|
48 |
temperature (float): The temperature for the LLM.
|
49 |
top_p (float): The top_p value for the LLM.
|
50 |
max_tokens (int): The maximum number of tokens for the LLM.
|
51 |
-
api_base (str): The API base URL.
|
52 |
-
api_key (str): The API key.
|
53 |
dataset_type (str): The type of dataset to generate.
|
54 |
topic (str): The topic of the dataset.
|
55 |
domains (str): The domains for the dataset.
|
@@ -86,22 +82,13 @@ def generate_synthetic_dataset(
|
|
86 |
):
|
87 |
return "All fields except API Base and API Key must be filled."
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
api_key=api_key,
|
97 |
-
)
|
98 |
-
else:
|
99 |
-
llm_config = LLMConfig(
|
100 |
-
model=llm_model,
|
101 |
-
temperature=temperature,
|
102 |
-
top_p=top_p,
|
103 |
-
max_tokens=max_tokens,
|
104 |
-
)
|
105 |
|
106 |
dataset_config = DatasetConfig(
|
107 |
topic=topic,
|
@@ -168,7 +155,7 @@ def ui_main():
|
|
168 |
|
169 |
with gr.Row():
|
170 |
llm_model = gr.Textbox(
|
171 |
-
label="LLM Model", placeholder="model_provider/model_name"
|
172 |
)
|
173 |
temperature = gr.Slider(
|
174 |
label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.5
|
@@ -177,10 +164,6 @@ def ui_main():
|
|
177 |
label="Top P", minimum=0.0, maximum=1.0, step=0.1, value=0.9
|
178 |
)
|
179 |
max_tokens = gr.Number(label="Max Tokens", value=2048)
|
180 |
-
api_base = gr.Textbox(label="API Base", placeholder="API Base - Optional")
|
181 |
-
api_key = gr.Textbox(
|
182 |
-
label="API Key", placeholder="Your API Key - Optional", type="password"
|
183 |
-
)
|
184 |
|
185 |
with gr.Row():
|
186 |
dataset_type = gr.Dropdown(
|
@@ -194,15 +177,15 @@ def ui_main():
|
|
194 |
"Text Classification",
|
195 |
],
|
196 |
)
|
197 |
-
topic = gr.Textbox(label="Topic", placeholder="Dataset topic")
|
198 |
-
domains = gr.Textbox(label="Domains", placeholder="Comma-separated domains")
|
199 |
language = gr.Textbox(
|
200 |
label="Language", placeholder="Language", value="English"
|
201 |
)
|
202 |
additional_description = gr.Textbox(
|
203 |
label="Additional Description",
|
204 |
placeholder="Additional description",
|
205 |
-
value="",
|
206 |
)
|
207 |
num_entries = gr.Number(label="Number of Entries", value=1000)
|
208 |
|
@@ -211,17 +194,17 @@ def ui_main():
|
|
211 |
label="Hugging Face Token",
|
212 |
placeholder="Your HF Token",
|
213 |
type="password",
|
214 |
-
value=
|
215 |
)
|
216 |
hf_repo_name = gr.Textbox(
|
217 |
label="Hugging Face Repo Name",
|
218 |
placeholder="organization_or_user_name/dataset_name",
|
219 |
-
value=
|
220 |
)
|
221 |
llm_env_vars = gr.Textbox(
|
222 |
label="LLM Environment Variables",
|
223 |
placeholder="Comma-separated environment variables (e.g., KEY1=VALUE1, KEY2=VALUE2)",
|
224 |
-
value=
|
225 |
)
|
226 |
|
227 |
generate_button = gr.Button("Generate Dataset")
|
@@ -234,8 +217,6 @@ def ui_main():
|
|
234 |
temperature,
|
235 |
top_p,
|
236 |
max_tokens,
|
237 |
-
api_base,
|
238 |
-
api_key,
|
239 |
dataset_type,
|
240 |
topic,
|
241 |
domains,
|
|
|
28 |
temperature,
|
29 |
top_p,
|
30 |
max_tokens,
|
|
|
|
|
31 |
dataset_type,
|
32 |
topic,
|
33 |
domains,
|
|
|
46 |
temperature (float): The temperature for the LLM.
|
47 |
top_p (float): The top_p value for the LLM.
|
48 |
max_tokens (int): The maximum number of tokens for the LLM.
|
|
|
|
|
49 |
dataset_type (str): The type of dataset to generate.
|
50 |
topic (str): The topic of the dataset.
|
51 |
domains (str): The domains for the dataset.
|
|
|
82 |
):
|
83 |
return "All fields except API Base and API Key must be filled."
|
84 |
|
85 |
+
|
86 |
+
llm_config = LLMConfig(
|
87 |
+
model=llm_model,
|
88 |
+
temperature=temperature,
|
89 |
+
top_p=top_p,
|
90 |
+
max_tokens=max_tokens,
|
91 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
dataset_config = DatasetConfig(
|
94 |
topic=topic,
|
|
|
155 |
|
156 |
with gr.Row():
|
157 |
llm_model = gr.Textbox(
|
158 |
+
label="LLM Model", placeholder="model_provider/model_name", value="huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct"
|
159 |
)
|
160 |
temperature = gr.Slider(
|
161 |
label="Temperature", minimum=0.0, maximum=1.0, step=0.1, value=0.5
|
|
|
164 |
label="Top P", minimum=0.0, maximum=1.0, step=0.1, value=0.9
|
165 |
)
|
166 |
max_tokens = gr.Number(label="Max Tokens", value=2048)
|
|
|
|
|
|
|
|
|
167 |
|
168 |
with gr.Row():
|
169 |
dataset_type = gr.Dropdown(
|
|
|
177 |
"Text Classification",
|
178 |
],
|
179 |
)
|
180 |
+
topic = gr.Textbox(label="Topic", placeholder="Dataset topic", value="Artificial Intelligence")
|
181 |
+
domains = gr.Textbox(label="Domains", placeholder="Comma-separated domains", value="Machine Learning, Deep Learning")
|
182 |
language = gr.Textbox(
|
183 |
label="Language", placeholder="Language", value="English"
|
184 |
)
|
185 |
additional_description = gr.Textbox(
|
186 |
label="Additional Description",
|
187 |
placeholder="Additional description",
|
188 |
+
value="This dataset must be more focused on healthcare implementations of AI, Machine Learning, and Deep Learning.",
|
189 |
)
|
190 |
num_entries = gr.Number(label="Number of Entries", value=1000)
|
191 |
|
|
|
194 |
label="Hugging Face Token",
|
195 |
placeholder="Your HF Token",
|
196 |
type="password",
|
197 |
+
value="hf_1234566789912345677889",
|
198 |
)
|
199 |
hf_repo_name = gr.Textbox(
|
200 |
label="Hugging Face Repo Name",
|
201 |
placeholder="organization_or_user_name/dataset_name",
|
202 |
+
value="shekswess/synthgenai-dataset",
|
203 |
)
|
204 |
llm_env_vars = gr.Textbox(
|
205 |
label="LLM Environment Variables",
|
206 |
placeholder="Comma-separated environment variables (e.g., KEY1=VALUE1, KEY2=VALUE2)",
|
207 |
+
value="HUGGINGFACE_API_KEY=hf_1234566789912345677889",
|
208 |
)
|
209 |
|
210 |
generate_button = gr.Button("Generate Dataset")
|
|
|
217 |
temperature,
|
218 |
top_p,
|
219 |
max_tokens,
|
|
|
|
|
220 |
dataset_type,
|
221 |
topic,
|
222 |
domains,
|