philipp-zettl committed · Commit d4e5279 · Parent: 238f239

Update app.py (#2)

- Update app.py (f20f8a73f1dfc56dfe1d3b6a56118e3fda0001e4)
- Add queue (f8a810646edd593f006aa61cc6f5915ea15c0928)
- Attempt adding seed + make optimization optional (f0e697bf3a2896eb938cfa08c0469cb16759892a)
app.py
CHANGED
@@ -185,9 +185,9 @@ def find_best_parameters(eval_data, model, tokenizer, max_length=85):
 
 
 
-def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperature=0.5, num_return_sequences=1, max_length=85):
+def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperature=0.5, num_return_sequences=1, max_length=85, seed=42069):
     all_outputs = []
-    torch.manual_seed(
+    torch.manual_seed(seed)
     for input_text in inputs:
         model_inputs = tokenizer([input_text], max_length=512, padding=True, truncation=True)
         input_ids = torch.tensor(model_inputs['input_ids']).to(device)
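Note on the seed change above: calling torch.manual_seed before generation pins the global RNG state, so repeated runs with the same seed produce the same sampled outputs. A minimal sketch of that effect (illustration only, not from this commit):

# Illustration only: identical seeds yield identical sampled token ids.
import torch

def sample_ids(seed, vocab_size=100, n=5):
    torch.manual_seed(seed)                      # reset RNG before generating
    probs = torch.ones(vocab_size) / vocab_size  # toy uniform next-token distribution
    return torch.multinomial(probs, n, replacement=True).tolist()

assert sample_ids(42069) == sample_ids(42069)    # same seed, same outputs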
@@ -232,7 +232,7 @@ def run_model(inputs, tokenizer, model, num_beams=2, num_beam_groups=2, temperat
 
 
 @spaces.GPU
-def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_qg=1, num_return_sequences_qa=1, max_length=85):
+def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_qg=1, num_return_sequences_qa=1, max_length=85, seed=42069, optimize_questions=False):
     inputs = [
         f'context: {content}'
    ]
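With the two new keyword arguments, a caller can pin the RNG and opt into the parameter search. A hypothetical invocation of the updated signature (argument values are illustrative, not from the Space):

# Hypothetical usage; values are illustrative.
qnas = gen(
    "The Eiffel Tower was completed in 1889.",
    num_return_sequences_qg=3,   # three candidate questions
    seed=1234,                   # reproducible generations
    optimize_questions=True,     # enable the find_best_parameters pass
)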
@@ -244,21 +244,24 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
         num_beam_groups=num_return_sequences_qg,
         temperature=temperature_qg,
         num_return_sequences=num_return_sequences_qg,
-        max_length=max_length
+        max_length=max_length,
+        seed=seed
     )
 
-
-
-
-
-
-
-
-
-
-
-
-
+    if optimize_questions:
+        q_params = find_best_parameters(list(chain.from_iterable(question)), qg_model, tokenizer, max_length=max_length)
+
+        question = run_model(
+            inputs,
+            tokenizer,
+            qg_model,
+            num_beams=q_params[0],
+            num_beam_groups=q_params[1],
+            temperature=temperature_qg,
+            num_return_sequences=num_return_sequences_qg,
+            max_length=max_length,
+            seed=seed
+        )
 
     inputs = list(chain.from_iterable([
         [f'question: {q} context: {content}' for q in q_set] for q_set in question
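The body of find_best_parameters is defined earlier in app.py and not shown in this diff; only its signature appears in the first hunk header. A hypothetical sketch of what such a beam-parameter sweep could look like (function body, candidate grid, and score_fn are all assumptions, not the Space's actual code):

# Hypothetical sketch of a (num_beams, num_beam_groups) sweep; the real
# find_best_parameters in app.py may differ. score_fn is an assumed helper.
from itertools import chain

def sweep_beam_params(eval_data, model, tokenizer, max_length=85, score_fn=len):
    best, best_score = (2, 2), float('-inf')
    for num_beams in (2, 4, 8):
        for num_beam_groups in (2, 4):
            if num_beams % num_beam_groups:
                continue                      # groups must evenly divide beams
            outputs = run_model(              # run_model is defined in app.py
                eval_data, tokenizer, model,
                num_beams=num_beams,
                num_beam_groups=num_beam_groups,
                max_length=max_length,
            )
            score = sum(score_fn(o) for o in chain.from_iterable(outputs))
            if score > best_score:
                best, best_score = (num_beams, num_beam_groups), score
    return best                               # indexable like q_params[0], q_params[1]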
@@ -271,7 +274,8 @@ def gen(content, temperature_qg=0.5, temperature_qa=0.75, num_return_sequences_q
         num_beam_groups=num_return_sequences_qa,
         temperature=temperature_qa,
         num_return_sequences=num_return_sequences_qa,
-        max_length=max_length
+        max_length=max_length,
+        seed=seed
     )
 
     questions = list(chain.from_iterable(question))
@@ -305,6 +309,30 @@ def create_file_download(qnas):
 
 
 with gr.Blocks(css='.hidden_input {display: none;}') as demo:
+    with gr.Row(equal_height=True):
+        gr.Markdown(
+            """
+            # QA-Generator
+            A combination of fine-tuned flan-T5(-small) models chained in sequence
+            to generate:
+
+            A) a versatile set of questions
+            B) an accurate set of matching answers
+
+            according to a given piece of text content.
+
+            The idea is simple:
+
+            1. Add your content
+            2. Select the amount of questions you want to generate
+            2.2 (optional) Select the amount of answers you want to generate per given question
+            3. Press generate
+            4. ???
+            5. Profit
+
+            If you're satisfied with the generated data set, you can export it as TSV
+            to edit or import it into your favourite tool.
+            """)
     with gr.Row(equal_height=True):
         with gr.Group("Content"):
             content = gr.Textbox(label='Content', lines=15, placeholder='Enter text here', max_lines=10_000)
@@ -314,6 +342,8 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
             max_length = gr.Number(label='Max Length', value=85, minimum=1, step=1, maximum=512)
             num_return_sequences_qg = gr.Number(label='Number Questions', value=max_questions, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
             num_return_sequences_qa = gr.Number(label="Number Answers", value=max_answers, minimum=1, step=1, maximum=max(max_questions, max_elem_value))
+            seed = gr.Number(label="seed", value=42069)
+            optimize_questions = gr.Checkbox(label="Optimize questions?", value=False)
 
     with gr.Row():
         gen_btn = gr.Button("Generate")
@@ -321,14 +351,14 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
     @gr.render(
         inputs=[
             content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
-            max_length
+            max_length, seed, optimize_questions
         ],
         triggers=[gen_btn.click]
     )
     def render_results(content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa, max_length):
         qnas = gen(
             content, temperature_qg, temperature_qa, num_return_sequences_qg, num_return_sequences_qa,
-            max_length
+            max_length, seed, optimize_questions
         )
         df = gr.Dataframe(
             value=[u.values() for u in qnas],
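For context on the hunk above: @gr.render re-executes the decorated function and rebuilds the components it creates each time one of its triggers fires. A minimal self-contained sketch of the pattern (independent of this Space's code):

# Minimal @gr.render example; not part of this Space.
import gradio as gr

with gr.Blocks() as sketch:
    text = gr.Textbox(label="Input")
    btn = gr.Button("Render")

    @gr.render(inputs=[text], triggers=[btn.click])
    def show(value):
        # Components created here are torn down and rebuilt on every trigger.
        gr.Textbox(value=value.upper(), label="Output", interactive=False)

sketch.launch()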
@@ -342,4 +372,5 @@ with gr.Blocks(css='.hidden_input {display: none;}') as demo:
 
 
 
+demo.queue()
 demo.launch()
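demo.queue() enables Gradio's request queue, so concurrent generation requests wait their turn instead of hitting the GPU worker simultaneously; this is the "Add queue" item from the commit message. A hedged sketch of optional tuning (parameter values are illustrative, the commit itself uses the defaults):

# Illustrative only: queue() accepts tuning knobs; the commit passes none.
demo.queue(max_size=20, default_concurrency_limit=1)
demo.launch()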