# agi_eval / app.py
# Author: Paul Alex
# Commit: add accuracy (9030486)
import requests, time, json, gradio as gr, pandas as pd, ast
def agi_answer(endpoint, instruction, question, temp, top_p, top_k, beams, max_tokens, timeout=600):
    """Query a remote Gradio inference endpoint and return its first answer string.

    Parameters:
        endpoint: base URL of the remote Gradio app (no trailing slash).
        instruction: system/instruction text sent as the first data element.
        question: user question sent as the second data element.
        temp, top_p, top_k, beams, max_tokens: generation parameters forwarded verbatim.
        timeout: seconds to wait for the HTTP response (default 600); without it
            requests would block forever on an unresponsive endpoint.

    Returns:
        The first element of the endpoint's 'data' payload (the generated answer).

    Raises:
        requests.HTTPError: if the endpoint returns a non-2xx status.
    """
    response = requests.post(
        f'{endpoint}/run/predict',
        json={
            'data': [
                instruction,
                question,
                temp,
                top_p,
                top_k,
                beams,
                max_tokens,
            ]
        },
        timeout=timeout,
    )
    # Surface HTTP errors explicitly instead of failing later with an
    # opaque JSON decode error on an HTML error page.
    response.raise_for_status()
    return response.json()['data'][0]
def eval_agi(endpoint, temp, top_p, top_k, beams, max_tokens):
    """Run the MMLU test set against a remote model endpoint and score it.

    Reads 'mmlu_testdf.csv' from the working directory; its 'input' column is
    expected to hold a Python-literal list of two message dicts
    (instruction first, question second — assumption based on the indexing
    below, confirm against the CSV), and 'ideal' holds the gold answer letter.

    Returns:
        [accuracy, DataFrame with 'ideal' vs predicted 'Answer_AGI' columns],
        where accuracy is exact-match on the first character of each answer.
    """
    test_df = pd.read_csv('mmlu_testdf.csv')
    total = test_df.shape[0]
    # Series.iteritems() was removed in pandas 2.0; items() is the replacement.
    for index, value in test_df['input'].items():
        # Parse the message list once instead of twice per row.
        messages = ast.literal_eval(value)
        ans = agi_answer(endpoint, messages[0]['content'], messages[1]['content'],
                         temp, top_p, top_k, beams, max_tokens)
        # Grade on the leading character only (the answer letter); keep the
        # full generation for inspection.
        test_df.loc[index, 'Answer_AGI'] = ans[:1]
        test_df.loc[index, 'Answer_AGI_raw'] = ans
        print(index, '/', total)
        time.sleep(0.001)  # brief pause to avoid hammering the endpoint
    accuracy = (test_df['ideal'] == test_df['Answer_AGI']).sum() / len(test_df)
    return [accuracy, test_df[['ideal', 'Answer_AGI']]]
# Gradio UI wiring. The gr.inputs / gr.outputs namespaces and the `default=`
# kwarg were removed in Gradio 3.x; use top-level components with `value=`.
demo = gr.Interface(
    fn=eval_agi,
    inputs=[
        gr.Textbox(value='https://191779ad955db5c67f.gradio.live', label='endpoint'),
        gr.Slider(0, 1, value=0.1, label='temperature'),
        gr.Slider(0, 1, value=0.75, label='top p'),
        gr.Slider(0, 100, value=40, label='top k'),
        gr.Slider(0, 4, value=4, label='beams'),
        gr.Slider(0, 2000, value=128, label='max tokens'),
    ],
    outputs=[
        gr.Label(label="Accuracy"),
        gr.Dataframe(),
    ],
)
demo.launch()