# Task `mmlu_all`: multiple-choice evaluation over the `all` configuration of
# the `cais/mmlu` dataset.
task: mmlu_all
dataset_path: cais/mmlu
dataset_name: all
description: "MMLU dataset"

# The evaluation split is `test`; few-shot examples come from the `dev` split
# and are selected with the `first_n` sampler.
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n

output_type: multiple_choice

# Prompt template. A literal block scalar (`|-`) keeps the question and each
# lettered option on its own line and strips the trailing newline, so the
# prompt ends exactly at "Answer:". A multi-line double-quoted scalar would
# instead fold every line break into a single space.
doc_to_text: |-
  {{question.strip()}}
  A. {{choices[0]}}
  B. {{choices[1]}}
  C. {{choices[2]}}
  D. {{choices[3]}}
  Answer:
doc_to_choice: ["A", "B", "C", "D"]
# NOTE(review): presumably `answer` is the dataset field holding the gold
# label that selects one entry of `doc_to_choice` - confirm against the
# dataset schema.
doc_to_target: answer

# Both metrics are aggregated with `mean`; larger values are better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true