# Source: llm-metaeval/evals/tasks/mmlu_en_us.yaml
# Uploaded by flunardelli using huggingface_hub (commit b8ee329, verified)
# File size: 508 Bytes
# Task definition: the `all` configuration of the cais/mmlu dataset, scored
# as a four-option (A-D) multiple-choice task.
task: mmlu_all
dataset_path: cais/mmlu
dataset_name: all
description: "MMLU dataset"

# Evaluation runs on the `test` split; few-shot examples come from `dev`.
test_split: test
fewshot_split: dev
fewshot_config:
  # NOTE(review): presumably selects the first n documents of the few-shot
  # split in order -- confirm against the consumer's sampler registry.
  sampler: first_n

output_type: multiple_choice

# Prompt template. A literal block scalar (`|-`) is used so the line breaks
# between the question, the four options and "Answer:" are preserved; a
# double-quoted scalar spanning several lines would fold them into spaces.
# The `-` chomping indicator strips the trailing newline after "Answer:".
doc_to_text: |-
  {{question.strip()}}
  A. {{choices[0]}}
  B. {{choices[1]}}
  C. {{choices[2]}}
  D. {{choices[3]}}
  Answer:
doc_to_choice: ["A", "B", "C", "D"]
# NOTE(review): presumably `answer` is the index of the correct entry in
# `doc_to_choice` -- verify against the dataset schema.
doc_to_target: answer

# Each metric is its own mapping with its aggregation and direction.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true