# Evaluation task definition for the `mmlu_all` task.
# NOTE(review): the key names look like an lm-evaluation-harness task config;
# confirm against the consuming tool's schema.

# Task identifier and data source.
task: mmlu_all
dataset_path: cais/mmlu
dataset_name: all
description: "MMLU dataset"

# Splits: evaluate on `test`; few-shot examples are drawn from `dev`.
test_split: test
fewshot_split: dev
fewshot_config:
  sampler: first_n

# Prompt construction and scoring target.
output_type: multiple_choice
# Jinja template: the stripped question, four lettered options, then "Answer:".
# NOTE(review): the segments are separated by single spaces, not line breaks.
# If the prompt was meant to put each option on its own line, the newlines
# were lost upstream -- confirm before changing, since editing this string
# changes evaluation results.
doc_to_text: "{{question.strip()}} A. {{choices[0]}} B. {{choices[1]}} C. {{choices[2]}} D. {{choices[3]}} Answer:"
doc_to_choice: ["A", "B", "C", "D"]
# Name of the document field holding the gold answer.
# NOTE(review): presumably an index into `doc_to_choice`; verify against the dataset.
doc_to_target: answer

# Reported metrics; both are averaged over documents and higher is better.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true