MLLM_leaderboard / eval-results /PulsarAI /Nebula-7B /results_2023-10-09T12-29-36.965037.json
Wwwduojin's picture
Upload 1529 files
a312f2f
{
"config_general": {
"model_name": "PulsarAI/Nebula-7B",
"model_sha": "569f848698a468fb03d37033c67f3734bbaec127",
"model_size": "13.99 GB",
"model_dtype": "torch.float16",
"lighteval_sha": "0f318ecf002208468154899217b3ba7c6ae09374",
"num_few_shot_default": 0,
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": ""
},
"results": {
"harness|arc:challenge|25": {
"acc": 0.5418088737201365,
"acc_stderr": 0.0145602203087147,
"acc_norm": 0.5930034129692833,
"acc_norm_stderr": 0.014356399418009121
},
"harness|hellaswag|10": {
"acc": 0.6342362079267079,
"acc_stderr": 0.004806593424942265,
"acc_norm": 0.8345947022505477,
"acc_norm_stderr": 0.0037078660457296048
},
"harness|hendrycksTest-abstract_algebra|5": {
"acc": 0.34,
"acc_stderr": 0.04760952285695235,
"acc_norm": 0.34,
"acc_norm_stderr": 0.04760952285695235
},
"harness|hendrycksTest-anatomy|5": {
"acc": 0.5555555555555556,
"acc_stderr": 0.04292596718256981,
"acc_norm": 0.5555555555555556,
"acc_norm_stderr": 0.04292596718256981
},
"harness|hendrycksTest-astronomy|5": {
"acc": 0.5986842105263158,
"acc_stderr": 0.039889037033362836,
"acc_norm": 0.5986842105263158,
"acc_norm_stderr": 0.039889037033362836
},
"harness|hendrycksTest-business_ethics|5": {
"acc": 0.43,
"acc_stderr": 0.04975698519562428,
"acc_norm": 0.43,
"acc_norm_stderr": 0.04975698519562428
},
"harness|hendrycksTest-clinical_knowledge|5": {
"acc": 0.5773584905660377,
"acc_stderr": 0.03040233144576954,
"acc_norm": 0.5773584905660377,
"acc_norm_stderr": 0.03040233144576954
},
"harness|hendrycksTest-college_biology|5": {
"acc": 0.6944444444444444,
"acc_stderr": 0.03852084696008534,
"acc_norm": 0.6944444444444444,
"acc_norm_stderr": 0.03852084696008534
},
"harness|hendrycksTest-college_chemistry|5": {
"acc": 0.43,
"acc_stderr": 0.049756985195624284,
"acc_norm": 0.43,
"acc_norm_stderr": 0.049756985195624284
},
"harness|hendrycksTest-college_computer_science|5": {
"acc": 0.44,
"acc_stderr": 0.04988876515698589,
"acc_norm": 0.44,
"acc_norm_stderr": 0.04988876515698589
},
"harness|hendrycksTest-college_mathematics|5": {
"acc": 0.29,
"acc_stderr": 0.045604802157206845,
"acc_norm": 0.29,
"acc_norm_stderr": 0.045604802157206845
},
"harness|hendrycksTest-college_medicine|5": {
"acc": 0.5202312138728323,
"acc_stderr": 0.03809342081273957,
"acc_norm": 0.5202312138728323,
"acc_norm_stderr": 0.03809342081273957
},
"harness|hendrycksTest-college_physics|5": {
"acc": 0.3137254901960784,
"acc_stderr": 0.04617034827006715,
"acc_norm": 0.3137254901960784,
"acc_norm_stderr": 0.04617034827006715
},
"harness|hendrycksTest-computer_security|5": {
"acc": 0.68,
"acc_stderr": 0.046882617226215034,
"acc_norm": 0.68,
"acc_norm_stderr": 0.046882617226215034
},
"harness|hendrycksTest-conceptual_physics|5": {
"acc": 0.5063829787234042,
"acc_stderr": 0.03268335899936336,
"acc_norm": 0.5063829787234042,
"acc_norm_stderr": 0.03268335899936336
},
"harness|hendrycksTest-econometrics|5": {
"acc": 0.4649122807017544,
"acc_stderr": 0.04692008381368909,
"acc_norm": 0.4649122807017544,
"acc_norm_stderr": 0.04692008381368909
},
"harness|hendrycksTest-electrical_engineering|5": {
"acc": 0.503448275862069,
"acc_stderr": 0.041665675771015785,
"acc_norm": 0.503448275862069,
"acc_norm_stderr": 0.041665675771015785
},
"harness|hendrycksTest-elementary_mathematics|5": {
"acc": 0.38095238095238093,
"acc_stderr": 0.02501074911613759,
"acc_norm": 0.38095238095238093,
"acc_norm_stderr": 0.02501074911613759
},
"harness|hendrycksTest-formal_logic|5": {
"acc": 0.40476190476190477,
"acc_stderr": 0.043902592653775635,
"acc_norm": 0.40476190476190477,
"acc_norm_stderr": 0.043902592653775635
},
"harness|hendrycksTest-global_facts|5": {
"acc": 0.34,
"acc_stderr": 0.04760952285695235,
"acc_norm": 0.34,
"acc_norm_stderr": 0.04760952285695235
},
"harness|hendrycksTest-high_school_biology|5": {
"acc": 0.6516129032258065,
"acc_stderr": 0.027104826328100944,
"acc_norm": 0.6516129032258065,
"acc_norm_stderr": 0.027104826328100944
},
"harness|hendrycksTest-high_school_chemistry|5": {
"acc": 0.47783251231527096,
"acc_stderr": 0.035145285621750094,
"acc_norm": 0.47783251231527096,
"acc_norm_stderr": 0.035145285621750094
},
"harness|hendrycksTest-high_school_computer_science|5": {
"acc": 0.56,
"acc_stderr": 0.049888765156985884,
"acc_norm": 0.56,
"acc_norm_stderr": 0.049888765156985884
},
"harness|hendrycksTest-high_school_european_history|5": {
"acc": 0.696969696969697,
"acc_stderr": 0.03588624800091707,
"acc_norm": 0.696969696969697,
"acc_norm_stderr": 0.03588624800091707
},
"harness|hendrycksTest-high_school_geography|5": {
"acc": 0.7525252525252525,
"acc_stderr": 0.030746300742124495,
"acc_norm": 0.7525252525252525,
"acc_norm_stderr": 0.030746300742124495
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"acc": 0.8082901554404145,
"acc_stderr": 0.028408953626245282,
"acc_norm": 0.8082901554404145,
"acc_norm_stderr": 0.028408953626245282
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"acc": 0.5333333333333333,
"acc_stderr": 0.02529460802398647,
"acc_norm": 0.5333333333333333,
"acc_norm_stderr": 0.02529460802398647
},
"harness|hendrycksTest-high_school_mathematics|5": {
"acc": 0.2814814814814815,
"acc_stderr": 0.02742001935094527,
"acc_norm": 0.2814814814814815,
"acc_norm_stderr": 0.02742001935094527
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"acc": 0.5588235294117647,
"acc_stderr": 0.032252942323996406,
"acc_norm": 0.5588235294117647,
"acc_norm_stderr": 0.032252942323996406
},
"harness|hendrycksTest-high_school_physics|5": {
"acc": 0.3443708609271523,
"acc_stderr": 0.03879687024073327,
"acc_norm": 0.3443708609271523,
"acc_norm_stderr": 0.03879687024073327
},
"harness|hendrycksTest-high_school_psychology|5": {
"acc": 0.7743119266055046,
"acc_stderr": 0.017923087667803064,
"acc_norm": 0.7743119266055046,
"acc_norm_stderr": 0.017923087667803064
},
"harness|hendrycksTest-high_school_statistics|5": {
"acc": 0.39351851851851855,
"acc_stderr": 0.03331747876370312,
"acc_norm": 0.39351851851851855,
"acc_norm_stderr": 0.03331747876370312
},
"harness|hendrycksTest-high_school_us_history|5": {
"acc": 0.7156862745098039,
"acc_stderr": 0.031660096793998116,
"acc_norm": 0.7156862745098039,
"acc_norm_stderr": 0.031660096793998116
},
"harness|hendrycksTest-high_school_world_history|5": {
"acc": 0.7510548523206751,
"acc_stderr": 0.028146970599422644,
"acc_norm": 0.7510548523206751,
"acc_norm_stderr": 0.028146970599422644
},
"harness|hendrycksTest-human_aging|5": {
"acc": 0.6681614349775785,
"acc_stderr": 0.03160295143776679,
"acc_norm": 0.6681614349775785,
"acc_norm_stderr": 0.03160295143776679
},
"harness|hendrycksTest-human_sexuality|5": {
"acc": 0.6793893129770993,
"acc_stderr": 0.04093329229834278,
"acc_norm": 0.6793893129770993,
"acc_norm_stderr": 0.04093329229834278
},
"harness|hendrycksTest-international_law|5": {
"acc": 0.7355371900826446,
"acc_stderr": 0.040261875275912046,
"acc_norm": 0.7355371900826446,
"acc_norm_stderr": 0.040261875275912046
},
"harness|hendrycksTest-jurisprudence|5": {
"acc": 0.6851851851851852,
"acc_stderr": 0.04489931073591311,
"acc_norm": 0.6851851851851852,
"acc_norm_stderr": 0.04489931073591311
},
"harness|hendrycksTest-logical_fallacies|5": {
"acc": 0.6932515337423313,
"acc_stderr": 0.03623089915724146,
"acc_norm": 0.6932515337423313,
"acc_norm_stderr": 0.03623089915724146
},
"harness|hendrycksTest-machine_learning|5": {
"acc": 0.38392857142857145,
"acc_stderr": 0.04616143075028547,
"acc_norm": 0.38392857142857145,
"acc_norm_stderr": 0.04616143075028547
},
"harness|hendrycksTest-management|5": {
"acc": 0.6990291262135923,
"acc_stderr": 0.04541609446503947,
"acc_norm": 0.6990291262135923,
"acc_norm_stderr": 0.04541609446503947
},
"harness|hendrycksTest-marketing|5": {
"acc": 0.8205128205128205,
"acc_stderr": 0.025140935950335442,
"acc_norm": 0.8205128205128205,
"acc_norm_stderr": 0.025140935950335442
},
"harness|hendrycksTest-medical_genetics|5": {
"acc": 0.66,
"acc_stderr": 0.04760952285695237,
"acc_norm": 0.66,
"acc_norm_stderr": 0.04760952285695237
},
"harness|hendrycksTest-miscellaneous|5": {
"acc": 0.8007662835249042,
"acc_stderr": 0.014283378044296422,
"acc_norm": 0.8007662835249042,
"acc_norm_stderr": 0.014283378044296422
},
"harness|hendrycksTest-moral_disputes|5": {
"acc": 0.6242774566473989,
"acc_stderr": 0.02607431485165708,
"acc_norm": 0.6242774566473989,
"acc_norm_stderr": 0.02607431485165708
},
"harness|hendrycksTest-moral_scenarios|5": {
"acc": 0.2659217877094972,
"acc_stderr": 0.014776765066438885,
"acc_norm": 0.2659217877094972,
"acc_norm_stderr": 0.014776765066438885
},
"harness|hendrycksTest-nutrition|5": {
"acc": 0.6274509803921569,
"acc_stderr": 0.027684181883302888,
"acc_norm": 0.6274509803921569,
"acc_norm_stderr": 0.027684181883302888
},
"harness|hendrycksTest-philosophy|5": {
"acc": 0.662379421221865,
"acc_stderr": 0.026858825879488544,
"acc_norm": 0.662379421221865,
"acc_norm_stderr": 0.026858825879488544
},
"harness|hendrycksTest-prehistory|5": {
"acc": 0.7067901234567902,
"acc_stderr": 0.025329888171900926,
"acc_norm": 0.7067901234567902,
"acc_norm_stderr": 0.025329888171900926
},
"harness|hendrycksTest-professional_accounting|5": {
"acc": 0.44680851063829785,
"acc_stderr": 0.02965823509766691,
"acc_norm": 0.44680851063829785,
"acc_norm_stderr": 0.02965823509766691
},
"harness|hendrycksTest-professional_law|5": {
"acc": 0.4406779661016949,
"acc_stderr": 0.012680037994097065,
"acc_norm": 0.4406779661016949,
"acc_norm_stderr": 0.012680037994097065
},
"harness|hendrycksTest-professional_medicine|5": {
"acc": 0.5441176470588235,
"acc_stderr": 0.03025437257397671,
"acc_norm": 0.5441176470588235,
"acc_norm_stderr": 0.03025437257397671
},
"harness|hendrycksTest-professional_psychology|5": {
"acc": 0.5980392156862745,
"acc_stderr": 0.019835176484375383,
"acc_norm": 0.5980392156862745,
"acc_norm_stderr": 0.019835176484375383
},
"harness|hendrycksTest-public_relations|5": {
"acc": 0.6181818181818182,
"acc_stderr": 0.046534298079135075,
"acc_norm": 0.6181818181818182,
"acc_norm_stderr": 0.046534298079135075
},
"harness|hendrycksTest-security_studies|5": {
"acc": 0.5142857142857142,
"acc_stderr": 0.03199615232806286,
"acc_norm": 0.5142857142857142,
"acc_norm_stderr": 0.03199615232806286
},
"harness|hendrycksTest-sociology|5": {
"acc": 0.7661691542288557,
"acc_stderr": 0.029929415408348377,
"acc_norm": 0.7661691542288557,
"acc_norm_stderr": 0.029929415408348377
},
"harness|hendrycksTest-us_foreign_policy|5": {
"acc": 0.83,
"acc_stderr": 0.0377525168068637,
"acc_norm": 0.83,
"acc_norm_stderr": 0.0377525168068637
},
"harness|hendrycksTest-virology|5": {
"acc": 0.4819277108433735,
"acc_stderr": 0.03889951252827216,
"acc_norm": 0.4819277108433735,
"acc_norm_stderr": 0.03889951252827216
},
"harness|hendrycksTest-world_religions|5": {
"acc": 0.7660818713450293,
"acc_stderr": 0.03246721765117826,
"acc_norm": 0.7660818713450293,
"acc_norm_stderr": 0.03246721765117826
},
"harness|truthfulqa:mc|0": {
"mc1": 0.31334149326805383,
"mc1_stderr": 0.0162380650690596,
"mc2": 0.45561649492894496,
"mc2_stderr": 0.014644899277894422
},
"all": {
"acc": 0.570596346471807,
"acc_stderr": 0.034371584431446715,
"acc_norm": 0.5748599572103322,
"acc_norm_stderr": 0.03434950734212607,
"mc1": 0.31334149326805383,
"mc1_stderr": 0.0162380650690596,
"mc2": 0.45561649492894496,
"mc2_stderr": 0.014644899277894422
}
},
"versions": {
"harness|arc:challenge|25": 0,
"harness|hellaswag|10": 0,
"harness|hendrycksTest-abstract_algebra|5": 1,
"harness|hendrycksTest-anatomy|5": 1,
"harness|hendrycksTest-astronomy|5": 1,
"harness|hendrycksTest-business_ethics|5": 1,
"harness|hendrycksTest-clinical_knowledge|5": 1,
"harness|hendrycksTest-college_biology|5": 1,
"harness|hendrycksTest-college_chemistry|5": 1,
"harness|hendrycksTest-college_computer_science|5": 1,
"harness|hendrycksTest-college_mathematics|5": 1,
"harness|hendrycksTest-college_medicine|5": 1,
"harness|hendrycksTest-college_physics|5": 1,
"harness|hendrycksTest-computer_security|5": 1,
"harness|hendrycksTest-conceptual_physics|5": 1,
"harness|hendrycksTest-econometrics|5": 1,
"harness|hendrycksTest-electrical_engineering|5": 1,
"harness|hendrycksTest-elementary_mathematics|5": 1,
"harness|hendrycksTest-formal_logic|5": 1,
"harness|hendrycksTest-global_facts|5": 1,
"harness|hendrycksTest-high_school_biology|5": 1,
"harness|hendrycksTest-high_school_chemistry|5": 1,
"harness|hendrycksTest-high_school_computer_science|5": 1,
"harness|hendrycksTest-high_school_european_history|5": 1,
"harness|hendrycksTest-high_school_geography|5": 1,
"harness|hendrycksTest-high_school_government_and_politics|5": 1,
"harness|hendrycksTest-high_school_macroeconomics|5": 1,
"harness|hendrycksTest-high_school_mathematics|5": 1,
"harness|hendrycksTest-high_school_microeconomics|5": 1,
"harness|hendrycksTest-high_school_physics|5": 1,
"harness|hendrycksTest-high_school_psychology|5": 1,
"harness|hendrycksTest-high_school_statistics|5": 1,
"harness|hendrycksTest-high_school_us_history|5": 1,
"harness|hendrycksTest-high_school_world_history|5": 1,
"harness|hendrycksTest-human_aging|5": 1,
"harness|hendrycksTest-human_sexuality|5": 1,
"harness|hendrycksTest-international_law|5": 1,
"harness|hendrycksTest-jurisprudence|5": 1,
"harness|hendrycksTest-logical_fallacies|5": 1,
"harness|hendrycksTest-machine_learning|5": 1,
"harness|hendrycksTest-management|5": 1,
"harness|hendrycksTest-marketing|5": 1,
"harness|hendrycksTest-medical_genetics|5": 1,
"harness|hendrycksTest-miscellaneous|5": 1,
"harness|hendrycksTest-moral_disputes|5": 1,
"harness|hendrycksTest-moral_scenarios|5": 1,
"harness|hendrycksTest-nutrition|5": 1,
"harness|hendrycksTest-philosophy|5": 1,
"harness|hendrycksTest-prehistory|5": 1,
"harness|hendrycksTest-professional_accounting|5": 1,
"harness|hendrycksTest-professional_law|5": 1,
"harness|hendrycksTest-professional_medicine|5": 1,
"harness|hendrycksTest-professional_psychology|5": 1,
"harness|hendrycksTest-public_relations|5": 1,
"harness|hendrycksTest-security_studies|5": 1,
"harness|hendrycksTest-sociology|5": 1,
"harness|hendrycksTest-us_foreign_policy|5": 1,
"harness|hendrycksTest-virology|5": 1,
"harness|hendrycksTest-world_religions|5": 1,
"harness|truthfulqa:mc|0": 1,
"all": 0
},
"config_tasks": {
"harness|arc:challenge": "LM Harness task",
"harness|hellaswag": "LM Harness task",
"harness|hendrycksTest-abstract_algebra": "LM Harness task",
"harness|hendrycksTest-anatomy": "LM Harness task",
"harness|hendrycksTest-astronomy": "LM Harness task",
"harness|hendrycksTest-business_ethics": "LM Harness task",
"harness|hendrycksTest-clinical_knowledge": "LM Harness task",
"harness|hendrycksTest-college_biology": "LM Harness task",
"harness|hendrycksTest-college_chemistry": "LM Harness task",
"harness|hendrycksTest-college_computer_science": "LM Harness task",
"harness|hendrycksTest-college_mathematics": "LM Harness task",
"harness|hendrycksTest-college_medicine": "LM Harness task",
"harness|hendrycksTest-college_physics": "LM Harness task",
"harness|hendrycksTest-computer_security": "LM Harness task",
"harness|hendrycksTest-conceptual_physics": "LM Harness task",
"harness|hendrycksTest-econometrics": "LM Harness task",
"harness|hendrycksTest-electrical_engineering": "LM Harness task",
"harness|hendrycksTest-elementary_mathematics": "LM Harness task",
"harness|hendrycksTest-formal_logic": "LM Harness task",
"harness|hendrycksTest-global_facts": "LM Harness task",
"harness|hendrycksTest-high_school_biology": "LM Harness task",
"harness|hendrycksTest-high_school_chemistry": "LM Harness task",
"harness|hendrycksTest-high_school_computer_science": "LM Harness task",
"harness|hendrycksTest-high_school_european_history": "LM Harness task",
"harness|hendrycksTest-high_school_geography": "LM Harness task",
"harness|hendrycksTest-high_school_government_and_politics": "LM Harness task",
"harness|hendrycksTest-high_school_macroeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_mathematics": "LM Harness task",
"harness|hendrycksTest-high_school_microeconomics": "LM Harness task",
"harness|hendrycksTest-high_school_physics": "LM Harness task",
"harness|hendrycksTest-high_school_psychology": "LM Harness task",
"harness|hendrycksTest-high_school_statistics": "LM Harness task",
"harness|hendrycksTest-high_school_us_history": "LM Harness task",
"harness|hendrycksTest-high_school_world_history": "LM Harness task",
"harness|hendrycksTest-human_aging": "LM Harness task",
"harness|hendrycksTest-human_sexuality": "LM Harness task",
"harness|hendrycksTest-international_law": "LM Harness task",
"harness|hendrycksTest-jurisprudence": "LM Harness task",
"harness|hendrycksTest-logical_fallacies": "LM Harness task",
"harness|hendrycksTest-machine_learning": "LM Harness task",
"harness|hendrycksTest-management": "LM Harness task",
"harness|hendrycksTest-marketing": "LM Harness task",
"harness|hendrycksTest-medical_genetics": "LM Harness task",
"harness|hendrycksTest-miscellaneous": "LM Harness task",
"harness|hendrycksTest-moral_disputes": "LM Harness task",
"harness|hendrycksTest-moral_scenarios": "LM Harness task",
"harness|hendrycksTest-nutrition": "LM Harness task",
"harness|hendrycksTest-philosophy": "LM Harness task",
"harness|hendrycksTest-prehistory": "LM Harness task",
"harness|hendrycksTest-professional_accounting": "LM Harness task",
"harness|hendrycksTest-professional_law": "LM Harness task",
"harness|hendrycksTest-professional_medicine": "LM Harness task",
"harness|hendrycksTest-professional_psychology": "LM Harness task",
"harness|hendrycksTest-public_relations": "LM Harness task",
"harness|hendrycksTest-security_studies": "LM Harness task",
"harness|hendrycksTest-sociology": "LM Harness task",
"harness|hendrycksTest-us_foreign_policy": "LM Harness task",
"harness|hendrycksTest-virology": "LM Harness task",
"harness|hendrycksTest-world_religions": "LM Harness task",
"harness|truthfulqa:mc": "LM Harness task"
},
"summary_tasks": {
"harness|arc:challenge|25": {
"hashes": {
"hash_examples": "17b0cae357c0259e",
"hash_full_prompts": "045cbb916e5145c6",
"hash_input_tokens": "e43adcaa871b1364",
"hash_cont_tokens": "289aa98c400841d8"
},
"truncated": 0,
"non-truncated": 4687,
"padded": 4684,
"non-padded": 3,
"effective_few_shots": 25.0,
"num_truncated_few_shots": 0
},
"harness|hellaswag|10": {
"hashes": {
"hash_examples": "e1768ecb99d7ecf0",
"hash_full_prompts": "0b4c16983130f84f",
"hash_input_tokens": "08da6b3d0798f3e5",
"hash_cont_tokens": "ac460260c3e6efc9"
},
"truncated": 0,
"non-truncated": 40168,
"padded": 40039,
"non-padded": 129,
"effective_few_shots": 10.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-abstract_algebra|5": {
"hashes": {
"hash_examples": "280f9f325b40559a",
"hash_full_prompts": "2f776a367d23aea2",
"hash_input_tokens": "5e2b26eb9b4d08bf",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-anatomy|5": {
"hashes": {
"hash_examples": "2f83a4f1cab4ba18",
"hash_full_prompts": "516f74bef25df620",
"hash_input_tokens": "d33cda9df28030eb",
"hash_cont_tokens": "a52a4f60d98cbe5c"
},
"truncated": 0,
"non-truncated": 540,
"padded": 540,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-astronomy|5": {
"hashes": {
"hash_examples": "7d587b908da4d762",
"hash_full_prompts": "faf4e80f65de93ca",
"hash_input_tokens": "0dd50c500d64c57d",
"hash_cont_tokens": "10f7d8eeba97841d"
},
"truncated": 0,
"non-truncated": 608,
"padded": 608,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-business_ethics|5": {
"hashes": {
"hash_examples": "33e51740670de686",
"hash_full_prompts": "db01c3ef8e1479d4",
"hash_input_tokens": "40b524d0df3defc2",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-clinical_knowledge|5": {
"hashes": {
"hash_examples": "f3366dbe7eefffa4",
"hash_full_prompts": "49654f71d94b65c3",
"hash_input_tokens": "1f87d12d677e0dfd",
"hash_cont_tokens": "edef9975ba9165b5"
},
"truncated": 0,
"non-truncated": 1060,
"padded": 1056,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_biology|5": {
"hashes": {
"hash_examples": "ca2b6753a0193e7f",
"hash_full_prompts": "2b460b75f1fdfefd",
"hash_input_tokens": "dd6d69d8b13afbeb",
"hash_cont_tokens": "0aa103ec6602280b"
},
"truncated": 0,
"non-truncated": 576,
"padded": 572,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_chemistry|5": {
"hashes": {
"hash_examples": "22ff85f1d34f42d1",
"hash_full_prompts": "242c9be6da583e95",
"hash_input_tokens": "d45f3c401a00e97e",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_computer_science|5": {
"hashes": {
"hash_examples": "30318289d717a5cf",
"hash_full_prompts": "ed2bdb4e87c4b371",
"hash_input_tokens": "c04f21d954ae67b2",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_mathematics|5": {
"hashes": {
"hash_examples": "4944d1f0b6b5d911",
"hash_full_prompts": "770bc4281c973190",
"hash_input_tokens": "e7de03b4e1a407d8",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_medicine|5": {
"hashes": {
"hash_examples": "dd69cc33381275af",
"hash_full_prompts": "ad2a53e5250ab46e",
"hash_input_tokens": "9ce9516475f0b09c",
"hash_cont_tokens": "1979021dbc698754"
},
"truncated": 0,
"non-truncated": 692,
"padded": 684,
"non-padded": 8,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-college_physics|5": {
"hashes": {
"hash_examples": "875dd26d22655b0d",
"hash_full_prompts": "833a0d7b55aed500",
"hash_input_tokens": "f749592a0d6c967d",
"hash_cont_tokens": "7cf7fe2bab00acbd"
},
"truncated": 0,
"non-truncated": 408,
"padded": 404,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-computer_security|5": {
"hashes": {
"hash_examples": "006451eedc0ededb",
"hash_full_prompts": "94034c97e85d8f46",
"hash_input_tokens": "1a6dccf2066f3598",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-conceptual_physics|5": {
"hashes": {
"hash_examples": "8874ece872d2ca4c",
"hash_full_prompts": "e40d15a34640d6fa",
"hash_input_tokens": "6ce98c8aec8e7514",
"hash_cont_tokens": "903f64eed2b0d217"
},
"truncated": 0,
"non-truncated": 940,
"padded": 940,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-econometrics|5": {
"hashes": {
"hash_examples": "64d3623b0bfaa43f",
"hash_full_prompts": "612f340fae41338d",
"hash_input_tokens": "7794b03bf6b9bb11",
"hash_cont_tokens": "721ae6c5302c4bf2"
},
"truncated": 0,
"non-truncated": 456,
"padded": 456,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-electrical_engineering|5": {
"hashes": {
"hash_examples": "e98f51780c674d7e",
"hash_full_prompts": "10275b312d812ae6",
"hash_input_tokens": "e47ff85e05850517",
"hash_cont_tokens": "15a738960ed3e587"
},
"truncated": 0,
"non-truncated": 580,
"padded": 580,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-elementary_mathematics|5": {
"hashes": {
"hash_examples": "fc48208a5ac1c0ce",
"hash_full_prompts": "5ec274c6c82aca23",
"hash_input_tokens": "2ce6901704311790",
"hash_cont_tokens": "c96470462fc71683"
},
"truncated": 0,
"non-truncated": 1512,
"padded": 1512,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-formal_logic|5": {
"hashes": {
"hash_examples": "5a6525665f63ea72",
"hash_full_prompts": "07b92638c4a6b500",
"hash_input_tokens": "fa49c3faa72a3955",
"hash_cont_tokens": "0e1ce025c9d6ee7e"
},
"truncated": 0,
"non-truncated": 504,
"padded": 504,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-global_facts|5": {
"hashes": {
"hash_examples": "371d70d743b2b89b",
"hash_full_prompts": "332fdee50a1921b4",
"hash_input_tokens": "38992a391c7040d5",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 396,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_biology|5": {
"hashes": {
"hash_examples": "a79e1018b1674052",
"hash_full_prompts": "e624e26ede922561",
"hash_input_tokens": "4944fad6e0578120",
"hash_cont_tokens": "e34d57f7d3c4ca16"
},
"truncated": 0,
"non-truncated": 1240,
"padded": 1240,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_chemistry|5": {
"hashes": {
"hash_examples": "44bfc25c389f0e03",
"hash_full_prompts": "0e3e5f5d9246482a",
"hash_input_tokens": "bec955dfccee0331",
"hash_cont_tokens": "e8482d44df4b3740"
},
"truncated": 0,
"non-truncated": 812,
"padded": 796,
"non-padded": 16,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_computer_science|5": {
"hashes": {
"hash_examples": "8b8cdb1084f24169",
"hash_full_prompts": "c00487e67c1813cc",
"hash_input_tokens": "2ccfe020e0a8e824",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_european_history|5": {
"hashes": {
"hash_examples": "11cd32d0ef440171",
"hash_full_prompts": "318f4513c537c6bf",
"hash_input_tokens": "5e5e8bf3808e0ead",
"hash_cont_tokens": "d63e679a49418339"
},
"truncated": 0,
"non-truncated": 660,
"padded": 656,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_geography|5": {
"hashes": {
"hash_examples": "b60019b9e80b642f",
"hash_full_prompts": "ee5789fcc1a81b1e",
"hash_input_tokens": "6a624d76e1b40f9d",
"hash_cont_tokens": "d78483e286d06f1a"
},
"truncated": 0,
"non-truncated": 792,
"padded": 792,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_government_and_politics|5": {
"hashes": {
"hash_examples": "d221ec983d143dc3",
"hash_full_prompts": "ac42d888e1ce1155",
"hash_input_tokens": "8340aed0285230f4",
"hash_cont_tokens": "691cdff71ff5fe57"
},
"truncated": 0,
"non-truncated": 772,
"padded": 772,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_macroeconomics|5": {
"hashes": {
"hash_examples": "59c2915cacfd3fbb",
"hash_full_prompts": "c6bd9d25158abd0e",
"hash_input_tokens": "ca47137b1f3a769c",
"hash_cont_tokens": "d5ad4c5bdca967ad"
},
"truncated": 0,
"non-truncated": 1560,
"padded": 1560,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_mathematics|5": {
"hashes": {
"hash_examples": "1f8ac897608de342",
"hash_full_prompts": "5d88f41fc2d643a8",
"hash_input_tokens": "c9d341ab62890f30",
"hash_cont_tokens": "8f631ca5687dd0d4"
},
"truncated": 0,
"non-truncated": 1080,
"padded": 1080,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_microeconomics|5": {
"hashes": {
"hash_examples": "ead6a0f2f6c83370",
"hash_full_prompts": "bfc393381298609e",
"hash_input_tokens": "62573d06618ae7df",
"hash_cont_tokens": "7321048a28451473"
},
"truncated": 0,
"non-truncated": 952,
"padded": 952,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_physics|5": {
"hashes": {
"hash_examples": "c3f2025990afec64",
"hash_full_prompts": "fc78b4997e436734",
"hash_input_tokens": "ddddcaae96263221",
"hash_cont_tokens": "bb137581f269861c"
},
"truncated": 0,
"non-truncated": 604,
"padded": 604,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_psychology|5": {
"hashes": {
"hash_examples": "21f8aab618f6d636",
"hash_full_prompts": "d5c76aa40b9dbc43",
"hash_input_tokens": "ef9c1ae343139fdd",
"hash_cont_tokens": "b455cab2675bd863"
},
"truncated": 0,
"non-truncated": 2180,
"padded": 2161,
"non-padded": 19,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_statistics|5": {
"hashes": {
"hash_examples": "2386a60a11fc5de3",
"hash_full_prompts": "4c5c8be5aafac432",
"hash_input_tokens": "eb4abd87b0e863cc",
"hash_cont_tokens": "1b3196fec7e58037"
},
"truncated": 0,
"non-truncated": 864,
"padded": 864,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_us_history|5": {
"hashes": {
"hash_examples": "74961543be40f04f",
"hash_full_prompts": "5d5ca4840131ba21",
"hash_input_tokens": "63548c7fa9ba7a78",
"hash_cont_tokens": "a331dedc2aa01b3e"
},
"truncated": 0,
"non-truncated": 816,
"padded": 816,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-high_school_world_history|5": {
"hashes": {
"hash_examples": "2ad2f6b7198b2234",
"hash_full_prompts": "11845057459afd72",
"hash_input_tokens": "83c5da18bfa50812",
"hash_cont_tokens": "d0fbe030b8c8c2bf"
},
"truncated": 0,
"non-truncated": 948,
"padded": 948,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_aging|5": {
"hashes": {
"hash_examples": "1a7199dc733e779b",
"hash_full_prompts": "756b9096b8eaf892",
"hash_input_tokens": "c93c778cb8c58a32",
"hash_cont_tokens": "1dd29c3755494850"
},
"truncated": 0,
"non-truncated": 892,
"padded": 892,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-human_sexuality|5": {
"hashes": {
"hash_examples": "7acb8fdad97f88a6",
"hash_full_prompts": "731a52ff15b8cfdb",
"hash_input_tokens": "1daed91f54b42f7d",
"hash_cont_tokens": "c85573f663c10691"
},
"truncated": 0,
"non-truncated": 524,
"padded": 524,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-international_law|5": {
"hashes": {
"hash_examples": "1300bfd0dfc59114",
"hash_full_prompts": "db2aefbff5eec996",
"hash_input_tokens": "cfdae69f75ee8670",
"hash_cont_tokens": "d263804ba918154f"
},
"truncated": 0,
"non-truncated": 484,
"padded": 484,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-jurisprudence|5": {
"hashes": {
"hash_examples": "083b1e4904c48dc2",
"hash_full_prompts": "0f89ee3fe03d6a21",
"hash_input_tokens": "173979adbb5ab44e",
"hash_cont_tokens": "581986691a84ece8"
},
"truncated": 0,
"non-truncated": 432,
"padded": 432,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-logical_fallacies|5": {
"hashes": {
"hash_examples": "709128f9926a634c",
"hash_full_prompts": "98a04b1f8f841069",
"hash_input_tokens": "7b7d06271aff55ff",
"hash_cont_tokens": "55a858b28bbda458"
},
"truncated": 0,
"non-truncated": 652,
"padded": 652,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-machine_learning|5": {
"hashes": {
"hash_examples": "88f22a636029ae47",
"hash_full_prompts": "2e1c8d4b1e0cc921",
"hash_input_tokens": "ca062cfd7c7fddcb",
"hash_cont_tokens": "e99d3d3efd4ac7a3"
},
"truncated": 0,
"non-truncated": 448,
"padded": 445,
"non-padded": 3,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-management|5": {
"hashes": {
"hash_examples": "8c8a1e07a2151dca",
"hash_full_prompts": "f51611f514b265b0",
"hash_input_tokens": "fc47171ffb714da3",
"hash_cont_tokens": "13d9dc56bca34726"
},
"truncated": 0,
"non-truncated": 412,
"padded": 412,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-marketing|5": {
"hashes": {
"hash_examples": "2668953431f91e96",
"hash_full_prompts": "77562bef997c7650",
"hash_input_tokens": "aa29e9d883670c8f",
"hash_cont_tokens": "2700ea26933916a2"
},
"truncated": 0,
"non-truncated": 936,
"padded": 936,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-medical_genetics|5": {
"hashes": {
"hash_examples": "9c2dda34a2ea4fd2",
"hash_full_prompts": "202139046daa118f",
"hash_input_tokens": "88ad044b653ecaa5",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-miscellaneous|5": {
"hashes": {
"hash_examples": "41adb694024809c2",
"hash_full_prompts": "bffec9fc237bcf93",
"hash_input_tokens": "f9e7e01573277484",
"hash_cont_tokens": "7bf4341c79587250"
},
"truncated": 0,
"non-truncated": 3132,
"padded": 3132,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_disputes|5": {
"hashes": {
"hash_examples": "3171c13ba3c594c4",
"hash_full_prompts": "170831fc36f1d59e",
"hash_input_tokens": "03728b9e48594c28",
"hash_cont_tokens": "38a48e9de6976f00"
},
"truncated": 0,
"non-truncated": 1384,
"padded": 1360,
"non-padded": 24,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-moral_scenarios|5": {
"hashes": {
"hash_examples": "9873e077e83e0546",
"hash_full_prompts": "08f4ceba3131a068",
"hash_input_tokens": "04a903966514d177",
"hash_cont_tokens": "761c4dc187689d89"
},
"truncated": 0,
"non-truncated": 3580,
"padded": 3580,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-nutrition|5": {
"hashes": {
"hash_examples": "7db1d8142ec14323",
"hash_full_prompts": "4c0e68e3586cb453",
"hash_input_tokens": "a2176d3ac6f01cf0",
"hash_cont_tokens": "65005bd7d6f6012a"
},
"truncated": 0,
"non-truncated": 1224,
"padded": 1224,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-philosophy|5": {
"hashes": {
"hash_examples": "9b455b7d72811cc8",
"hash_full_prompts": "e467f822d8a0d3ff",
"hash_input_tokens": "a96dc872948245a8",
"hash_cont_tokens": "0b47934fb6314dec"
},
"truncated": 0,
"non-truncated": 1244,
"padded": 1244,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-prehistory|5": {
"hashes": {
"hash_examples": "8be90d0f538f1560",
"hash_full_prompts": "152187949bcd0921",
"hash_input_tokens": "e0b03637947e9efa",
"hash_cont_tokens": "3f20acd855ee0a29"
},
"truncated": 0,
"non-truncated": 1296,
"padded": 1296,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_accounting|5": {
"hashes": {
"hash_examples": "8d377597916cd07e",
"hash_full_prompts": "0eb7345d6144ee0d",
"hash_input_tokens": "0b4c6d0e49c47ab4",
"hash_cont_tokens": "8f122ba881355d4b"
},
"truncated": 0,
"non-truncated": 1128,
"padded": 1128,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_law|5": {
"hashes": {
"hash_examples": "cd9dbc52b3c932d6",
"hash_full_prompts": "36ac764272bfb182",
"hash_input_tokens": "bcbdbbde22ec73e3",
"hash_cont_tokens": "90d5df417c4d3fd3"
},
"truncated": 0,
"non-truncated": 6136,
"padded": 6136,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_medicine|5": {
"hashes": {
"hash_examples": "b20e4e816c1e383e",
"hash_full_prompts": "7b8d69ea2acaf2f7",
"hash_input_tokens": "c54d753563114d45",
"hash_cont_tokens": "4a2d2988884f7f70"
},
"truncated": 0,
"non-truncated": 1088,
"padded": 1088,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-professional_psychology|5": {
"hashes": {
"hash_examples": "d45b73b22f9cc039",
"hash_full_prompts": "fe8937e9ffc99771",
"hash_input_tokens": "9e6e34f48034edc0",
"hash_cont_tokens": "e0a952cb8a9c81de"
},
"truncated": 0,
"non-truncated": 2448,
"padded": 2448,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-public_relations|5": {
"hashes": {
"hash_examples": "0d25072e1761652a",
"hash_full_prompts": "f9adc39cfa9f42ba",
"hash_input_tokens": "634feb3f97d1064d",
"hash_cont_tokens": "1fa77a8dff3922b8"
},
"truncated": 0,
"non-truncated": 440,
"padded": 440,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-security_studies|5": {
"hashes": {
"hash_examples": "62bb8197e63d60d4",
"hash_full_prompts": "869c9c3ae196b7c3",
"hash_input_tokens": "ca8497342e5b1d57",
"hash_cont_tokens": "81fc9cb3cbdd52db"
},
"truncated": 0,
"non-truncated": 980,
"padded": 980,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-sociology|5": {
"hashes": {
"hash_examples": "e7959df87dea8672",
"hash_full_prompts": "1a1fc00e17b3a52a",
"hash_input_tokens": "ae361375c940a0fb",
"hash_cont_tokens": "2a0493252ed2cf43"
},
"truncated": 0,
"non-truncated": 804,
"padded": 800,
"non-padded": 4,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-us_foreign_policy|5": {
"hashes": {
"hash_examples": "4a56a01ddca44dca",
"hash_full_prompts": "0c7a7081c71c07b6",
"hash_input_tokens": "e8bdf33cf82d89f5",
"hash_cont_tokens": "17b868b63507f9a3"
},
"truncated": 0,
"non-truncated": 400,
"padded": 400,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-virology|5": {
"hashes": {
"hash_examples": "451cc86a8c4f4fe9",
"hash_full_prompts": "01e95325d8b738e4",
"hash_input_tokens": "32ce831e0ba2d2e2",
"hash_cont_tokens": "5ab892d003b00c98"
},
"truncated": 0,
"non-truncated": 664,
"padded": 664,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|hendrycksTest-world_religions|5": {
"hashes": {
"hash_examples": "3b29cfaf1a81c379",
"hash_full_prompts": "e0d79a15083dfdff",
"hash_input_tokens": "4ed9b68c5694211b",
"hash_cont_tokens": "15a5e5dbdfbb8568"
},
"truncated": 0,
"non-truncated": 684,
"padded": 684,
"non-padded": 0,
"effective_few_shots": 5.0,
"num_truncated_few_shots": 0
},
"harness|truthfulqa:mc|0": {
"hashes": {
"hash_examples": "23176c0531c7b867",
"hash_full_prompts": "36a6d90e75d92d4a",
"hash_input_tokens": "a30fbd9af05d717a",
"hash_cont_tokens": "5a8d4bb398b1c3c0"
},
"truncated": 0,
"non-truncated": 9996,
"padded": 9996,
"non-padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "d84d18e9a963753d",
"hash_full_prompts": "12b540783521a8e6",
"hash_input_tokens": "3d86ffeb7677bd9d",
"hash_cont_tokens": "35527140510ee91a"
},
"total_evaluation_time_secondes": "4129.603856563568",
"truncated": 0,
"non-truncated": 111019,
"padded": 110793,
"non-padded": 226,
"num_truncated_few_shots": 0
}
}