task,metric,value,err,version anli_r1,acc,0.35,0.015090650341444236,0 anli_r2,acc,0.32,0.01475865230357488,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.19539249146757678,0.01158690718995291,0 arc_challenge,acc_norm,0.22781569965870307,0.012256708602326903,0 arc_easy,acc,0.39436026936026936,0.010028176038392995,0 arc_easy,acc_norm,0.35395622895622897,0.00981237064417441,0 boolq,acc,0.5675840978593272,0.008664798701065799,1 cb,acc,0.44642857142857145,0.067031892279424,1 cb,f1,0.30886196246139225,,1 copa,acc,0.61,0.04902071300001975,0 hellaswag,acc,0.2802230631348337,0.004481902637505665,0 hellaswag,acc_norm,0.2960565624377614,0.00455583246277459,0 piqa,acc,0.6311207834602829,0.011257546676908804,0 piqa,acc_norm,0.6207834602829162,0.011320331012905077,0 rte,acc,0.4548736462093863,0.029973636495415252,0 sciq,acc,0.681,0.01474640486547349,0 sciq,acc_norm,0.645,0.015139491543780532,0 storycloze_2016,acc,0.569748797434527,0.011449379528209637,0 winogrande,acc,0.505130228887135,0.014051745961790516,0