task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928355,0 anli_r2,acc,0.322,0.014782913600996667,0 anli_r3,acc,0.3516666666666667,0.013789711695404794,0 arc_challenge,acc,0.17832764505119453,0.011186139406711289,0 arc_challenge,acc_norm,0.21843003412969283,0.012074291605700983,0 arc_easy,acc,0.4006734006734007,0.010055304474255558,0 arc_easy,acc_norm,0.3611111111111111,0.009856013425811244,0 boolq,acc,0.5626911314984709,0.008676043429497423,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.34486817325800373,,1 copa,acc,0.59,0.049431107042371025,0 hellaswag,acc,0.2810197171878112,0.00448578446857668,0 hellaswag,acc_norm,0.29645488946425014,0.004557606227194299,0 piqa,acc,0.6245919477693145,0.011297839589776662,0 piqa,acc_norm,0.6213275299238302,0.011317163404516854,0 rte,acc,0.5306859205776173,0.030039730592197816,0 sciq,acc,0.684,0.014709193056057125,0 sciq,acc_norm,0.644,0.015149042659306623,0 storycloze_2016,acc,0.5622661678246926,0.01147242507417594,0 winogrande,acc,0.510655090765588,0.014049294536290393,0