task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653591,0 anli_r2,acc,0.347,0.015060472031706618,0 anli_r3,acc,0.3233333333333333,0.01350837286730022,0 arc_challenge,acc,0.1825938566552901,0.011289730684565,0 arc_challenge,acc_norm,0.2295221843003413,0.012288926760890797,0 arc_easy,acc,0.3952020202020202,0.010031894052790978,0 arc_easy,acc_norm,0.36826599326599324,0.009897286209010894,0 boolq,acc,0.5614678899082569,0.008678720482001873,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.2554143126177024,,1 copa,acc,0.63,0.04852365870939099,0 hellaswag,acc,0.279326827325234,0.004477514681328155,0 hellaswag,acc_norm,0.2956582354112727,0.0045540545376920125,0 piqa,acc,0.6294885745375408,0.011267826475447665,0 piqa,acc_norm,0.6322089227421109,0.01125061664667879,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.686,0.014683991951087955,0 sciq,acc_norm,0.635,0.015231776226264903,0 storycloze_2016,acc,0.5750935328701229,0.011431286492205844,0 winogrande,acc,0.5114443567482242,0.014048804199859325,0