task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270333,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.34,0.013680495725767789,0 arc_challenge,acc,0.18430034129692832,0.011330517933037411,0 arc_challenge,acc_norm,0.23122866894197952,0.012320858834772278,0 arc_easy,acc,0.4065656565656566,0.010079056419223503,0 arc_easy,acc_norm,0.36952861952861954,0.009904325878447317,0 boolq,acc,0.5541284403669725,0.008693659886486843,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.30414746543778803,,1 copa,acc,0.66,0.04760952285695237,0 hellaswag,acc,0.28141804421429994,0.00448771884333028,0 hellaswag,acc_norm,0.29934276040629354,0.004570342034463229,0 piqa,acc,0.6376496191512514,0.011215040215104565,0 piqa,acc_norm,0.6289445048966268,0.011271222398600525,0 rte,acc,0.5487364620938628,0.029953149241808943,0 sciq,acc,0.685,0.014696631960792496,0 sciq,acc_norm,0.595,0.015531136990453047,0 storycloze_2016,acc,0.5831106360235169,0.011401581234266751,0 winogrande,acc,0.4988161010260458,0.014052446290529019,0