task,metric,value,err,version anli_r1,acc,0.319,0.014746404865473493,0 anli_r2,acc,0.337,0.0149550879186536,0 anli_r3,acc,0.3375,0.013655897185463665,0 arc_challenge,acc,0.18515358361774745,0.011350774438389699,0 arc_challenge,acc_norm,0.22696245733788395,0.012240491536132872,0 arc_easy,acc,0.39604377104377103,0.010035580962097937,0 arc_easy,acc_norm,0.36153198653198654,0.009858506543162062,0 boolq,acc,0.5434250764525994,0.008712010793695303,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.3011063011063011,,1 copa,acc,0.54,0.05009082659620332,0 hellaswag,acc,0.27853017327225654,0.004473595650807679,0 hellaswag,acc_norm,0.2961561441943836,0.004556276293751938,0 piqa,acc,0.6305767138193689,0.011260988628572347,0 piqa,acc_norm,0.6311207834602829,0.011257546676908804,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.693,0.014593284892852623,0 sciq,acc_norm,0.635,0.015231776226264903,0 storycloze_2016,acc,0.5777659005879209,0.011421727692385657,0 winogrande,acc,0.5067087608524072,0.014051220692330352,0