task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928362,0 anli_r2,acc,0.324,0.014806864733738854,0 anli_r3,acc,0.3325,0.013605417345710528,0 arc_challenge,acc,0.18771331058020477,0.011411001314155117,0 arc_challenge,acc_norm,0.23378839590443687,0.012368225378507137,0 arc_easy,acc,0.39604377104377103,0.010035580962097935,0 arc_easy,acc_norm,0.375,0.009933992677987828,0 boolq,acc,0.5293577981651376,0.008729967580199218,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3284421618977745,,1 copa,acc,0.61,0.04902071300001974,0 hellaswag,acc,0.27962557259510057,0.004478979795506768,0 hellaswag,acc_norm,0.2967536347341167,0.00455893382299554,0 piqa,acc,0.6240478781284005,0.011301098166895729,0 piqa,acc_norm,0.6316648531011969,0.011254089354334357,0 rte,acc,0.5379061371841155,0.030009848912529113,0 sciq,acc,0.681,0.01474640486547349,0 sciq,acc_norm,0.628,0.015292149942040577,0 storycloze_2016,acc,0.5713522180652058,0.011444094780077097,0 winogrande,acc,0.5019731649565904,0.014052376259225636,0