|
task,metric,value,err,version
|
|
anli_r1,acc,0.329,0.014865395385928362,0
|
|
anli_r2,acc,0.324,0.014806864733738854,0
|
|
anli_r3,acc,0.3325,0.013605417345710528,0
|
|
arc_challenge,acc,0.18771331058020477,0.011411001314155117,0
|
|
arc_challenge,acc_norm,0.23378839590443687,0.012368225378507137,0
|
|
arc_easy,acc,0.39604377104377103,0.010035580962097935,0
|
|
arc_easy,acc_norm,0.375,0.009933992677987828,0
|
|
boolq,acc,0.5293577981651376,0.008729967580199218,1
|
|
cb,acc,0.4642857142857143,0.06724777654937658,1
|
|
cb,f1,0.3284421618977745,,1
|
|
copa,acc,0.61,0.04902071300001974,0
|
|
hellaswag,acc,0.27962557259510057,0.004478979795506768,0
|
|
hellaswag,acc_norm,0.2967536347341167,0.00455893382299554,0
|
|
piqa,acc,0.6240478781284005,0.011301098166895729,0
|
|
piqa,acc_norm,0.6316648531011969,0.011254089354334357,0
|
|
rte,acc,0.5379061371841155,0.030009848912529113,0
|
|
sciq,acc,0.681,0.01474640486547349,0
|
|
sciq,acc_norm,0.628,0.015292149942040577,0
|
|
storycloze_2016,acc,0.5713522180652058,0.011444094780077097,0
|
|
winogrande,acc,0.5019731649565904,0.014052376259225636,0
|
|
|