|
task,metric,value,err,version
|
|
anli_r1,acc,0.319,0.014746404865473493,0
|
|
anli_r2,acc,0.337,0.0149550879186536,0
|
|
anli_r3,acc,0.3375,0.013655897185463665,0
|
|
arc_challenge,acc,0.18515358361774745,0.011350774438389699,0
|
|
arc_challenge,acc_norm,0.22696245733788395,0.012240491536132872,0
|
|
arc_easy,acc,0.39604377104377103,0.010035580962097937,0
|
|
arc_easy,acc_norm,0.36153198653198654,0.009858506543162062,0
|
|
boolq,acc,0.5434250764525994,0.008712010793695303,1
|
|
cb,acc,0.44642857142857145,0.06703189227942397,1
|
|
cb,f1,0.3011063011063011,,1
|
|
copa,acc,0.54,0.05009082659620332,0
|
|
hellaswag,acc,0.27853017327225654,0.004473595650807679,0
|
|
hellaswag,acc_norm,0.2961561441943836,0.004556276293751938,0
|
|
piqa,acc,0.6305767138193689,0.011260988628572347,0
|
|
piqa,acc_norm,0.6311207834602829,0.011257546676908804,0
|
|
rte,acc,0.5054151624548736,0.030094698123239966,0
|
|
sciq,acc,0.693,0.014593284892852623,0
|
|
sciq,acc_norm,0.635,0.015231776226264903,0
|
|
storycloze_2016,acc,0.5777659005879209,0.011421727692385657,0
|
|
winogrande,acc,0.5067087608524072,0.014051220692330352,0
|
|
|