ChatSKKU5.8B / eval /10_shot.json
jojo0217's picture
Rename 10_shot.json to eval/10_shot.json
de62604
raw
history blame
1.19 kB
{
"results": {
"kobest_boolq": {
"acc": 0.594017094017094,
"acc_stderr": 0.013110658638863893,
"macro_f1": 0.5766968517264726,
"macro_f1_stderr": 0.013321751352234396
},
"kobest_copa": {
"acc": 0.796,
"acc_stderr": 0.012749374359024387,
"macro_f1": 0.7956042899052566,
"macro_f1_stderr": 0.012768263930194495
},
"kobest_hellaswag": {
"acc": 0.494,
"acc_stderr": 0.022381462412439324,
"acc_norm": 0.586,
"acc_norm_stderr": 0.02204949796982787,
"macro_f1": 0.49215887000493286,
"macro_f1_stderr": 0.02237097469536285
},
"kobest_sentineg": {
"acc": 0.9395465994962217,
"acc_stderr": 0.01197627988893257,
"macro_f1": 0.9393430099312452,
"macro_f1_stderr": 0.012043294700994262
}
},
"versions": {
"kobest_boolq": 0,
"kobest_copa": 0,
"kobest_hellaswag": 0,
"kobest_sentineg": 0
},
"config": {
"model": "gpt2",
"model_args": "pretrained=./output",
"num_fewshot": 10,
"batch_size": "4",
"device": "cuda:0",
"no_cache": true,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}