Update README.md
Browse files
README.md
CHANGED
@@ -47,23 +47,11 @@ widget:
|
|
47 |
| | |none | 25|acc_norm|0.2329|± |0.0124|
|
48 |
|hellaswag|Yaml |none | 10|acc |0.2856|± |0.0045|
|
49 |
| | |none | 10|acc_norm|0.3090|± |0.0046|
|
50 |
-
|
51 |
-
| Groups |Version|Filter|n-shot|Metric|Value | |Stderr|
|
52 |
-
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
53 |
|mmlu |N/A |none | 0|acc |0.2536|± |0.0483|
|
54 |
| - humanities |N/A |none | 5|acc |0.2408|± |0.0341|
|
55 |
| - other |N/A |none | 5|acc |0.2475|± |0.0443|
|
56 |
| - social_sciences|N/A |none | 5|acc |0.2567|± |0.0456|
|
57 |
| - stem |N/A |none | 5|acc |0.2756|± |0.0653|
|
58 |
-
|
59 |
-
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
60 |
-
|--------------|-------|------|-----:|------|-----:|---|-----:|
|
61 |
|truthfulqa_mc2|Yaml |none | 0|acc |0.3909|± |0.0148|
|
62 |
-
|
63 |
-
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
64 |
-
|----------|-------|------|-----:|------|-----:|---|-----:|
|
65 |
|winogrande|Yaml |none | 5|acc |0.5107|± | 0.014|
|
66 |
-
|
67 |
-
|Tasks|Version| Filter |n-shot| Metric |Value| |Stderr|
|
68 |
-
|-----|-------|----------|-----:|-----------|----:|---|-----:|
|
69 |
|gsm8k|Yaml |get-answer| 5|exact_match| 0|± | 0|
|
|
|
47 |
| | |none | 25|acc_norm|0.2329|± |0.0124|
|
48 |
|hellaswag|Yaml |none | 10|acc |0.2856|± |0.0045|
|
49 |
| | |none | 10|acc_norm|0.3090|± |0.0046|
|
|
|
|
|
|
|
50 |
|mmlu |N/A |none | 0|acc |0.2536|± |0.0483|
|
51 |
| - humanities |N/A |none | 5|acc |0.2408|± |0.0341|
|
52 |
| - other |N/A |none | 5|acc |0.2475|± |0.0443|
|
53 |
| - social_sciences|N/A |none | 5|acc |0.2567|± |0.0456|
|
54 |
| - stem |N/A |none | 5|acc |0.2756|± |0.0653|
|
|
|
|
|
|
|
55 |
|truthfulqa_mc2|Yaml |none | 0|acc |0.3909|± |0.0148|
|
|
|
|
|
|
|
56 |
|winogrande|Yaml |none | 5|acc |0.5107|± | 0.014|
|
|
|
|
|
|
|
57 |
|gsm8k|Yaml |get-answer| 5|exact_match| 0|± | 0|
|