Update README.md
Browse files
README.md
CHANGED
@@ -37,4 +37,36 @@ widget:
|
|
37 |
example_title: Math Problem
|
38 |
- text: In the context of computer programming, an algorithm is
|
39 |
example_title: Algorithm Definition
|
40 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
example_title: Math Problem
|
38 |
- text: In the context of computer programming, an algorithm is
|
39 |
example_title: Algorithm Definition
|
40 |
+
---
|
41 |
+
|
42 |
+
|
43 |
+
|
44 |
+
| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr|
|
45 |
+
|-------------|-------|------|-----:|--------|-----:|---|-----:|
|
46 |
+
|arc_challenge|Yaml |none | 25|acc |0.1937|± |0.0115|
|
47 |
+
| | |none | 25|acc_norm|0.2329|± |0.0124|
|
48 |
+
|
49 |
+
| Tasks |Version|Filter|n-shot| Metric |Value | |Stderr|
|
50 |
+
|---------|-------|------|-----:|--------|-----:|---|-----:|
|
51 |
+
|hellaswag|Yaml |none | 10|acc |0.2856|± |0.0045|
|
52 |
+
| | |none | 10|acc_norm|0.3090|± |0.0046|
|
53 |
+
|
54 |
+
| Groups |Version|Filter|n-shot|Metric|Value | |Stderr|
|
55 |
+
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
56 |
+
|mmlu |N/A |none | 0|acc |0.2536|± |0.0483|
|
57 |
+
| - humanities |N/A |none | 5|acc |0.2408|± |0.0341|
|
58 |
+
| - other |N/A |none | 5|acc |0.2475|± |0.0443|
|
59 |
+
| - social_sciences|N/A |none | 5|acc |0.2567|± |0.0456|
|
60 |
+
| - stem |N/A |none | 5|acc |0.2756|± |0.0653|
|
61 |
+
|
62 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
63 |
+
|--------------|-------|------|-----:|------|-----:|---|-----:|
|
64 |
+
|truthfulqa_mc2|Yaml |none | 0|acc |0.3909|± |0.0148|
|
65 |
+
|
66 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
67 |
+
|----------|-------|------|-----:|------|-----:|---|-----:|
|
68 |
+
|winogrande|Yaml |none | 5|acc |0.5107|± |0.0140|
|
69 |
+
|
70 |
+
|Tasks|Version| Filter |n-shot| Metric |Value| |Stderr|
|
71 |
+
|-----|-------|----------|-----:|-----------|----:|---|-----:|
|
72 |
+
|gsm8k|Yaml |get-answer| 5|exact_match| 0|± | 0|
|