Spaces:
Runtime error
Runtime error
lixuejing
committed on
Commit
·
4ab9b19
1
Parent(s):
05d96a1
remove baseline_row
Browse files- src/display/utils.py +0 -25
src/display/utils.py
CHANGED
@@ -61,31 +61,6 @@ class EvalQueueColumn: # Queue column
|
|
61 |
|
62 |
## All the model information that we might need
|
63 |
|
64 |
-
# Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
|
65 |
-
# ARC human baseline is 0.80 (source: https://lab42.global/arc/)
|
66 |
-
# HellaSwag human baseline is 0.95 (source: https://deepgram.com/learn/hellaswag-llm-benchmark-guide)
|
67 |
-
# MMLU human baseline is 0.898 (source: https://openreview.net/forum?id=d7KBjmI3GmQ)
|
68 |
-
# TruthfulQA human baseline is 0.94(source: https://arxiv.org/pdf/2109.07958.pdf)
|
69 |
-
# Winogrande: https://leaderboard.allenai.org/winogrande/submissions/public
|
70 |
-
# GSM8K: paper
|
71 |
-
# Define the human baselines
|
72 |
-
human_baseline_row = {
|
73 |
-
AutoEvalColumn.model.name: "<p>Human performance</p>",
|
74 |
-
AutoEvalColumn.revision.name: "N/A",
|
75 |
-
AutoEvalColumn.precision.name: None,
|
76 |
-
AutoEvalColumn.average.name: 92.75,
|
77 |
-
AutoEvalColumn.merged.name: False,
|
78 |
-
AutoEvalColumn.arc.name: 80.0,
|
79 |
-
AutoEvalColumn.hellaswag.name: 95.0,
|
80 |
-
AutoEvalColumn.mmlu.name: 89.8,
|
81 |
-
AutoEvalColumn.truthfulqa.name: 94.0,
|
82 |
-
AutoEvalColumn.winogrande.name: 94.0,
|
83 |
-
AutoEvalColumn.gsm8k.name: 100,
|
84 |
-
AutoEvalColumn.c_sem.name: 100,
|
85 |
-
AutoEvalColumn.dummy.name: "human_baseline",
|
86 |
-
AutoEvalColumn.model_type.name: "",
|
87 |
-
AutoEvalColumn.flagged.name: False,
|
88 |
-
}
|
89 |
|
90 |
@dataclass
|
91 |
class ModelDetails:
|
|
|
61 |
|
62 |
## All the model information that we might need
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
@dataclass
|
66 |
class ModelDetails:
|