Spaces:

xuejing2409
/

open_chinese_vlm

Runtime error

lixuejing committed on Jan 7

Commit

4ab9b19

1 Parent(s): 05d96a1

remove baseline_row

Files changed (1) hide show

src/display/utils.py CHANGED Viewed

@@ -61,31 +61,6 @@ class EvalQueueColumn:  # Queue column
 ## All the model information that we might need
-# Average ⬆️ human baseline is 0.897 (source: averaging human baselines below)
-# ARC human baseline is 0.80 (source: https://lab42.global/arc/)
-# HellaSwag human baseline is 0.95 (source: https://deepgram.com/learn/hellaswag-llm-benchmark-guide)
-# MMLU human baseline is 0.898 (source: https://openreview.net/forum?id=d7KBjmI3GmQ)
-# TruthfulQA human baseline is 0.94(source: https://arxiv.org/pdf/2109.07958.pdf)
-# Winogrande: https://leaderboard.allenai.org/winogrande/submissions/public
-# GSM8K: paper
-# Define the human baselines
-human_baseline_row = {
-    AutoEvalColumn.model.name: "<p>Human performance</p>",
-    AutoEvalColumn.revision.name: "N/A",
-    AutoEvalColumn.precision.name: None,
-    AutoEvalColumn.average.name: 92.75,
-    AutoEvalColumn.merged.name: False,
-    AutoEvalColumn.arc.name: 80.0,
-    AutoEvalColumn.hellaswag.name: 95.0,
-    AutoEvalColumn.mmlu.name: 89.8,
-    AutoEvalColumn.truthfulqa.name: 94.0,
-    AutoEvalColumn.winogrande.name: 94.0,
-    AutoEvalColumn.gsm8k.name: 100,
-    AutoEvalColumn.c_sem.name: 100,
-    AutoEvalColumn.dummy.name: "human_baseline",
-    AutoEvalColumn.model_type.name: "",
-    AutoEvalColumn.flagged.name: False,
-}
 @dataclass
 class ModelDetails:

 ## All the model information that we might need
 @dataclass
 class ModelDetails: