Spaces:
Sleeping
Sleeping
update
Browse files- .gitattributes +36 -0
- app.py +19 -4
- results/{GPT-3.5-Turbo.csv → Claude-3-Haiku.jpg} +2 -2
- results/{Claude-3-Haiku.csv → Claude-3-Haiku.pkl} +2 -2
- results/{Claude-3-Opus.csv → Claude-3-Opus.jpg} +2 -2
- results/{CodeLlama-70B.csv → Claude-3-Opus.pkl} +2 -2
- results/CodeLlama-70B.jpg +3 -0
- results/CodeLlama-70B.pkl +3 -0
- results/GPT-3.5-Turbo.jpg +3 -0
- results/GPT-3.5-Turbo.pkl +3 -0
- results/GPT-4-0125-preview.csv +0 -3
- results/GPT-4-0125-preview.jpg +3 -0
- results/Gemma-7B.csv +0 -3
- results/Gemma-7B.jpg +3 -0
- results/Gemma-7B.pkl +3 -0
- results/Llama-2-70b-chat-hf.csv +0 -3
- results/Llama-2-70b-chat-hf.jpg +3 -0
- results/Llama-2-70b-chat-hf.pkl +3 -0
- results/Mistral-7B-Instruct-v0.2.csv +0 -3
- results/Mistral-7B-Instruct-v0.2.jpg +3 -0
- results/Mistral-7B-Instruct-v0.2.pkl +3 -0
- results/Mixtral-8x7B-Instruct-0.1.csv +0 -3
- results/Mixtral-8x7B-Instruct-0.1.jpg +3 -0
- results/Mixtral-8x7B-Instruct-0.1.pkl +3 -0
- results/Qwen1.5-72B-Chat.csv +0 -3
- results/Qwen1.5-72B-Chat.jpg +3 -0
- results/Qwen1.5-72B-Chat.pkl +3 -0
- results/StripedHyena-Nous-7B.csv +0 -3
- results/StripedHyena-Nous-7B.jpg +3 -0
- results/StripedHyena-Nous-7B.pkl +3 -0
- results/Yi-34B-Chat.jpg +3 -0
- results/Yi-34B-Chat.pkl +3 -0
.gitattributes
CHANGED
@@ -45,3 +45,39 @@ results/Claude-3-Haiku.csv filter=lfs diff=lfs merge=lfs -text
|
|
45 |
results/Mixtral-8x7B-Instruct-0.1.csv filter=lfs diff=lfs merge=lfs -text
|
46 |
results/CodeLlama-70B.csv filter=lfs diff=lfs merge=lfs -text
|
47 |
results/Gemma-7B.csv filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
results/Mixtral-8x7B-Instruct-0.1.csv filter=lfs diff=lfs merge=lfs -text
|
46 |
results/CodeLlama-70B.csv filter=lfs diff=lfs merge=lfs -text
|
47 |
results/Gemma-7B.csv filter=lfs diff=lfs merge=lfs -text
|
48 |
+
results/Yi-34B-Chat.csv filter=lfs diff=lfs merge=lfs -text
|
49 |
+
results/Claude-3-Haiku.jpg filter=lfs diff=lfs merge=lfs -text
|
50 |
+
results/Gemma-7B.pkl filter=lfs diff=lfs merge=lfs -text
|
51 |
+
results/Llama-2-70b-chat-hf.pkl filter=lfs diff=lfs merge=lfs -text
|
52 |
+
results/Mistral-7B-Instruct-v0.2.png filter=lfs diff=lfs merge=lfs -text
|
53 |
+
results/Mixtral-8x7B-Instruct-0.1.png filter=lfs diff=lfs merge=lfs -text
|
54 |
+
results/Claude-3-Opus.jpg filter=lfs diff=lfs merge=lfs -text
|
55 |
+
results/CodeLlama-70B.pkl filter=lfs diff=lfs merge=lfs -text
|
56 |
+
results/Gemma-7B.jpg filter=lfs diff=lfs merge=lfs -text
|
57 |
+
results/Mistral-7B-Instruct-v0.2.pkl filter=lfs diff=lfs merge=lfs -text
|
58 |
+
results/Mixtral-8x7B-Instruct-0.1.jpg filter=lfs diff=lfs merge=lfs -text
|
59 |
+
results/Claude-3-Haiku.png filter=lfs diff=lfs merge=lfs -text
|
60 |
+
results/GPT-3.5-Turbo.jpg filter=lfs diff=lfs merge=lfs -text
|
61 |
+
results/GPT-3.5-Turbo.pkl filter=lfs diff=lfs merge=lfs -text
|
62 |
+
results/Qwen1.5-72B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
|
63 |
+
results/StripedHyena-Nous-7B.pkl filter=lfs diff=lfs merge=lfs -text
|
64 |
+
results/Yi-34B-Chat.png filter=lfs diff=lfs merge=lfs -text
|
65 |
+
results/CodeLlama-70B.png filter=lfs diff=lfs merge=lfs -text
|
66 |
+
results/GPT-3.5-Turbo.png filter=lfs diff=lfs merge=lfs -text
|
67 |
+
results/Llama-2-70b-chat-hf.jpg filter=lfs diff=lfs merge=lfs -text
|
68 |
+
results/StripedHyena-Nous-7B.png filter=lfs diff=lfs merge=lfs -text
|
69 |
+
results/Llama-2-70b-chat-hf.png filter=lfs diff=lfs merge=lfs -text
|
70 |
+
results/Mistral-7B-Instruct-v0.2.jpg filter=lfs diff=lfs merge=lfs -text
|
71 |
+
results/StripedHyena-Nous-7B.jpg filter=lfs diff=lfs merge=lfs -text
|
72 |
+
results/Qwen1.5-72B-Chat.png filter=lfs diff=lfs merge=lfs -text
|
73 |
+
results/Yi-34B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
|
74 |
+
results/CodeLlama-70B.jpg filter=lfs diff=lfs merge=lfs -text
|
75 |
+
results/GPT-4-0125-preview.jpg filter=lfs diff=lfs merge=lfs -text
|
76 |
+
results/Mixtral-8x7B-Instruct-0.1.pkl filter=lfs diff=lfs merge=lfs -text
|
77 |
+
results/Yi-34B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
|
78 |
+
results/Claude-3-Haiku.pkl filter=lfs diff=lfs merge=lfs -text
|
79 |
+
results/Gemma-7B.png filter=lfs diff=lfs merge=lfs -text
|
80 |
+
results/Qwen1.5-72B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
|
81 |
+
results/Claude-3-Opus.pkl filter=lfs diff=lfs merge=lfs -text
|
82 |
+
results/Claude-3-Opus.png filter=lfs diff=lfs merge=lfs -text
|
83 |
+
results/GPT-4-0125-preview.png filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -3,9 +3,9 @@ import pandas as pd
|
|
3 |
from glob import glob
|
4 |
|
5 |
|
6 |
-
csv_results = glob("results/*.
|
7 |
# load the csv files into a dict with keys being name of the file and values being the data
|
8 |
-
data = {file: pd.
|
9 |
|
10 |
|
11 |
def calculate_accuracy(df):
|
@@ -41,7 +41,7 @@ for file, df in data.items():
|
|
41 |
# Get the breakdown accuracy and round each value
|
42 |
breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
|
43 |
# Prepare the model name from the file name
|
44 |
-
model_name = file.split("/")[-1].replace(".
|
45 |
# Append the data to the list
|
46 |
data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
|
47 |
|
@@ -61,10 +61,25 @@ accuracy_df.columns = headers_with_icons
|
|
61 |
accuracy_df.sort_values(by="β Overall", ascending=False, inplace=True)
|
62 |
|
63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
with gr.Blocks() as demo:
|
65 |
-
gr.Markdown("#
|
66 |
# add link to home page and dataset
|
67 |
|
68 |
leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
|
69 |
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
demo.launch()
|
|
|
3 |
from glob import glob
|
4 |
|
5 |
|
6 |
+
csv_results = glob("results/*.pkl")
|
7 |
# load the csv files into a dict with keys being name of the file and values being the data
|
8 |
+
data = {file: pd.read_pickle(file) for file in csv_results}
|
9 |
|
10 |
|
11 |
def calculate_accuracy(df):
|
|
|
41 |
# Get the breakdown accuracy and round each value
|
42 |
breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
|
43 |
# Prepare the model name from the file name
|
44 |
+
model_name = file.split("/")[-1].replace(".pkl", "") # Corrected the file extension
|
45 |
# Append the data to the list
|
46 |
data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
|
47 |
|
|
|
61 |
accuracy_df.sort_values(by="β Overall", ascending=False, inplace=True)
|
62 |
|
63 |
|
64 |
+
def load_heatmap(evt: gr.SelectData):
|
65 |
+
print(
|
66 |
+
f"You selected {evt.value} at row {evt.index[0]}, column {evt.index[1]} from the Dataframe."
|
67 |
+
)
|
68 |
+
|
69 |
+
heatmap_image = gr.Image(f"results/{evt.value}.jpg")
|
70 |
+
return heatmap_image
|
71 |
+
|
72 |
+
|
73 |
with gr.Blocks() as demo:
|
74 |
+
gr.Markdown("# FSM Benchmark Leaderboard")
|
75 |
# add link to home page and dataset
|
76 |
|
77 |
leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
|
78 |
|
79 |
+
gr.Markdown("## Heatmap")
|
80 |
+
|
81 |
+
heatamp_image = gr.Image(label="", show_label=False)
|
82 |
+
|
83 |
+
leader_board.select(fn=load_heatmap, outputs=[heatamp_image])
|
84 |
+
|
85 |
demo.launch()
|
results/{GPT-3.5-Turbo.csv → Claude-3-Haiku.jpg}
RENAMED
File without changes
|
results/{Claude-3-Haiku.csv → Claude-3-Haiku.pkl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06b6c5cf244b982fcc45365fbeff804642130b80ec6f6f9b0cafcf69cb13e9fc
|
3 |
+
size 19540844
|
results/{Claude-3-Opus.csv → Claude-3-Opus.jpg}
RENAMED
File without changes
|
results/{CodeLlama-70B.csv → Claude-3-Opus.pkl}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:424425308bdce6ebf2b59825b1b8b2796421792c44dafae8f74df06a07f3ef19
|
3 |
+
size 20115450
|
results/CodeLlama-70B.jpg
ADDED
Git LFS Details
|
results/CodeLlama-70B.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c0496d8536b01f37858e37e1b34eedd25efa1f205035868883bfcdc2ae6fb88
|
3 |
+
size 16436822
|
results/GPT-3.5-Turbo.jpg
ADDED
Git LFS Details
|
results/GPT-3.5-Turbo.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8038b3c4d84ba654e8d2aebb41bd803efc0d477051a81f6fd2be95302e4a0c1d
|
3 |
+
size 9470933
|
results/GPT-4-0125-preview.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2d4cbbacdff8172888d8d5e8917680f524d7cd73dcbcc7aa8d0e54c0246a752c
|
3 |
-
size 18088521
|
|
|
|
|
|
|
|
results/GPT-4-0125-preview.jpg
ADDED
Git LFS Details
|
results/Gemma-7B.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:3bdc088d6c7eb18257ac35c1d2b2ee9f9849a69950016f6e9a0bf04be48a5ae2
|
3 |
-
size 12624700
|
|
|
|
|
|
|
|
results/Gemma-7B.jpg
ADDED
Git LFS Details
|
results/Gemma-7B.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f02f45ce5d312717f22676deec9d4a321cce8b74321059620056f99064ee7a15
|
3 |
+
size 12654079
|
results/Llama-2-70b-chat-hf.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:42a31de917b05ed5405474a348d072426474a8fb2ce7ff462dbb121e25f4b6ad
|
3 |
-
size 20760268
|
|
|
|
|
|
|
|
results/Llama-2-70b-chat-hf.jpg
ADDED
Git LFS Details
|
results/Llama-2-70b-chat-hf.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7bfc053d1fcc58f5f22e03c813d02cd634235c44b9a351fe084d4a1f659186a
|
3 |
+
size 20685075
|
results/Mistral-7B-Instruct-v0.2.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:29ad4985661fc41e659a631fc74ba433cd08a571048f11436ccf87ff74f0db09
|
3 |
-
size 27242025
|
|
|
|
|
|
|
|
results/Mistral-7B-Instruct-v0.2.jpg
ADDED
Git LFS Details
|
results/Mistral-7B-Instruct-v0.2.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99fa146558b73614994e83a667371795a7de461aca6e3580ebb72761fa1758bb
|
3 |
+
size 27226799
|
results/Mixtral-8x7B-Instruct-0.1.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a93e2b963a5ac8129b5284f3fd7987964ef96fa0e64194de704a3549c611de1f
|
3 |
-
size 17978176
|
|
|
|
|
|
|
|
results/Mixtral-8x7B-Instruct-0.1.jpg
ADDED
Git LFS Details
|
results/Mixtral-8x7B-Instruct-0.1.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:01909843a40e65ff7e340cedbad451ab2337c7423161a46342e83d384ec24162
|
3 |
+
size 17979541
|
results/Qwen1.5-72B-Chat.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0ba395c0b55330f689827527831e57e50ae9d824b6635b2bb569713afcf26d4b
|
3 |
-
size 14219193
|
|
|
|
|
|
|
|
results/Qwen1.5-72B-Chat.jpg
ADDED
Git LFS Details
|
results/Qwen1.5-72B-Chat.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ec5494612aaef1b9ac05c580fdac67469fc7bb3129e66b80ab720afeb9c71f22
|
3 |
+
size 14196803
|
results/StripedHyena-Nous-7B.csv
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:f662367ea0d33a368aaa7a72cfeed41d2f3dc05be6289a6fe485a028c7cb98d5
|
3 |
-
size 29219512
|
|
|
|
|
|
|
|
results/StripedHyena-Nous-7B.jpg
ADDED
Git LFS Details
|
results/StripedHyena-Nous-7B.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48481a400fcecb1ddc80f4ebbf4c235f6b645ab28ad57a72ed1688e3cf17c192
|
3 |
+
size 29177951
|
results/Yi-34B-Chat.jpg
ADDED
Git LFS Details
|
results/Yi-34B-Chat.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff7da8d16b7c70f06c2035d65039610b722b3b12cf64cb2f0efa7fcd41e1a82a
|
3 |
+
size 20489399
|