taesiri committed on
Commit
a101f39
•
1 Parent(s): 3a79468
.gitattributes CHANGED
@@ -45,3 +45,39 @@ results/Claude-3-Haiku.csv filter=lfs diff=lfs merge=lfs -text
45
  results/Mixtral-8x7B-Instruct-0.1.csv filter=lfs diff=lfs merge=lfs -text
46
  results/CodeLlama-70B.csv filter=lfs diff=lfs merge=lfs -text
47
  results/Gemma-7B.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  results/Mixtral-8x7B-Instruct-0.1.csv filter=lfs diff=lfs merge=lfs -text
46
  results/CodeLlama-70B.csv filter=lfs diff=lfs merge=lfs -text
47
  results/Gemma-7B.csv filter=lfs diff=lfs merge=lfs -text
48
+ results/Yi-34B-Chat.csv filter=lfs diff=lfs merge=lfs -text
49
+ results/Claude-3-Haiku.jpg filter=lfs diff=lfs merge=lfs -text
50
+ results/Gemma-7B.pkl filter=lfs diff=lfs merge=lfs -text
51
+ results/Llama-2-70b-chat-hf.pkl filter=lfs diff=lfs merge=lfs -text
52
+ results/Mistral-7B-Instruct-v0.2.png filter=lfs diff=lfs merge=lfs -text
53
+ results/Mixtral-8x7B-Instruct-0.1.png filter=lfs diff=lfs merge=lfs -text
54
+ results/Claude-3-Opus.jpg filter=lfs diff=lfs merge=lfs -text
55
+ results/CodeLlama-70B.pkl filter=lfs diff=lfs merge=lfs -text
56
+ results/Gemma-7B.jpg filter=lfs diff=lfs merge=lfs -text
57
+ results/Mistral-7B-Instruct-v0.2.pkl filter=lfs diff=lfs merge=lfs -text
58
+ results/Mixtral-8x7B-Instruct-0.1.jpg filter=lfs diff=lfs merge=lfs -text
59
+ results/Claude-3-Haiku.png filter=lfs diff=lfs merge=lfs -text
60
+ results/GPT-3.5-Turbo.jpg filter=lfs diff=lfs merge=lfs -text
61
+ results/GPT-3.5-Turbo.pkl filter=lfs diff=lfs merge=lfs -text
62
+ results/Qwen1.5-72B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
63
+ results/StripedHyena-Nous-7B.pkl filter=lfs diff=lfs merge=lfs -text
64
+ results/Yi-34B-Chat.png filter=lfs diff=lfs merge=lfs -text
65
+ results/CodeLlama-70B.png filter=lfs diff=lfs merge=lfs -text
66
+ results/GPT-3.5-Turbo.png filter=lfs diff=lfs merge=lfs -text
67
+ results/Llama-2-70b-chat-hf.jpg filter=lfs diff=lfs merge=lfs -text
68
+ results/StripedHyena-Nous-7B.png filter=lfs diff=lfs merge=lfs -text
69
+ results/Llama-2-70b-chat-hf.png filter=lfs diff=lfs merge=lfs -text
70
+ results/Mistral-7B-Instruct-v0.2.jpg filter=lfs diff=lfs merge=lfs -text
71
+ results/StripedHyena-Nous-7B.jpg filter=lfs diff=lfs merge=lfs -text
72
+ results/Qwen1.5-72B-Chat.png filter=lfs diff=lfs merge=lfs -text
73
+ results/Yi-34B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
74
+ results/CodeLlama-70B.jpg filter=lfs diff=lfs merge=lfs -text
75
+ results/GPT-4-0125-preview.jpg filter=lfs diff=lfs merge=lfs -text
76
+ results/Mixtral-8x7B-Instruct-0.1.pkl filter=lfs diff=lfs merge=lfs -text
77
+ results/Yi-34B-Chat.jpg filter=lfs diff=lfs merge=lfs -text
78
+ results/Claude-3-Haiku.pkl filter=lfs diff=lfs merge=lfs -text
79
+ results/Gemma-7B.png filter=lfs diff=lfs merge=lfs -text
80
+ results/Qwen1.5-72B-Chat.pkl filter=lfs diff=lfs merge=lfs -text
81
+ results/Claude-3-Opus.pkl filter=lfs diff=lfs merge=lfs -text
82
+ results/Claude-3-Opus.png filter=lfs diff=lfs merge=lfs -text
83
+ results/GPT-4-0125-preview.png filter=lfs diff=lfs merge=lfs -text
app.py CHANGED
@@ -3,9 +3,9 @@ import pandas as pd
3
  from glob import glob
4
 
5
 
6
- csv_results = glob("results/*.csv")
7
  # load the csv files into a dict with keys being name of the file and values being the data
8
- data = {file: pd.read_csv(file) for file in csv_results}
9
 
10
 
11
  def calculate_accuracy(df):
@@ -41,7 +41,7 @@ for file, df in data.items():
41
  # Get the breakdown accuracy and round each value
42
  breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
43
  # Prepare the model name from the file name
44
- model_name = file.split("/")[-1].replace(".csv", "") # Corrected the file extension
45
  # Append the data to the list
46
  data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
47
 
@@ -61,10 +61,25 @@ accuracy_df.columns = headers_with_icons
61
  accuracy_df.sort_values(by="⭐ Overall", ascending=False, inplace=True)
62
 
63
 
 
 
 
 
 
 
 
 
 
64
  with gr.Blocks() as demo:
65
- gr.Markdown("# FSMBench Leaderboard")
66
  # add link to home page and dataset
67
 
68
  leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
69
 
 
 
 
 
 
 
70
  demo.launch()
 
3
  from glob import glob
4
 
5
 
6
+ csv_results = glob("results/*.pkl")
7
  # load the csv files into a dict with keys being name of the file and values being the data
8
+ data = {file: pd.read_pickle(file) for file in csv_results}
9
 
10
 
11
  def calculate_accuracy(df):
 
41
  # Get the breakdown accuracy and round each value
42
  breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)]
43
  # Prepare the model name from the file name
44
+ model_name = file.split("/")[-1].replace(".pkl", "") # Corrected the file extension
45
  # Append the data to the list
46
  data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy)
47
 
 
61
  accuracy_df.sort_values(by="⭐ Overall", ascending=False, inplace=True)
62
 
63
 
64
+ def load_heatmap(evt: gr.SelectData):
65
+ print(
66
+ f"You selected {evt.value} at row {evt.index[0]}, column {evt.index[1]} from the Dataframe."
67
+ )
68
+
69
+ heatmap_image = gr.Image(f"results/{evt.value}.jpg")
70
+ return heatmap_image
71
+
72
+
73
  with gr.Blocks() as demo:
74
+ gr.Markdown("# FSM Benchmark Leaderboard")
75
  # add link to home page and dataset
76
 
77
  leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons)
78
 
79
+ gr.Markdown("## Heatmap")
80
+
81
+ heatamp_image = gr.Image(label="", show_label=False)
82
+
83
+ leader_board.select(fn=load_heatmap, outputs=[heatamp_image])
84
+
85
  demo.launch()
results/{GPT-3.5-Turbo.csv → Claude-3-Haiku.jpg} RENAMED
File without changes
results/{Claude-3-Haiku.csv → Claude-3-Haiku.pkl} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45623535997485afdee5b0312f2b5fdcc26cf531fbb56b6c3af6e126dfbe7b0f
3
- size 19570166
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06b6c5cf244b982fcc45365fbeff804642130b80ec6f6f9b0cafcf69cb13e9fc
3
+ size 19540844
results/{Claude-3-Opus.csv → Claude-3-Opus.jpg} RENAMED
File without changes
results/{CodeLlama-70B.csv → Claude-3-Opus.pkl} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3726905a1656174f3c29edfced6f2eec63222f6be8965c0d970264901d8cfc75
3
- size 16476347
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:424425308bdce6ebf2b59825b1b8b2796421792c44dafae8f74df06a07f3ef19
3
+ size 20115450
results/CodeLlama-70B.jpg ADDED

Git LFS Details

  • SHA256: 7abcb23c529db6b65b212085ef2c777b6f1ad509eb4f3e909a03973db6e8f14a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.34 MB
results/CodeLlama-70B.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0496d8536b01f37858e37e1b34eedd25efa1f205035868883bfcdc2ae6fb88
3
+ size 16436822
results/GPT-3.5-Turbo.jpg ADDED

Git LFS Details

  • SHA256: 61024aa36a2f5840f9b6b4121603f742e42d97499fe2488c8bce58aca9973110
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results/GPT-3.5-Turbo.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8038b3c4d84ba654e8d2aebb41bd803efc0d477051a81f6fd2be95302e4a0c1d
3
+ size 9470933
results/GPT-4-0125-preview.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d4cbbacdff8172888d8d5e8917680f524d7cd73dcbcc7aa8d0e54c0246a752c
3
- size 18088521
 
 
 
 
results/GPT-4-0125-preview.jpg ADDED

Git LFS Details

  • SHA256: 14ad1f24a0557c34d2f7841ced2279897dbd06f1211afd8efc1bbf3e7ff976e5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.24 MB
results/Gemma-7B.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bdc088d6c7eb18257ac35c1d2b2ee9f9849a69950016f6e9a0bf04be48a5ae2
3
- size 12624700
 
 
 
 
results/Gemma-7B.jpg ADDED

Git LFS Details

  • SHA256: 802f9edb1b79dbaf3aa907ebc83c10b8a04a60a9fec7a781d8b12b3c3c2303c2
  • Pointer size: 132 Bytes
  • Size of remote file: 1.32 MB
results/Gemma-7B.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f02f45ce5d312717f22676deec9d4a321cce8b74321059620056f99064ee7a15
3
+ size 12654079
results/Llama-2-70b-chat-hf.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:42a31de917b05ed5405474a348d072426474a8fb2ce7ff462dbb121e25f4b6ad
3
- size 20760268
 
 
 
 
results/Llama-2-70b-chat-hf.jpg ADDED

Git LFS Details

  • SHA256: d92948e30dcbf7b15fcae057cc1aa59c561d003f70bd92ad5f760a829bdc34cf
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results/Llama-2-70b-chat-hf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7bfc053d1fcc58f5f22e03c813d02cd634235c44b9a351fe084d4a1f659186a
3
+ size 20685075
results/Mistral-7B-Instruct-v0.2.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:29ad4985661fc41e659a631fc74ba433cd08a571048f11436ccf87ff74f0db09
3
- size 27242025
 
 
 
 
results/Mistral-7B-Instruct-v0.2.jpg ADDED

Git LFS Details

  • SHA256: 69ad485f2ba53016c37890c304ce2cb13591c5892ca40dd6ad79e5dca7ae0ed0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results/Mistral-7B-Instruct-v0.2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99fa146558b73614994e83a667371795a7de461aca6e3580ebb72761fa1758bb
3
+ size 27226799
results/Mixtral-8x7B-Instruct-0.1.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a93e2b963a5ac8129b5284f3fd7987964ef96fa0e64194de704a3549c611de1f
3
- size 17978176
 
 
 
 
results/Mixtral-8x7B-Instruct-0.1.jpg ADDED

Git LFS Details

  • SHA256: b25140825099c0da671f95c86bf318d6ae4385361a35829eefe7c02b4b0ae720
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results/Mixtral-8x7B-Instruct-0.1.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01909843a40e65ff7e340cedbad451ab2337c7423161a46342e83d384ec24162
3
+ size 17979541
results/Qwen1.5-72B-Chat.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ba395c0b55330f689827527831e57e50ae9d824b6635b2bb569713afcf26d4b
3
- size 14219193
 
 
 
 
results/Qwen1.5-72B-Chat.jpg ADDED

Git LFS Details

  • SHA256: 8603cbe86499194308a92f5528d2058b7ddf3acae80f189ad4f506d466b9e419
  • Pointer size: 132 Bytes
  • Size of remote file: 1.32 MB
results/Qwen1.5-72B-Chat.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec5494612aaef1b9ac05c580fdac67469fc7bb3129e66b80ab720afeb9c71f22
3
+ size 14196803
results/StripedHyena-Nous-7B.csv DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f662367ea0d33a368aaa7a72cfeed41d2f3dc05be6289a6fe485a028c7cb98d5
3
- size 29219512
 
 
 
 
results/StripedHyena-Nous-7B.jpg ADDED

Git LFS Details

  • SHA256: 247580203c4c2a61afce2c81dde0f919e2bb8743b830132551307ba6829afdf3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.38 MB
results/StripedHyena-Nous-7B.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48481a400fcecb1ddc80f4ebbf4c235f6b645ab28ad57a72ed1688e3cf17c192
3
+ size 29177951
results/Yi-34B-Chat.jpg ADDED

Git LFS Details

  • SHA256: 6bebddad6a68daf263ee35cc2cb88f195f6a285ba2ad0c21bf35c1d8a1a716b8
  • Pointer size: 132 Bytes
  • Size of remote file: 1.33 MB
results/Yi-34B-Chat.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff7da8d16b7c70f06c2035d65039610b722b3b12cf64cb2f0efa7fcd41e1a82a
3
+ size 20489399