Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# Import necessary libraries
|
2 |
import streamlit as st
|
3 |
import pandas as pd
|
4 |
from huggingface_hub import HfApi
|
@@ -6,8 +5,8 @@ from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
|
|
6 |
from itertools import combinations
|
7 |
import re
|
8 |
from functools import cache
|
9 |
-
from io import StringIO
|
10 |
-
|
11 |
@cache
|
12 |
def cached_model_info(api, model):
|
13 |
try:
|
@@ -15,32 +14,29 @@ def cached_model_info(api, model):
|
|
15 |
except (RepositoryNotFoundError, RevisionNotFoundError):
|
16 |
return None
|
17 |
|
18 |
-
# Convert markdown table to DataFrame and extract Hugging Face URLs
|
19 |
def convert_markdown_table_to_dataframe(md_content):
|
20 |
cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
|
21 |
-
# Use StringIO for reading the cleaned_content
|
22 |
df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python', skipinitialspace=True)
|
23 |
-
# Skip rows if needed or directly process without dropping
|
24 |
df.columns = df.columns.str.strip()
|
|
|
25 |
model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
|
26 |
-
|
27 |
-
df['
|
|
|
28 |
return df
|
29 |
|
30 |
-
# Function to get and update model info in the DataFrame
|
31 |
def get_and_update_model_info(df):
|
32 |
api = HfApi()
|
33 |
for index, row in df.iterrows():
|
34 |
model_info = cached_model_info(api, row['Model'].strip())
|
35 |
if model_info:
|
36 |
-
df.
|
37 |
-
df.
|
38 |
else:
|
39 |
-
df.
|
40 |
-
df.
|
41 |
return df
|
42 |
|
43 |
-
# Calculate the highest combined score for a given column
|
44 |
def calculate_highest_combined_score(data, column):
|
45 |
scores = data[column].dropna().tolist()
|
46 |
models = data['Model'].tolist()
|
@@ -53,7 +49,6 @@ def calculate_highest_combined_score(data, column):
|
|
53 |
top_combinations[r] = top_combinations[r][:3]
|
54 |
return column, top_combinations
|
55 |
|
56 |
-
# Display the results of the highest combined scores
|
57 |
def display_highest_combined_scores(data, score_columns):
|
58 |
for column in score_columns:
|
59 |
if column in data.columns:
|
@@ -93,21 +88,14 @@ def create_bar_chart(df, category):
|
|
93 |
def main():
|
94 |
st.title("Model Leaderboard")
|
95 |
st.markdown("Displaying top combinations of models based on scores.")
|
96 |
-
|
97 |
-
|
98 |
-
content = """Your markdown table content here"""
|
99 |
-
|
100 |
if content:
|
101 |
df = convert_markdown_table_to_dataframe(content)
|
102 |
df = get_and_update_model_info(df)
|
103 |
-
|
104 |
-
# Assuming your DataFrame has these score columns already or you've added them
|
105 |
score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
|
106 |
-
|
107 |
-
# Ensure the score columns are numeric and handle NaNs or conversion issues
|
108 |
for col in score_columns:
|
109 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
110 |
-
|
111 |
display_highest_combined_scores(df, score_columns)
|
112 |
|
113 |
# Create tabs for leaderboard and about section
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
from huggingface_hub import HfApi
|
|
|
5 |
from itertools import combinations
|
6 |
import re
|
7 |
from functools import cache
|
8 |
+
from io import StringIO
|
9 |
+
|
10 |
@cache
|
11 |
def cached_model_info(api, model):
|
12 |
try:
|
|
|
14 |
except (RepositoryNotFoundError, RevisionNotFoundError):
|
15 |
return None
|
16 |
|
|
|
17 |
def convert_markdown_table_to_dataframe(md_content):
    """Parse a pipe-delimited markdown table into a DataFrame.

    The leading and trailing pipe of every row are removed, the markdown
    delimiter row (``|---|:--:|``) is discarded, and the remaining text is
    read with pandas using ``|`` as the column separator. Column names are
    stripped of surrounding whitespace.

    The ``Model`` column is expected to hold cells of the form
    ``[name](model_url) [label](other_url)``. For such cells the first link
    target is stored in a new ``URL`` column and the cell is reduced to the
    link text. Cells that do not match are left unchanged and get ``None``
    as their URL.

    Args:
        md_content: Markdown source of the table.

    Returns:
        The parsed DataFrame with an additional ``URL`` column.

    Raises:
        KeyError: If the table has no ``Model`` column.
    """
    # Drop the outer pipes so the remaining pipes are pure column delimiters.
    without_leading_pipe = re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE)
    cleaned_content = re.sub(r'\|\s*$', '', without_leading_pipe, flags=re.MULTILINE)

    # The header/body delimiter row carries no data; if it is kept it shows
    # up as a bogus row whose model name is made of dashes.
    delimiter_cell = re.compile(r'\s*:?-+:?\s*')
    table_lines = [
        line
        for line in cleaned_content.splitlines()
        if not all(delimiter_cell.fullmatch(cell) for cell in line.split('|'))
    ]

    df = pd.read_csv(
        StringIO('\n'.join(table_lines)),
        sep=r'\|',  # raw string: the separator is a regex for a literal pipe
        engine='python',
        skipinitialspace=True,
    )
    df.columns = df.columns.str.strip()

    model_link = re.compile(r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)')

    def _link_target(cell):
        # Empty cells are parsed as NaN (a float), which the regex cannot scan.
        found = model_link.search(cell) if isinstance(cell, str) else None
        return found.group(2) if found else None

    def _link_text(cell):
        return model_link.sub(r'\1', cell) if isinstance(cell, str) else cell

    # Extract the URL first: the second step overwrites the markdown link.
    df['URL'] = df['Model'].apply(_link_target)
    df['Model'] = df['Model'].apply(_link_text)
    return df
|
27 |
|
|
|
28 |
def get_and_update_model_info(df):
    """Add ``Likes`` and ``Tags`` columns looked up for each model.

    Every value of the ``Model`` column is stripped and passed to
    ``cached_model_info`` together with a single ``HfApi`` client. When the
    lookup yields a result, its ``likes`` value and its ``tags`` joined with
    ``', '`` are recorded; otherwise the row gets ``-1`` likes and an empty
    tag string.

    Args:
        df: DataFrame with a ``Model`` column. It is modified in place.

    Returns:
        The same DataFrame, now carrying ``Likes`` and ``Tags`` columns.
    """
    api = HfApi()
    likes = []
    tags = []
    for name in df['Model']:
        # A non-string cell (e.g. NaN from an empty table cell) cannot be
        # looked up; treat it like a model that was not found.
        model_info = cached_model_info(api, name.strip()) if isinstance(name, str) else None
        if model_info:
            likes.append(model_info.likes)
            tags.append(', '.join(model_info.tags or []))
        else:
            likes.append(-1)
            tags.append('')

    # Assign whole columns at once: creating them cell by cell while
    # iterating NaN-fills the new column (turning the like counts into
    # floats) and leaves an empty frame without the columns altogether.
    df['Likes'] = likes
    df['Tags'] = tags
    return df
|
39 |
|
|
|
40 |
def calculate_highest_combined_score(data, column):
|
41 |
scores = data[column].dropna().tolist()
|
42 |
models = data['Model'].tolist()
|
|
|
49 |
top_combinations[r] = top_combinations[r][:3]
|
50 |
return column, top_combinations
|
51 |
|
|
|
52 |
def display_highest_combined_scores(data, score_columns):
|
53 |
for column in score_columns:
|
54 |
if column in data.columns:
|
|
|
88 |
def main():
|
89 |
st.title("Model Leaderboard")
|
90 |
st.markdown("Displaying top combinations of models based on scores.")
|
91 |
+
# Placeholder for actual markdown content
|
92 |
+
content = """Your markdown content here"""
|
|
|
|
|
93 |
if content:
|
94 |
df = convert_markdown_table_to_dataframe(content)
|
95 |
df = get_and_update_model_info(df)
|
|
|
|
|
96 |
score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
|
|
|
|
|
97 |
for col in score_columns:
|
98 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
|
|
99 |
display_highest_combined_scores(df, score_columns)
|
100 |
|
101 |
# Create tabs for leaderboard and about section
|