Alt_LLM_LeaderBoard

Running

App Files Files Community

CultriX committed on Feb 14, 2024

Commit

bc527a4

verified ·

1 Parent(s): 03d3ca3

Update app.py

Browse files

Files changed (1) hide show

app.py +12 -24

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-# Import necessary libraries
 import streamlit as st
 import pandas as pd
 from huggingface_hub import HfApi
@@ -6,8 +5,8 @@ from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
 from itertools import combinations
 import re
 from functools import cache
-from io import StringIO  # Corrected import for StringIO
-# Define function to cache model info from Hugging Face API
 @cache
 def cached_model_info(api, model):
     try:
@@ -15,32 +14,29 @@ def cached_model_info(api, model):
     except (RepositoryNotFoundError, RevisionNotFoundError):
         return None
-# Convert markdown table to DataFrame and extract Hugging Face URLs
 def convert_markdown_table_to_dataframe(md_content):
     cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
-    # Use StringIO for reading the cleaned_content
     df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python', skipinitialspace=True)
-    # Skip rows if needed or directly process without dropping
     df.columns = df.columns.str.strip()
     model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
-    df['URL'] = df.apply(lambda x: re.search(model_link_pattern, x['Model']).group(2) if re.search(model_link_pattern, x['Model']) else None, axis=1)
-    df['Model'] = df.apply(lambda x: re.sub(model_link_pattern, r'\1', x['Model']), axis=1)
     return df
-# Function to get and update model info in the DataFrame
 def get_and_update_model_info(df):
     api = HfApi()
     for index, row in df.iterrows():
         model_info = cached_model_info(api, row['Model'].strip())
         if model_info:
-            df.loc[index, 'Likes'] = model_info.likes
-            df.loc[index, 'Tags'] = ', '.join(model_info.tags)
         else:
-            df.loc[index, 'Likes'] = -1  # Indicates missing info
-            df.loc[index, 'Tags'] = ''
     return df
-# Calculate the highest combined score for a given column
 def calculate_highest_combined_score(data, column):
     scores = data[column].dropna().tolist()
     models = data['Model'].tolist()
@@ -53,7 +49,6 @@ def calculate_highest_combined_score(data, column):
         top_combinations[r] = top_combinations[r][:3]
     return column, top_combinations
-# Display the results of the highest combined scores
 def display_highest_combined_scores(data, score_columns):
     for column in score_columns:
         if column in data.columns:
@@ -93,21 +88,14 @@ def create_bar_chart(df, category):
 def main():
     st.title("Model Leaderboard")
     st.markdown("Displaying top combinations of models based on scores.")
-    # Placeholder content - ensure you replace this with your actual markdown or method to fetch/create content
-    content = """Your markdown table content here"""
     if content:
         df = convert_markdown_table_to_dataframe(content)
         df = get_and_update_model_info(df)
-        # Assuming your DataFrame has these score columns already or you've added them
         score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
-        # Ensure the score columns are numeric and handle NaNs or conversion issues
         for col in score_columns:
             df[col] = pd.to_numeric(df[col], errors='coerce')
         display_highest_combined_scores(df, score_columns)
     # Create tabs for leaderboard and about section

 import streamlit as st
 import pandas as pd
 from huggingface_hub import HfApi
 from itertools import combinations
 import re
 from functools import cache
+from io import StringIO
 @cache
 def cached_model_info(api, model):
     try:
     except (RepositoryNotFoundError, RevisionNotFoundError):
         return None
 def convert_markdown_table_to_dataframe(md_content):
     cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
     df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python', skipinitialspace=True)
     df.columns = df.columns.str.strip()
     model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
+    # Correctly process 'Model' column to extract URLs and model names
+    df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
+    df['Model'] = df['Model'].apply(lambda x: re.sub(model_link_pattern, r'\1', x))
     return df
 def get_and_update_model_info(df):
     api = HfApi()
     for index, row in df.iterrows():
         model_info = cached_model_info(api, row['Model'].strip())
         if model_info:
+            df.at[index, 'Likes'] = model_info.likes
+            df.at[index, 'Tags'] = ', '.join(model_info.tags)
         else:
+            df.at[index, 'Likes'] = -1
+            df.at[index, 'Tags'] = ''
     return df
 def calculate_highest_combined_score(data, column):
     scores = data[column].dropna().tolist()
     models = data['Model'].tolist()
         top_combinations[r] = top_combinations[r][:3]
     return column, top_combinations
 def display_highest_combined_scores(data, score_columns):
     for column in score_columns:
         if column in data.columns:
 def main():
     st.title("Model Leaderboard")
     st.markdown("Displaying top combinations of models based on scores.")
+    # Placeholder for actual markdown content
+    content = """Your markdown content here"""
     if content:
         df = convert_markdown_table_to_dataframe(content)
         df = get_and_update_model_info(df)
         score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
         for col in score_columns:
             df[col] = pd.to_numeric(df[col], errors='coerce')
         display_highest_combined_scores(df, score_columns)
     # Create tabs for leaderboard and about section