CultriX committed on
Commit
c52658c
·
verified ·
1 Parent(s): 77a90c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -81
app.py CHANGED
@@ -1,30 +1,13 @@
1
- # Importing necessary libraries
2
- import re
3
- import streamlit as st
4
- import requests
5
- import pandas as pd
6
- from io import StringIO
7
- import plotly.graph_objs as go
8
- from huggingface_hub import HfApi
9
- from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
10
- from yall import create_yall
11
- from functools import cache
12
-
13
-
14
- # Importing necessary libraries
15
  import streamlit as st
16
  import pandas as pd
17
- from io import StringIO
18
- import plotly.graph_objs as go
19
  from huggingface_hub import HfApi
20
  from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
21
  from itertools import combinations
22
- import time
23
- from collections import Counter
24
  import re
25
  from functools import cache
26
 
27
- # Function to get model info from Hugging Face API using caching
28
  @cache
29
  def cached_model_info(api, model):
30
  try:
@@ -32,11 +15,11 @@ def cached_model_info(api, model):
32
  except (RepositoryNotFoundError, RevisionNotFoundError):
33
  return None
34
 
35
- # Function to convert markdown table to DataFrame and extract Hugging Face URLs
36
  def convert_markdown_table_to_dataframe(md_content):
37
  cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
38
- df = pd.read_csv(StringIO(cleaned_content), sep="\|", engine='python')
39
- df = df.drop(0, axis=0)
40
  df.columns = df.columns.str.strip()
41
  model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
42
  df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
@@ -52,29 +35,42 @@ def get_and_update_model_info(df):
52
  df.loc[index, 'Likes'] = model_info.likes
53
  df.loc[index, 'Tags'] = ', '.join(model_info.tags)
54
  else:
55
- df.loc[index, 'Likes'] = -1
56
  df.loc[index, 'Tags'] = ''
57
  return df
58
 
 
 
 
 
 
 
 
 
 
 
59
  # Define the score columns
60
  score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
61
 
62
- # Function to calculate the highest combined score for a given column
 
 
63
  def calculate_highest_combined_score(data, column):
64
- scores = data[column].dropna().tolist() # Ensure to drop NaN values to avoid calculation errors
65
- models = data['Model'].dropna().tolist()
66
- top_combinations = {2: [], 3: [], 4: [], 5: [], 6: []}
67
  for r in range(2, 7):
68
  for combination in combinations(zip(scores, models), r):
69
  combined_score = sum(score for score, _ in combination)
70
  top_combinations[r].append((combined_score, tuple(model for _, model in combination)))
71
- top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
 
72
  return column, top_combinations
73
 
74
- # Function to display the results of the highest combined scores
75
- def display_highest_combined_scores(data):
76
  for column in score_columns:
77
- if column in data:
78
  _, top_combinations = calculate_highest_combined_score(data, column)
79
  st.subheader(f"Top Combinations for {column}")
80
  for r, combinations in top_combinations.items():
@@ -83,30 +79,6 @@ def display_highest_combined_scores(data):
83
  st.write(f"Score: {score}, Models: {', '.join(combination)}")
84
 
85
 
86
-
87
- # Function to get model info from DataFrame and update it with likes and tags
88
- @st.cache
89
- def get_model_info(df):
90
- api = HfApi()
91
-
92
- for index, row in df.iterrows():
93
- model_info = cached_model_info(api, row['Model'].strip())
94
- if model_info:
95
- df.loc[index, 'Likes'] = model_info.likes
96
- df.loc[index, 'Tags'] = ', '.join(model_info.tags)
97
- else:
98
- df.loc[index, 'Likes'] = -1
99
- df.loc[index, 'Tags'] = ''
100
- return df
101
-
102
- # Function to get model info from Hugging Face API using caching
103
- @cache
104
- def cached_model_info(api, model):
105
- try:
106
- return api.model_info(repo_id=str(model))
107
- except (RepositoryNotFoundError, RevisionNotFoundError):
108
- return None
109
-
110
  # Function to calculate the highest combined score for a given column
111
  def calculate_highest_combined_score(data, column):
112
  scores = data[column].tolist()
@@ -119,32 +91,6 @@ def calculate_highest_combined_score(data, column):
119
  top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
120
  return column, top_combinations
121
 
122
-
123
- # Function to create and display charts (existing functions can be reused or modified as needed)
124
-
125
-
126
- @st.cache_data
127
- def get_model_info(df):
128
- api = HfApi()
129
-
130
- # Initialize new columns for likes and tags
131
- df['Likes'] = None
132
- df['Tags'] = None
133
-
134
- # Iterate through DataFrame rows
135
- for index, row in df.iterrows():
136
- model = row['Model'].strip()
137
- try:
138
- model_info = api.model_info(repo_id=str(model))
139
- df.loc[index, 'Likes'] = model_info.likes
140
- df.loc[index, 'Tags'] = ', '.join(model_info.tags)
141
-
142
- except (RepositoryNotFoundError, RevisionNotFoundError):
143
- df.loc[index, 'Likes'] = -1
144
- df.loc[index, 'Tags'] = ''
145
-
146
- return df
147
-
148
  # Function to create bar chart for a given category
149
  def create_bar_chart(df, category):
150
  """Create and display a bar chart for a given category."""
 
1
+ # Import necessary libraries
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import streamlit as st
3
  import pandas as pd
 
 
4
  from huggingface_hub import HfApi
5
  from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError
6
  from itertools import combinations
 
 
7
  import re
8
  from functools import cache
9
 
10
+ # Define function to cache model info from Hugging Face API
11
  @cache
12
  def cached_model_info(api, model):
13
  try:
 
15
  except (RepositoryNotFoundError, RevisionNotFoundError):
16
  return None
17
 
18
+ # Convert markdown table to DataFrame and extract Hugging Face URLs
19
  def convert_markdown_table_to_dataframe(md_content):
20
  cleaned_content = re.sub(r'\|\s*$', '', re.sub(r'^\|\s*', '', md_content, flags=re.MULTILINE), flags=re.MULTILINE)
21
+ df = pd.read_csv(pd.compat.StringIO(cleaned_content), sep="\|", engine='python')
22
+ df = df.drop(0).reset_index(drop=True)
23
  df.columns = df.columns.str.strip()
24
  model_link_pattern = r'\[(.*?)\]\((.*?)\)\s*\[.*?\]\(.*?\)'
25
  df['URL'] = df['Model'].apply(lambda x: re.search(model_link_pattern, x).group(2) if re.search(model_link_pattern, x) else None)
 
35
  df.loc[index, 'Likes'] = model_info.likes
36
  df.loc[index, 'Tags'] = ', '.join(model_info.tags)
37
  else:
38
+ df.loc[index, 'Likes'] = -1 # Indicates missing info
39
  df.loc[index, 'Tags'] = ''
40
  return df
41
 
42
+
43
+
44
# Function to get model info from Hugging Face API using caching
@cache
def cached_model_info(api, model):
    """Look up a model's metadata through ``api``, memoizing the result.

    Results are memoized by ``functools.cache`` on the ``(api, model)``
    argument pair, so repeated lookups with the same arguments do not call
    the API again.

    Args:
        api: Client object exposing ``model_info(repo_id=...)``.
        model: Repository identifier; converted with ``str()`` before use.

    Returns:
        Whatever ``api.model_info`` returns, or ``None`` when the repository
        or revision cannot be found.
    """
    repo_id = str(model)
    try:
        info = api.model_info(repo_id=repo_id)
    except (RepositoryNotFoundError, RevisionNotFoundError):
        # A missing repo/revision is an expected outcome, not an error.
        info = None
    return info
51
+
52
  # Define the score columns
53
  score_columns = ['Average', 'AGIEval', 'GPT4All', 'TruthfulQA', 'Bigbench']
54
 
55
+
56
+
57
# Calculate the highest combined score for a given column
def calculate_highest_combined_score(data, column):
    """Find the best-scoring groups of 2 to 6 models for one score column.

    Args:
        data: DataFrame with a ``'Model'`` column and the score ``column``.
        column: Name of the score column to sum over.

    Returns:
        A ``(column, top_combinations)`` tuple. ``top_combinations`` maps each
        group size ``r`` in 2..6 to a list of at most three
        ``(combined_score, (model, ...))`` tuples, highest score first. A
        group size larger than the number of scored models maps to ``[]``.
    """
    from heapq import nlargest

    # Filter scores and model names with the SAME mask. Dropping NaN from the
    # scores alone would shift them against the model names and attribute
    # scores to the wrong models.
    has_score = data[column].notna()
    scores = data.loc[has_score, column].tolist()
    models = data.loc[has_score, 'Model'].tolist()
    scored_models = list(zip(scores, models))

    top_combinations = {}
    for r in range(2, 7):
        candidates = (
            (
                sum(score for score, _ in combination),
                tuple(model for _, model in combination),
            )
            for combination in combinations(scored_models, r)
        )
        # nlargest keeps only three entries at a time and is documented as
        # equivalent to sorted(..., reverse=True)[:3], so ties resolve the
        # same way without storing every combination.
        top_combinations[r] = nlargest(3, candidates, key=lambda entry: entry[0])
    return column, top_combinations
69
 
70
+ # Display the results of the highest combined scores
71
+ def display_highest_combined_scores(data, score_columns):
72
  for column in score_columns:
73
+ if column in data.columns:
74
  _, top_combinations = calculate_highest_combined_score(data, column)
75
  st.subheader(f"Top Combinations for {column}")
76
  for r, combinations in top_combinations.items():
 
79
  st.write(f"Score: {score}, Models: {', '.join(combination)}")
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  # Function to calculate the highest combined score for a given column
83
  def calculate_highest_combined_score(data, column):
84
  scores = data[column].tolist()
 
91
  top_combinations[r] = sorted(top_combinations[r], key=lambda x: x[0], reverse=True)[:3]
92
  return column, top_combinations
93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  # Function to create bar chart for a given category
95
  def create_bar_chart(df, category):
96
  """Create and display a bar chart for a given category."""