Evan Frick
make human pref come first
b2821de
raw
history blame
4.26 kB
import streamlit as st
import pandas as pd
import json
from os.path import split as path_split, splitext as path_splitext
# One literal feeds both the page configuration and the on-page heading.
_APP_TITLE = "PPE Metrics Explorer"

# Page-level options: "wide" lets the app use the entire screen width, and the
# sidebar starts expanded.
_PAGE_OPTIONS = {
    "page_title": _APP_TITLE,
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_OPTIONS)

# Heading displayed at the top of the app.
st.title(_APP_TITLE)
@st.cache_data
def load_data(file_path):
    """Load and return the parsed JSON content of ``file_path``.

    The function is wrapped in ``st.cache_data``, so Streamlit handles
    caching of the returned object between calls.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file (dict, list, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON text is UTF-8; be explicit so parsing does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)
def contains_list(column):
    """Tell whether at least one element of ``column`` is a ``list`` instance.

    ``column`` is expected to offer ``.apply`` (it is used here as the
    per-column callback of ``DataFrame.apply``).
    """
    def _is_list(cell):
        return isinstance(cell, list)

    list_flags = column.apply(_is_list)
    return list_flags.any()
def _flatten_metrics(metrics):
    """Flatten one model's metrics into a single-level ``{column: value}`` dict.

    * A value that is not a dict is stored under the single key ``"Value"``.
    * Inside a dict, a nested ``{subkey: {metric: value}}`` entry becomes
      ``"subkey - metric"`` columns, while a non-dict entry keeps its own key.
    """
    if not isinstance(metrics, dict):
        return {"Value": metrics}

    flattened = {}
    for subkey, submetrics in metrics.items():
        if isinstance(submetrics, dict):
            for metric_name, value in submetrics.items():
                flattened[f"{subkey} - {metric_name}"] = value
        else:
            flattened[subkey] = submetrics
    return flattened


def _build_records(benchmark_data):
    """Return one flat row dict (``"Model"`` plus metric columns) per model."""
    records = []
    for model, metrics in benchmark_data.items():
        # Strip the final extension, then any leading directory part, from the key.
        model_name = path_split(path_splitext(model)[0])[-1]
        records.append({"Model": model_name, **_flatten_metrics(metrics)})
    return records


def main():
    """Render the app: benchmark selector, metric search, table and CSV download.

    Reads ``results.json`` through ``load_data``, lets the user pick a
    benchmark, flattens that benchmark's per-model metrics into a table
    indexed by model name, optionally filters the columns by a
    case-insensitive search term, and shows the table sorted in descending
    order of its first column together with a CSV download button.
    """
    # Load the JSON data
    data = load_data('results.json')

    # Alphabetical order with "human_preference_v1" pinned first. The tuple key
    # (False sorts before True) keeps it first no matter how other names begin.
    benchmarks = sorted(data, key=lambda name: (name != "human_preference_v1", name))
    if not benchmarks:
        # Nothing to select; indexing ``data`` with the selectbox result would fail.
        st.warning("No benchmarks found in results.json.")
        return

    # Dropdown for selecting benchmark
    selected_benchmark = st.selectbox("Select a Benchmark", benchmarks)

    records = _build_records(data[selected_benchmark])
    if not records:
        # Without rows there is no "Model" column to index on below.
        st.warning("No results available for this benchmark.")
        return

    df = pd.DataFrame(records)
    # Drop columns that contain lists
    df = df.loc[:, ~df.apply(contains_list)]
    if "human" not in selected_benchmark:
        # Case-insensitive alphabetical column order for non-"human" benchmarks;
        # "human" benchmarks keep the order in which metrics were encountered.
        df = df[sorted(df.columns, key=str.lower)]
    df = df.set_index("Model")

    # The search bar lives in the first of three layout columns; the remaining
    # two are only created for spacing.
    search_col, _, _ = st.columns([1, 3, 1])  # Adjust the ratios as needed
    with search_col:
        # A non-empty label hidden via label_visibility replaces the empty
        # label, which Streamlit discourages for accessibility reasons.
        column_search = st.text_input(
            "Search metrics",
            placeholder="Search metrics...",
            key="search",
            label_visibility="collapsed",
        )

    if column_search:
        # Keep only columns whose name contains the search term (case-insensitive)
        needle = column_search.lower()
        filtered_columns = [col for col in df.columns if needle in col.lower()]
        if filtered_columns:
            df_display = df[filtered_columns]
        else:
            st.warning("No columns match your search.")
            df_display = pd.DataFrame()  # Empty DataFrame
    else:
        # If no search term, display all columns
        df_display = df

    # Sort descending by the first displayed column. ``.empty`` is true when
    # either axis has length 0, so a frame with rows but no columns is shown
    # as-is instead of raising IndexError on ``columns[0]``.
    if df_display.empty:
        table = df_display
    else:
        table = df_display.sort_values(df_display.columns[0], ascending=False)
    st.dataframe(table, use_container_width=True)

    # Optional: Allow user to download the (unsorted) displayed data as CSV
    st.download_button(
        label="Download data as CSV",
        data=df_display.to_csv(),
        file_name=f"{selected_benchmark}_metrics.csv",
        mime='text/csv',
    )
# Run the app only when this file is the program entry point, i.e. when
# ``__name__`` is "__main__".
if __name__ == "__main__":
    main()