diff --git a/.gitattributes b/.gitattributes index 0863c91bade25d407d50716bc6628d38a63454b4..5eb15f71d2fdf21bd2a2723b48a1d36b9d338cad 100644 --- a/.gitattributes +++ b/.gitattributes @@ -271,3 +271,16 @@ results_qwen/Llama-3-70b-chat-hf.jpg filter=lfs diff=lfs merge=lfs -text results_qwen/gpt-4.csv filter=lfs diff=lfs merge=lfs -text results_qwen/gpt-4.jpg filter=lfs diff=lfs merge=lfs -text results_qwen/Llama-3-70b-chat-hf.pkl filter=lfs diff=lfs merge=lfs -text +all_results.pkl filter=lfs diff=lfs merge=lfs -text +results/Llama-3-70b-chat-hf.png filter=lfs diff=lfs merge=lfs -text +results/dbrx-instruct.png filter=lfs diff=lfs merge=lfs -text +results/gpt-3.5-0613.png filter=lfs diff=lfs merge=lfs -text +results/gpt-4-1106.png filter=lfs diff=lfs merge=lfs -text +results/Llama-3-70b-chat-hf.jpg filter=lfs diff=lfs merge=lfs -text +results/dbrx-instruct.jpg filter=lfs diff=lfs merge=lfs -text +results/gpt-3.5-0613.jpg filter=lfs diff=lfs merge=lfs -text +results/gpt-4-1106.jpg filter=lfs diff=lfs merge=lfs -text +results/gpt-4-1106.pkl filter=lfs diff=lfs merge=lfs -text +results/Llama-3-70b-chat-hf.pkl filter=lfs diff=lfs merge=lfs -text +results/dbrx-instruct.pkl filter=lfs diff=lfs merge=lfs -text +results/gpt-3.5-0613.pkl filter=lfs diff=lfs merge=lfs -text diff --git a/app.py b/app.py index be2c8f73355fc397e158fafa82320cd378aeb4d0..2087fe147847529bafdbd42806eb4481cb20a953 100644 --- a/app.py +++ b/app.py @@ -1,98 +1,49 @@ -import gradio as gr -import pandas as pd -from glob import glob -import matplotlib.pyplot as plt -import seaborn as sns -from matplotlib.colors import ListedColormap, BoundaryNorm -from glob import glob import os +from glob import glob - +import gradio as gr import matplotlib.pyplot as plt -import seaborn as sns -from matplotlib.colors import ListedColormap, BoundaryNorm import pandas as pd +import seaborn as sns +from matplotlib.colors import BoundaryNorm, ListedColormap + +all_results = pd.read_pickle("all_results.pkl") + + +def get_accuracy_dataframe(df): + # Calculate overall model accuracy + df['parsed_judge_response'] = df['parsed_judge_response'].astype(float) + model_accuracy = df.groupby('model_name')['parsed_judge_response'].mean().reset_index() + + # Calculate model accuracy per difficulty level + df['difficulty_level'] = df['difficulty_level'].astype(int) + model_accuracy_per_level = df.groupby(['model_name', 'difficulty_level'])['parsed_judge_response'].mean().reset_index() + model_accuracy_per_level_df = model_accuracy_per_level.pivot(index='model_name', columns='difficulty_level', values='parsed_judge_response') + + # Merge overall accuracy and level-based accuracy into a single DataFrame + model_accuracy_df = model_accuracy.merge(model_accuracy_per_level_df, on='model_name') + model_accuracy_df.rename(columns={1: 'level_1', 2: 'level_2', 3: 'level_3', 4: 'level_4', 5: 'level_5'}, inplace=True) + model_accuracy_df.rename(columns={'parsed_judge_response': 'Accuracy'}, inplace=True) + + # Multiply by 100 and format to one decimal point + model_accuracy_df = model_accuracy_df.applymap(lambda x: round(x * 100, 1) if isinstance(x, float) else x) + + # Add headers with icons + model_accuracy_df.columns = [ + "🤖 Model Name", + "⭐ Overall", + "📈 Level 1", + "🔍 Level 2", + "📘 Level 3", + "🔬 Level 4", + ] - -# Load text benchmark results -noncot_results = glob("results/*.pkl") -noncot_results_qwen = glob("results_qwen/*.pkl") -# Load vision benchmark results -vision_results = glob("results-vision/*.pkl") -# Load CoT text benchmark results -cot_text_results = glob("results-cot/*.pkl") -# Load CoT vision benchmark results -# cot_vision_results = glob("results-vision-CoT/*.pkl") - - -# Function to load data, add model type and name -def load_data(files, model_type): - data = [] - for file in files: - df = pd.read_pickle(file) - df["Model Type"] = model_type - df["Model Name"] = file.split("/")[-1].replace(".pkl", "") - data.append(df) - return pd.concat(data, ignore_index=True) - - -# Load and label all data -data = load_data(noncot_results, "Text Only") -data_qwen = load_data(noncot_results_qwen, "Text Only") -vision_data = load_data(vision_results, "Vision") -cot_text_data = load_data(cot_text_results, "CoT Text Only") -# cot_vision_data = load_data(cot_vision_results, "CoT Vision") - -# Combine all data into a single DataFrame -all_data = pd.concat([data_qwen, vision_data, cot_text_data], ignore_index=True) - -all_model_names = all_data["Model Name"].unique() -all_text_only_model_names = list( - all_data[all_data["Model Type"] == "Text Only"]["Model Name"].unique() -) -all_cot_text_only_models = list( - all_data[all_data["Model Type"] == "CoT Text Only"]["Model Name"].unique() -) - - -text_only_filtered_raw = None -text_only_filtered_raw_cot = None - -## Continue with the cold code -- -# TODO: Update me to read from all_data for later - - -# Load the csv files into a dict with keys being name of the file and values being the data -data = {file: pd.read_pickle(file) for file in noncot_results} -# Load the vision files into a dict -vision_data = {file: pd.read_pickle(file) for file in vision_results} -# Load the CoT text files into a dict -cot_text_data = {file: pd.read_pickle(file) for file in cot_text_results} -# Load the CoT vision files into a dict -# cot_vision_data = {file: pd.read_pickle(file) for file in cot_vision_results} - -data_qwen = {file: pd.read_pickle(file) for file in noncot_results_qwen} - - -intersection_df = pd.read_pickle( - "./intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl" -) -# accuracy for each model -intersection_df_acc = ( - intersection_df.groupby("model_name")["parsed_judge_response"].mean().reset_index() -) -intersection_df_acc["Accuracy"] = intersection_df_acc["parsed_judge_response"] * 100 -intersection_df_acc.drop("parsed_judge_response", axis=1, inplace=True) -intersection_df_acc.sort_values("Accuracy", ascending=False, inplace=True) - - -def calculate_accuracy(df): - return df["parsed_judge_response"].mean() * 100 + model_accuracy_df.sort_values(by="⭐ Overall", ascending=False, inplace=True) + + return model_accuracy_df -def accuracy_breakdown(df): - # 4 level accuracy - return (df.groupby("difficulty_level")["parsed_judge_response"].mean() * 100).values +accuracy_df = get_accuracy_dataframe(all_results) # Define the column names with icons @@ -114,450 +65,126 @@ column_names = [ "Level 4 Accuracy", ] - -# Function to process data -def process_data(data): - data_for_df = [] - for file, df in data.items(): - overall_accuracy = round(calculate_accuracy(df), 2) - breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)] - model_name = file.split("/")[-1].replace(".pkl", "") - data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy) - return data_for_df - - -# Process all data -text_data_for_df = process_data(data) -text_data_for_df_qwen = process_data(data_qwen) - -vision_data_for_df = process_data(vision_data) -cot_text_data_for_df = process_data(cot_text_data) -# cot_vision_data_for_df = process_data(cot_vision_data) - -# Create DataFrames -accuracy_df = pd.DataFrame(text_data_for_df, columns=column_names) -accuracy_df_qwen = pd.DataFrame(text_data_for_df_qwen, columns=column_names) -vision_accuracy_df = pd.DataFrame(vision_data_for_df, columns=column_names) -cot_text_accuracy_df = pd.DataFrame(cot_text_data_for_df, columns=column_names) -# cot_vision_accuracy_df = pd.DataFrame(cot_vision_data_for_df, columns=column_names) - - -# Function to finalize DataFrame -def finalize_df(df): - df = df.round(1) # Round to one decimal place - df = df.applymap(lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x) - df.columns = headers_with_icons - df.sort_values(by="⭐ Overall", ascending=False, inplace=True) - # add a new column with the order (index) - df["#"] = range(1, len(df) + 1) - # bring rank to the first column - cols = df.columns.tolist() - cols = cols[-1:] + cols[:-1] - df = df[cols] - - return df - - -# Finalize all DataFrames -accuracy_df = finalize_df(accuracy_df) -accuracy_df_qwen = finalize_df(accuracy_df_qwen) -vision_accuracy_df = finalize_df(vision_accuracy_df) -cot_text_accuracy_df = finalize_df(cot_text_accuracy_df) -# cot_vision_accuracy_df = finalize_df(cot_vision_accuracy_df) - - def load_heatmap(evt: gr.SelectData): heatmap_image = gr.Image(f"results/{evt.value}.jpg") return heatmap_image -def load_heatmap_qwen(evt: gr.SelectData): - heatmap_image = gr.Image(f"results_qwen/{evt.value}.jpg") - return heatmap_image +# # Function to process data +# def process_data(data): +# data_for_df = [] +# for file, df in data.items(): +# overall_accuracy = round(calculate_accuracy(df), 2) +# breakdown_accuracy = [round(acc, 2) for acc in accuracy_breakdown(df)] +# model_name = file.split("/")[-1].replace(".pkl", "") +# data_for_df.append([model_name, overall_accuracy] + breakdown_accuracy) +# return data_for_df -def load_vision_heatmap(evt: gr.SelectData): - heatmap_image = gr.Image(f"results-vision/{evt.value}.jpg") - return heatmap_image +# # Function to finalize DataFrame +# def finalize_df(df): +# df = df.round(1) # Round to one decimal place +# df = df.applymap(lambda x: f"{x:.1f}" if isinstance(x, (int, float)) else x) +# df.columns = headers_with_icons +# df.sort_values(by="⭐ Overall", ascending=False, inplace=True) +# # add a new column with the order (index) +# df["#"] = range(1, len(df) + 1) +# # bring rank to the first column +# cols = df.columns.tolist() +# cols = cols[-1:] + cols[:-1] +# df = df[cols] -def load_cot_heatmap(evt: gr.SelectData): - heatmap_image = gr.Image(f"results-cot/{evt.value}.jpg") - return heatmap_image +# return df -def load_cot_vision_heatmap(evt: gr.SelectData): - heatmap_image = gr.Image(f"results-vision-CoT/{evt.value}.jpg") +def load_heatmap(evt: gr.SelectData): + heatmap_image = gr.Image(f"results/{evt.value}.jpg") return heatmap_image -def calculate_order_by_first_substring(selected_models): - global text_only_filtered_raw - first_columns = all_data[all_data["substring_index"] == 1] - query_ids_df = first_columns[first_columns["Model Type"] == "Text Only"] - query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)] - - query_ids_df = query_ids_df.groupby("query_id").filter( - lambda x: x["parsed_judge_response"].eq(1).all() - ) - - fsm_ids = query_ids_df.fsm_id.unique() - - text_only = all_data[all_data["Model Type"] == "Text Only"] - text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)] - text_only_filtered_raw = text_only_filtered.copy() - - query_ids = text_only_filtered.query_id.unique() - text_only_filtered = ( - text_only_filtered.groupby(["Model Name"])["parsed_judge_response"] - .mean() - .reset_index() - ) - - text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100 - text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True) - - text_only_filtered["Accuracy"] = text_only_filtered["Accuracy"].apply( - lambda x: round(x, 2) - ) - text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True) - - number_of_queries = len(query_ids) - number_of_fsms = len(fsm_ids) - - return text_only_filtered, number_of_queries, number_of_fsms - - -def calculate_order_by_first_substring_cot(selected_models): - global text_only_filtered_raw_cot - first_columns = all_data[all_data["substring_index"] == 1] - query_ids_df = first_columns[first_columns["Model Type"] == "CoT Text Only"] - query_ids_df = query_ids_df[query_ids_df["Model Name"].isin(selected_models)] - - query_ids_df = query_ids_df.groupby("query_id").filter( - lambda x: x["parsed_judge_response"].eq(1).all() - ) - - fsm_ids = query_ids_df.fsm_id.unique() - - text_only = all_data[all_data["Model Type"] == "CoT Text Only"] - text_only_filtered = text_only[text_only["fsm_id"].isin(fsm_ids)] - text_only_filtered_raw_cot = text_only_filtered.copy() - - query_ids = text_only_filtered.query_id.unique() - text_only_filtered = ( - text_only_filtered.groupby(["Model Name"])["parsed_judge_response"] - .mean() - .reset_index() - ) - - text_only_filtered["Accuracy"] = text_only_filtered["parsed_judge_response"] * 100 - text_only_filtered.drop("parsed_judge_response", axis=1, inplace=True) - - text_only_filtered["Accuracy"] = text_only_filtered["Accuracy"].apply( - lambda x: round(x, 2) - ) - text_only_filtered.sort_values("Accuracy", ascending=False, inplace=True) - - number_of_queries = len(query_ids) - number_of_fsms = len(fsm_ids) - - return text_only_filtered, number_of_queries, number_of_fsms - - -def generate_heatmap_for_specific_model(model_name): - global text_only_filtered_raw - - cmap = ListedColormap(["lightblue", "red", "green"]) - bounds = [-1.5, -0.5, 0.5, 1.5] - norm = BoundaryNorm(bounds, cmap.N) - - model_df = text_only_filtered_raw[ - text_only_filtered_raw["Model Name"] == model_name - ] - model_df["fsm_info"] = model_df.apply( - lambda x: f"{x['num_states']} states, {x['num_alphabet']} alphabet", axis=1 - ) - model_df = model_df.sort_values(by=["num_states", "num_alphabet"]) - - pivot_df = ( - model_df.pivot_table( - index="fsm_info", - columns="substring_index", - values="parsed_judge_response", - aggfunc="first", - ) - .fillna(-1) - .astype(float) - ) - - # Dynamically adjust figure size - num_rows, num_cols = pivot_df.shape - fig_width = max(12, num_cols * 0.5) # Adjust width per column - fig_height = max(8, num_rows * 0.4) # Adjust height per row - - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - sns.heatmap( - pivot_df, - cmap=cmap, - linewidths=1, - linecolor="black", - norm=norm, - cbar=False, - square=True, - ax=ax, - ) - plt.title(f"Heatmap for Model: {model_name}", fontsize=12) - plt.xlabel("Substring Index") - plt.ylabel("FSM (States, Alphabet)") - plt.xticks(rotation=45) - - sns.despine(ax=ax, top=True, right=True, left=True, bottom=True) - - return fig - - -def generate_heatmap_for_specific_model_cot(model_name): - global text_only_filtered_raw_cot - - cmap = ListedColormap(["lightblue", "red", "green"]) - bounds = [-1.5, -0.5, 0.5, 1.5] - norm = BoundaryNorm(bounds, cmap.N) - - model_df = text_only_filtered_raw_cot[ - text_only_filtered_raw_cot["Model Name"] == model_name - ] - model_df["fsm_info"] = model_df.apply( - lambda x: f"{x['num_states']} states, {x['num_alphabet']} alphabet", axis=1 - ) - model_df = model_df.sort_values(by=["num_states", "num_alphabet"]) - - pivot_df = ( - model_df.pivot_table( - index="fsm_info", - columns="substring_index", - values="parsed_judge_response", - aggfunc="first", - ) - .fillna(-1) - .astype(float) - ) - - # Dynamically adjust figure size - num_rows, num_cols = pivot_df.shape - fig_width = max(12, num_cols * 0.5) # Adjust width per column - fig_height = max(8, num_rows * 0.4) # Adjust height per row - - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - sns.heatmap( - pivot_df, - cmap=cmap, - linewidths=1, - linecolor="black", - norm=norm, - cbar=False, - square=True, - ax=ax, - ) - plt.title(f"Heatmap for Model: {model_name}", fontsize=12) - plt.xlabel("Substring Index") - plt.ylabel("FSM (States, Alphabet)") - plt.xticks(rotation=45) - - sns.despine(ax=ax, top=True, right=True, left=True, bottom=True) - - return fig - - -def generate_heatmap_for_intersection_model(model_name): - global intersection_df - - cmap = ListedColormap(["lightblue", "red", "green"]) - bounds = [-1.5, -0.5, 0.5, 1.5] - norm = BoundaryNorm(bounds, cmap.N) - - # Filter for a specific model - model_df = intersection_df[intersection_df["model_name"] == model_name].copy() - - if model_df.empty: - print(f"No data found for model {model_name}. Skipping heatmap generation.") - return None - - model_df["fsm_info"] = model_df.apply( - lambda x: f"{x['num_states']} states, {x['num_alphabet']} alphabet", axis=1 - ) - model_df = model_df.sort_values(by=["num_states", "num_alphabet"]) - - pivot_df = ( - model_df.pivot_table( - index="fsm_info", - columns="substring_index", - values="parsed_judge_response", - aggfunc="first", - ) - .fillna(-1) - .astype(float) - ) - - # Dynamically adjust figure size - num_rows, num_cols = pivot_df.shape - fig_width = max(12, num_cols * 0.5) - fig_height = max(8, num_rows * 0.4) - - fig, ax = plt.subplots(figsize=(fig_width, fig_height)) - sns.heatmap( - pivot_df, - cmap=cmap, - linewidths=1, - linecolor="black", - norm=norm, - cbar=False, - square=True, - ax=ax, - ) - plt.title(f"Heatmap for Model: {model_name}", fontsize=12) - plt.xlabel("Substring Index") - plt.ylabel("FSM (States, Alphabet)") - plt.xticks(rotation=45) - - sns.despine(ax=ax, top=True, right=True, left=True, bottom=True) - - plt.close(fig) - return fig - - -def show_constraint_heatmap(evt: gr.SelectData): - model_name = evt.value - return generate_heatmap_for_specific_model(model_name) - - -def show_constraint_heatmap_cot(evt: gr.SelectData): - model_name = evt.value - return generate_heatmap_for_specific_model_cot(model_name) - - -def show_intersection_heatmap(evt: gr.SelectData): - model_name = evt.value - return generate_heatmap_for_intersection_model(model_name) - - with gr.Blocks() as demo: gr.Markdown("# FSM Benchmark Leaderboard") with gr.Tab("Text-only Benchmark"): - gr.Markdown("# Text-only Leaderboard (Judged by Qwen)") - leader_board = gr.Dataframe(accuracy_df_qwen, headers=headers_with_icons) + leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons) gr.Markdown("## Heatmap") heatmap_image_qwen = gr.Image(label="", show_label=False) - leader_board.select(fn=load_heatmap_qwen, outputs=[heatmap_image_qwen]) + leader_board.select(fn=load_heatmap, outputs=[heatmap_image_qwen]) - with gr.Tab("Vision Benchmark", visible=False): - gr.Markdown("# Vision Benchmark Leaderboard") - leader_board_vision = gr.Dataframe( - vision_accuracy_df, headers=headers_with_icons - ) - gr.Markdown("## Heatmap") - heatmap_image_vision = gr.Image(label="", show_label=False) - leader_board_vision.select( - fn=load_vision_heatmap, outputs=[heatmap_image_vision] - ) - - with gr.Tab("Text-only Benchmark (CoT)", visible=False): - gr.Markdown("# Text-only Leaderboard (CoT)") - cot_leader_board_text = gr.Dataframe( - cot_text_accuracy_df, headers=headers_with_icons - ) - gr.Markdown("## Heatmap") - cot_heatmap_image_text = gr.Image(label="", show_label=False) - cot_leader_board_text.select( - fn=load_cot_heatmap, outputs=[cot_heatmap_image_text] - ) - - # with gr.Tab("Vision Benchmark (CoT)"): - # gr.Markdown("# Vision Benchmark Leaderboard (CoT)") - # cot_leader_board_vision = gr.Dataframe( - # cot_vision_accuracy_df, headers=headers_with_icons + # with gr.Tab("Vision Benchmark", visible=False): + # gr.Markdown("# Vision Benchmark Leaderboard") + # leader_board_vision = gr.Dataframe( + # vision_accuracy_df, headers=headers_with_icons # ) # gr.Markdown("## Heatmap") - # cot_heatmap_image_vision = gr.Image(label="", show_label=False) - # cot_leader_board_vision.select( - # fn=load_cot_vision_heatmap, outputs=[cot_heatmap_image_vision] + # heatmap_image_vision = gr.Image(label="", show_label=False) + # leader_board_vision.select( + # fn=load_vision_heatmap, outputs=[heatmap_image_vision] # ) - with gr.Tab("Constraint Text-only Results"): - gr.Markdown("## Constraint Text-only Leaderboard by first substring") - included_models = gr.CheckboxGroup( - label="Models to include", - choices=all_text_only_model_names, - value=all_text_only_model_names, - interactive=True, - ) - with gr.Row(): - number_of_queries = gr.Textbox(label="Number of included queries") - - number_of_fsms = gr.Textbox(label="Number of included FSMs") - - constrained_leader_board_text = gr.Dataframe() - constrained_leader_board_plot = gr.Plot() - - included_models.select( - fn=calculate_order_by_first_substring, - inputs=[included_models], - outputs=[constrained_leader_board_text, number_of_queries, number_of_fsms], - queue=True, - ) - - with gr.Tab("Constraint Text-only Results (CoT)", visible=False): - gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)") - included_models_cot = gr.CheckboxGroup( - label="Models to include", - choices=all_cot_text_only_models, - value=all_cot_text_only_models, - interactive=True, - ) - with gr.Row(): - number_of_queries_cot = gr.Textbox(label="Number of included queries") - number_of_fsms_cot = gr.Textbox(label="Number of included FSMs") - - constrained_leader_board_text_cot = gr.Dataframe() - constrained_leader_board_plot_cot = gr.Plot() - - with gr.Tab("Majority Vote (Subset 1)", visible=False): - gr.Markdown("## Majority Vote (Subset 1)") - intersection_leader_board = gr.Dataframe( - intersection_df_acc, headers=headers_with_icons - ) - heatmap_image = gr.Plot(label="Model Heatmap") - - with gr.Tab("Text-only Benchmark (deprecated)", visible=False): - gr.Markdown("# Text-only Leaderboard") - leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons) - gr.Markdown("## Heatmap") - heatmap_image = gr.Image(label="", show_label=False) - leader_board.select(fn=load_heatmap, outputs=[heatmap_image]) - - # ============ Callbacks ============ - - included_models_cot.select( - fn=calculate_order_by_first_substring_cot, - inputs=[included_models_cot], - outputs=[ - constrained_leader_board_text_cot, - number_of_queries_cot, - number_of_fsms_cot, - ], - queue=True, - ) - - constrained_leader_board_text.select( - fn=show_constraint_heatmap, outputs=[constrained_leader_board_plot] - ) - - constrained_leader_board_text_cot.select( - fn=show_constraint_heatmap_cot, outputs=[constrained_leader_board_plot_cot] - ) - - intersection_leader_board.select( - fn=show_intersection_heatmap, outputs=[heatmap_image] - ) + # with gr.Tab("Text-only Benchmark (CoT)", visible=False): + # gr.Markdown("# Text-only Leaderboard (CoT)") + # cot_leader_board_text = gr.Dataframe( + # cot_text_accuracy_df, headers=headers_with_icons + # ) + # gr.Markdown("## Heatmap") + # cot_heatmap_image_text = gr.Image(label="", show_label=False) + # cot_leader_board_text.select( + # fn=load_cot_heatmap, outputs=[cot_heatmap_image_text] + # ) + + # with gr.Tab("Constraint Text-only Results (CoT)", visible=False): + # gr.Markdown("## Constraint Text-only Leaderboard by first substrin (CoT)") + # included_models_cot = gr.CheckboxGroup( + # label="Models to include", + # choices=all_cot_text_only_models, + # value=all_cot_text_only_models, + # interactive=True, + # ) + # with gr.Row(): + # number_of_queries_cot = gr.Textbox(label="Number of included queries") + # number_of_fsms_cot = gr.Textbox(label="Number of included FSMs") + + # constrained_leader_board_text_cot = gr.Dataframe() + # constrained_leader_board_plot_cot = gr.Plot() + + # with gr.Tab("Majority Vote (Subset 1)", visible=False): + # gr.Markdown("## Majority Vote (Subset 1)") + # intersection_leader_board = gr.Dataframe( + # intersection_df_acc, headers=headers_with_icons + # ) + # heatmap_image = gr.Plot(label="Model Heatmap") + + # with gr.Tab("Text-only Benchmark (deprecated)", visible=False): + # gr.Markdown("# Text-only Leaderboard") + # leader_board = gr.Dataframe(accuracy_df, headers=headers_with_icons) + # gr.Markdown("## Heatmap") + # heatmap_image = gr.Image(label="", show_label=False) + # leader_board.select(fn=load_heatmap, outputs=[heatmap_image]) + + # # ============ Callbacks ============ + + # included_models_cot.select( + # fn=calculate_order_by_first_substring_cot, + # inputs=[included_models_cot], + # outputs=[ + # constrained_leader_board_text_cot, + # number_of_queries_cot, + # number_of_fsms_cot, + # ], + # queue=True, + # ) + + # constrained_leader_board_text.select( + # fn=show_constraint_heatmap, outputs=[constrained_leader_board_plot] + # ) + + # constrained_leader_board_text_cot.select( + # fn=show_constraint_heatmap_cot, outputs=[constrained_leader_board_plot_cot] + # ) + + # intersection_leader_board.select( + # fn=show_intersection_heatmap, outputs=[heatmap_image] + # ) demo.launch() diff --git a/intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl b/intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl deleted file mode 100644 index c10a8e2a4534e7c4ac2039b2244adfc71854c85f..0000000000000000000000000000000000000000 --- a/intersection_results/gpt-3.5-judge-by_Qwen_5times_intersection_subset_1.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f1cc52129234d9667a4cc388bd1da3a2021f1bbb7ea556e20ee6d5e159b2b1a8 -size 1482609 diff --git a/results-cot/CodeLlama-70b-Instruct-hf.csv b/results-cot/CodeLlama-70b-Instruct-hf.csv deleted file mode 100644 index 313c4251293a45cfd0227016ad4e9314f587d891..0000000000000000000000000000000000000000 --- a/results-cot/CodeLlama-70b-Instruct-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:135ac55c9ad0f9d88d5054906a43ab4990fee1027f7381fe21389069a753dc75 -size 20713132 diff --git a/results-cot/CodeLlama-70b-Instruct-hf.jpg b/results-cot/CodeLlama-70b-Instruct-hf.jpg deleted file mode 100644 index ebf8fa97b1a84989d9b63d4535296d94e345ed71..0000000000000000000000000000000000000000 --- a/results-cot/CodeLlama-70b-Instruct-hf.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d05235bca80da5d54da4a0845b041d5daaf7235a2da7fa3937d26b0b74ff1f28 -size 1317213 diff --git a/results-cot/CodeLlama-70b-Instruct-hf.pkl b/results-cot/CodeLlama-70b-Instruct-hf.pkl deleted file mode 100644 index 8fe93f6164a00c5c3f82a78c59a4c0b2000d439b..0000000000000000000000000000000000000000 --- a/results-cot/CodeLlama-70b-Instruct-hf.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a8d72952877e3e4023251396d037ebae8145e3e82e2d4a328ce132171ea70267 -size 20756425 diff --git a/results-cot/CodeLlama-70b-Instruct-hf.png b/results-cot/CodeLlama-70b-Instruct-hf.png deleted file mode 100644 index 137851e148bc7b389fbc8fc3f22b1f4939ab2fc9..0000000000000000000000000000000000000000 --- a/results-cot/CodeLlama-70b-Instruct-hf.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:66661c2f3cb4168589300115c6f2adf7ab6ef2fc5580d084d22d18217b2b6296 -size 1015775 diff --git a/results-cot/Mixtral-8x7B-Instruct-v0.1.csv b/results-cot/Mixtral-8x7B-Instruct-v0.1.csv deleted file mode 100644 index be977117c18c4111c69538376dc0cef355ae12e3..0000000000000000000000000000000000000000 --- a/results-cot/Mixtral-8x7B-Instruct-v0.1.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:093e919d90609c3be8d6818cf56ca018214da3a42b78aeaf85f92581b72c5ad4 -size 19494123 diff --git a/results-cot/Mixtral-8x7B-Instruct-v0.1.jpg b/results-cot/Mixtral-8x7B-Instruct-v0.1.jpg deleted file mode 100644 index e7272ca29856b38bc954b055f34688b373fac25a..0000000000000000000000000000000000000000 --- a/results-cot/Mixtral-8x7B-Instruct-v0.1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8c747a78a4b70330c97682209acda5e13d61d0ae3b9a372d4d01269163b7842f -size 1325857 diff --git a/results-cot/Mixtral-8x7B-Instruct-v0.1.pkl b/results-cot/Mixtral-8x7B-Instruct-v0.1.pkl deleted file mode 100644 index 7b06a4c6eab4e1a7c5a577de535213618125121a..0000000000000000000000000000000000000000 --- a/results-cot/Mixtral-8x7B-Instruct-v0.1.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:686692584c6ba027c454d699bbf585b95e5c99bfc426810ea74b327a975b9cf3 -size 19489822 diff --git a/results-cot/Mixtral-8x7B-Instruct-v0.1.png b/results-cot/Mixtral-8x7B-Instruct-v0.1.png deleted file mode 100644 index 98e12193ed03f9a2a0681599f7aeeb5f30acd8fb..0000000000000000000000000000000000000000 --- a/results-cot/Mixtral-8x7B-Instruct-v0.1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:01fafa25ac093e91e57f234b61c449e12a2f6610208d80ca7b1405b8831d0784 -size 1015852 diff --git a/results-cot/Qwen1.5-72B-Chat.csv b/results-cot/Qwen1.5-72B-Chat.csv deleted file mode 100644 index 622753313053bafa231036e9de8d5802aad52b34..0000000000000000000000000000000000000000 --- a/results-cot/Qwen1.5-72B-Chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:32681449776facf1084405001e69ed7926b79c69f9717fb159e3eb064b333636 -size 15795431 diff --git a/results-cot/Qwen1.5-72B-Chat.jpg b/results-cot/Qwen1.5-72B-Chat.jpg deleted file mode 100644 index 6641c86256dda3df6db405e9e290363be15cb623..0000000000000000000000000000000000000000 --- a/results-cot/Qwen1.5-72B-Chat.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:882652b7fd7ca1c03fbcd5c031f024933405b92b978514042feabe775c6e8789 -size 1314105 diff --git a/results-cot/Qwen1.5-72B-Chat.pkl b/results-cot/Qwen1.5-72B-Chat.pkl deleted file mode 100644 index 8ae84826b39afcd6390b4ebcf795ab0c1673e406..0000000000000000000000000000000000000000 --- a/results-cot/Qwen1.5-72B-Chat.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1c20383298d4b6482ca7c30bf91822e24099dc67b71a3be10271005e25208c40 -size 15778970 diff --git a/results-cot/Qwen1.5-72B-Chat.png b/results-cot/Qwen1.5-72B-Chat.png deleted file mode 100644 index c7f585620ae35171d2df8f9417b4a7eed58deb3d..0000000000000000000000000000000000000000 --- a/results-cot/Qwen1.5-72B-Chat.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9e6b7014c29a2184e88d63f3f2b9c2373531174b24e59b76442cc90d9d4b93a7 -size 1014011 diff --git a/results-cot/claude-3-sonnet-20240229.csv b/results-cot/claude-3-sonnet-20240229.csv deleted file mode 100644 index 612df78c35f533ede0a13c3852d0bbf3c2395494..0000000000000000000000000000000000000000 --- a/results-cot/claude-3-sonnet-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:77c47e8698b8aa86de42bda82cb27c1efc0174c18b3ba306ff263cbd260de20e -size 13144187 diff --git a/results-cot/claude-3-sonnet-20240229.jpg b/results-cot/claude-3-sonnet-20240229.jpg deleted file mode 100644 index 7f8da53a1c308e57d6400785fbe51bd40f0b92d7..0000000000000000000000000000000000000000 --- a/results-cot/claude-3-sonnet-20240229.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f24cb016fbc657a82dbeb33a256608f26512beab44e613179e50d01f90cb9420 -size 1334468 diff --git a/results-cot/claude-3-sonnet-20240229.pkl b/results-cot/claude-3-sonnet-20240229.pkl deleted file mode 100644 index f40b5f12f00a3aa4961b015eea2a8e739390fc54..0000000000000000000000000000000000000000 --- a/results-cot/claude-3-sonnet-20240229.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7e5a528d8fa3491adbc4e2e5a982e65a8a9c2fb5a96d321c864935dbe506453e -size 13047413 diff --git a/results-cot/claude-3-sonnet-20240229.png b/results-cot/claude-3-sonnet-20240229.png deleted file mode 100644 index 4d40c7b30e53473b41125362d9b87aa4acfb9521..0000000000000000000000000000000000000000 --- a/results-cot/claude-3-sonnet-20240229.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d5ede53fb575b73f93df09a94f8b66e2bafe6276a81d02b2d51944d956717a46 -size 1012281 diff --git a/results-cot/dbrx-instruct.csv b/results-cot/dbrx-instruct.csv deleted file mode 100644 index a42c8bf253eb24f6ac82bbfd91f7739572a6c979..0000000000000000000000000000000000000000 --- a/results-cot/dbrx-instruct.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:698626edcabf06c89b9b8fac2f929927e9b7351306f525cb87fc6e06ce1bc3e3 -size 19267224 diff --git a/results-cot/dbrx-instruct.jpg b/results-cot/dbrx-instruct.jpg deleted file mode 100644 index e94c2a086c0c1663df0f3460e2bef9c2ce54b570..0000000000000000000000000000000000000000 --- a/results-cot/dbrx-instruct.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e661b322a1b63403384fb607f1d2ae33281f81b97649db8ce52e48568324053b -size 1328621 diff --git a/results-cot/dbrx-instruct.pkl b/results-cot/dbrx-instruct.pkl deleted file mode 100644 index 9838390e0f30a6a0b98784d1a9c2b9ae9d9e7d35..0000000000000000000000000000000000000000 --- a/results-cot/dbrx-instruct.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8fa9067a57abeace401272335d2ce589be63a3a13e79b712505e8e3cc8e68d16 -size 19284287 diff --git a/results-cot/dbrx-instruct.png b/results-cot/dbrx-instruct.png deleted file mode 100644 index 23b3ccc713b42cf7fe05fe4894b6cdb1c6f0cd91..0000000000000000000000000000000000000000 --- a/results-cot/dbrx-instruct.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a77e5ca2cd619c238058f1f257542beeccb9425c42ea3e543a089e1695bede28 -size 1015065 diff --git a/results-cot/deepseek-llm-67b-chat.csv b/results-cot/deepseek-llm-67b-chat.csv deleted file mode 100644 index 5cc4db3de4bc73aa48eaf9f49671b4fb82048d88..0000000000000000000000000000000000000000 --- a/results-cot/deepseek-llm-67b-chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:191a06465559524615f8cca0c46ca2af417a289e9fbab2109e2d2a3c92432fe2 -size 16692090 diff --git a/results-cot/deepseek-llm-67b-chat.jpg b/results-cot/deepseek-llm-67b-chat.jpg deleted file mode 100644 index 1eb4d3834eacefe6c3faba9ddef66cf4070317b1..0000000000000000000000000000000000000000 --- a/results-cot/deepseek-llm-67b-chat.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fbb526b038f9477ff2ffed0267d9242040c3367849444968505af3321be1469b -size 1321996 diff --git a/results-cot/deepseek-llm-67b-chat.pkl b/results-cot/deepseek-llm-67b-chat.pkl deleted file mode 100644 index b0fca8c78dc3a29dc364dbf38122fa17fafb4306..0000000000000000000000000000000000000000 --- a/results-cot/deepseek-llm-67b-chat.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:005500788ecc004bb7a86054f8921e6331addd52c62d8e611a9d82b649ed4925 -size 16712146 diff --git a/results-cot/deepseek-llm-67b-chat.png b/results-cot/deepseek-llm-67b-chat.png deleted file mode 100644 index 0bbd8ad79d36f2dd479e2f8e42d6d7870305e4a6..0000000000000000000000000000000000000000 --- a/results-cot/deepseek-llm-67b-chat.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:60e621da12e87a0d62640465b1f3104e0e44429e2b3651474f928f074b76bd2d -size 1015933 diff --git a/results-cot/gemini-pro.csv b/results-cot/gemini-pro.csv deleted file mode 100644 index 98aaae6e00b98b052859247fc777ccf11aae3711..0000000000000000000000000000000000000000 --- a/results-cot/gemini-pro.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd52b8cfe861dd9fc106dafbe11569f9f2bf5848482e9b42c0ad3e88ffc83035 -size 14773471 diff --git a/results-cot/gemini-pro.jpg b/results-cot/gemini-pro.jpg deleted file mode 100644 index bd7e881e640f7e3bb5e1331c26e310605d13a02d..0000000000000000000000000000000000000000 --- a/results-cot/gemini-pro.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d52c5a061dd3107ff0c970821247db3289e297a08626ec1062130cb3e6bd9c6c -size 1325189 diff --git a/results-cot/gemini-pro.pkl b/results-cot/gemini-pro.pkl deleted file mode 100644 index e0b905a48fede5ed99940262f8eaba8b94de7046..0000000000000000000000000000000000000000 --- a/results-cot/gemini-pro.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:72caa2e3c4ef872b3d6065f55179aa85d3eaa7978f9abb6fc1e3f58db19d8a69 -size 14759970 diff --git a/results-cot/gemini-pro.png b/results-cot/gemini-pro.png deleted file mode 100644 index 7b5b7158a6669ed89b4ee52945fcc5ebc6b1ae00..0000000000000000000000000000000000000000 --- a/results-cot/gemini-pro.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ab3de80d31f3903d88328e24731c44cd7e938e81224f9e8e5d3349a5664264d1 -size 1015405 diff --git a/results-cot/gemma-7b-it.csv b/results-cot/gemma-7b-it.csv deleted file mode 100644 index 0d06a59ba90adf63b6d6c0c0383d30cff9f04455..0000000000000000000000000000000000000000 --- a/results-cot/gemma-7b-it.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8535fa3f2ef5a94b1b552859930e0476ca0f3c77ec4c277893a9ab9ef45d6c3 -size 16793758 diff --git a/results-cot/gemma-7b-it.jpg b/results-cot/gemma-7b-it.jpg deleted file mode 100644 index d6e3586342946c5ba9ec74c251a68756505020d9..0000000000000000000000000000000000000000 --- a/results-cot/gemma-7b-it.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:28be12e5ad08179e972700c578cc8089b946407e17effa2e25fb2d5129894918 -size 1339444 diff --git a/results-cot/gemma-7b-it.pkl b/results-cot/gemma-7b-it.pkl deleted file mode 100644 index 804dbd36e2521da4f0f239bb16ca5c3cd8422f38..0000000000000000000000000000000000000000 --- a/results-cot/gemma-7b-it.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3c581027f8b78df5934117276cec3e53613f5ac953d045f71af4121b3ec2e1a4 -size 16822239 diff --git a/results-cot/gemma-7b-it.png b/results-cot/gemma-7b-it.png deleted file mode 100644 index 0441c1b599e5ed7ab28a0b07028edbc0f5e8ac60..0000000000000000000000000000000000000000 --- a/results-cot/gemma-7b-it.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5d10e044726def8fdebc8bd89b6cda148c315fd8d808dd7f168d4c5dbf92c2f2 -size 1010299 diff --git a/results-cot/gpt-3.5-turbo-0125.csv b/results-cot/gpt-3.5-turbo-0125.csv deleted file mode 100644 index cc4b185afbfdc779e6faf8723808ba01ec551952..0000000000000000000000000000000000000000 --- a/results-cot/gpt-3.5-turbo-0125.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e9ea8fd5fd1e335397c14e3b312f60111614de4c20e50e5f3bc9d4fb6f049d70 -size 14471960 diff --git a/results-cot/gpt-3.5-turbo-0125.jpg b/results-cot/gpt-3.5-turbo-0125.jpg deleted file mode 100644 index 46cf387611398da103c2a7404890aacbf46703a3..0000000000000000000000000000000000000000 --- a/results-cot/gpt-3.5-turbo-0125.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ecdcdb3508a90af17ff384cb0a0e1065e33ffad5a69b813a51bfb0bf8287dc92 -size 1321591 diff --git a/results-cot/gpt-3.5-turbo-0125.pkl b/results-cot/gpt-3.5-turbo-0125.pkl deleted file mode 100644 index 989d51802e47a49dfe2527cb6248a549c168eeb4..0000000000000000000000000000000000000000 --- a/results-cot/gpt-3.5-turbo-0125.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:702c5dd6ffb22938a59815c65759ca5a201369b59633ee6c80c7030151e53634 -size 14487477 diff --git a/results-cot/gpt-3.5-turbo-0125.png b/results-cot/gpt-3.5-turbo-0125.png deleted file mode 100644 index e865f2e5c4e6dbf4ac4fed7ac12a11dd6bea6b16..0000000000000000000000000000000000000000 --- a/results-cot/gpt-3.5-turbo-0125.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6ef6e2275200de4bb9853889b68935806cc3f74716007d812808c49c1c19d46f -size 1016408 diff --git a/results-cot/gpt-4-turbo-2024-04-09.csv b/results-cot/gpt-4-turbo-2024-04-09.csv deleted file mode 100644 index cce419b48805df17a368097b4bbf42e24b3bd7c7..0000000000000000000000000000000000000000 --- a/results-cot/gpt-4-turbo-2024-04-09.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:011eeea335d67be1a859d51ca2accfd6bc56a2222394db52e68c9f444fec1c9b -size 20522713 diff --git a/results-cot/gpt-4-turbo-2024-04-09.jpg b/results-cot/gpt-4-turbo-2024-04-09.jpg deleted file mode 100644 index 2fa3991811978cf1a28a3375793ca806590c0ddd..0000000000000000000000000000000000000000 --- a/results-cot/gpt-4-turbo-2024-04-09.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e10f603e693e6142bdac19f094b4b40782edc6da98ddd261ed402cae67dbec72 -size 1224609 diff --git a/results-cot/gpt-4-turbo-2024-04-09.pkl b/results-cot/gpt-4-turbo-2024-04-09.pkl deleted file mode 100644 index 9444b60c319115bb45a86281fc823f6a8d2b985d..0000000000000000000000000000000000000000 --- a/results-cot/gpt-4-turbo-2024-04-09.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e1383c823ccc8e09830dd4b96742c19fdf7515b56a715edd6ce131ff07fa029c -size 20513069 diff --git a/results-cot/gpt-4-turbo-2024-04-09.png b/results-cot/gpt-4-turbo-2024-04-09.png deleted file mode 100644 index 831e158fed78fa59dc2389aa4ea99b26ae18566f..0000000000000000000000000000000000000000 --- a/results-cot/gpt-4-turbo-2024-04-09.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c8cf146ef1e4456578ccc5c5fcd077e75aa0724146dbaed512cec70c5f7b8f31 -size 1007912 diff --git a/results-vision/claude-3-opus-20240229.csv b/results-vision/claude-3-opus-20240229.csv deleted file mode 100644 index 353c5435d519ffa06ceda83e4039a09c33182f49..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:74ed9f652dc23c7408b307cb477e91692b561739bf23b33ba93319d9d073f294 -size 10855333 diff --git a/results-vision/claude-3-opus-20240229.jpg b/results-vision/claude-3-opus-20240229.jpg deleted file mode 100644 index de065673640e3aab2366a409a60b7c4b19135567..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-20240229.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9938c9e622c49c715bbf459478f40f6c270dabf08a74a928ee9dfc66d26e34e4 -size 1336877 diff --git a/results-vision/claude-3-opus-20240229.pkl b/results-vision/claude-3-opus-20240229.pkl deleted file mode 100644 index 83fb1478871c253da05fa0e029023b342c366e6b..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-20240229.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db495f6b9f264c3d9978c7d479b8206fbf8e67843c302c42416e0c6ee1aaa0a4 -size 10789555 diff --git a/results-vision/claude-3-opus-20240229.png b/results-vision/claude-3-opus-20240229.png deleted file mode 100644 index 6bf2a13e2b4fc550a8bd530efc12e2aa20fdc06f..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-20240229.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a8c1627064dfd947f682a877cf1bdd158b5a507a45690818941dd98a55b1090 -size 1011262 diff --git a/results-vision/claude-3-opus-vision.jpg b/results-vision/claude-3-opus-vision.jpg deleted file mode 100644 index de065673640e3aab2366a409a60b7c4b19135567..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-vision.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9938c9e622c49c715bbf459478f40f6c270dabf08a74a928ee9dfc66d26e34e4 -size 1336877 diff --git a/results-vision/claude-3-opus-vision.pkl b/results-vision/claude-3-opus-vision.pkl deleted file mode 100644 index 83fb1478871c253da05fa0e029023b342c366e6b..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-vision.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:db495f6b9f264c3d9978c7d479b8206fbf8e67843c302c42416e0c6ee1aaa0a4 -size 10789555 diff --git a/results-vision/claude-3-opus-vision.png b/results-vision/claude-3-opus-vision.png deleted file mode 100644 index 6bf2a13e2b4fc550a8bd530efc12e2aa20fdc06f..0000000000000000000000000000000000000000 --- a/results-vision/claude-3-opus-vision.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3a8c1627064dfd947f682a877cf1bdd158b5a507a45690818941dd98a55b1090 -size 1011262 diff --git a/results-vision/gemini-pro-vision.csv b/results-vision/gemini-pro-vision.csv deleted file mode 100644 index 4b57fea14ec05914d24bbd0d63c31c29b1786890..0000000000000000000000000000000000000000 --- a/results-vision/gemini-pro-vision.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a33999636396dc5a3dfc858de2b0a421a6fa48d78d51ad74838f2d6afc261999 -size 6158469 diff --git a/results-vision/gemini-pro-vision.jpg b/results-vision/gemini-pro-vision.jpg deleted file mode 100644 index 67c9443145f1514b31bc414f8c339a50de1b75a5..0000000000000000000000000000000000000000 --- a/results-vision/gemini-pro-vision.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6e23d166686b405078a2e3b1e9fbb7db2e8c0afe24c8d44a620ff14a784cc329 -size 1331663 diff --git a/results-vision/gemini-pro-vision.pkl b/results-vision/gemini-pro-vision.pkl deleted file mode 100644 index bf039d36e1087d1ef110bfc38bbea3dedd6bcf63..0000000000000000000000000000000000000000 --- a/results-vision/gemini-pro-vision.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6dec98de51c833c57d8a322c9084391b0aeeffd67a6ee0edbed2f23061aeb1e0 -size 6118424 diff --git a/results-vision/gemini-pro-vision.png b/results-vision/gemini-pro-vision.png deleted file mode 100644 index d510d0150fc46903af6d5122fd7f4e112074abed..0000000000000000000000000000000000000000 --- a/results-vision/gemini-pro-vision.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11e6cdebfbd102153baf74fe60d20b2af9d26c280df4863f6173c759b74d6306 -size 1014550 diff --git a/results-vision/gpt-4v.jpg b/results-vision/gpt-4v.jpg deleted file mode 100644 index a626bc7f03a4509e16234ffbe4ba2bafb5e9b791..0000000000000000000000000000000000000000 --- a/results-vision/gpt-4v.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aed42e95b34a548d133fbb0b557b27a1633f66620feb20e971816571591f2659 -size 1329586 diff --git a/results-vision/gpt-4v.pkl b/results-vision/gpt-4v.pkl deleted file mode 100644 index 3646ddf333c644e5f6371784f11663d688295674..0000000000000000000000000000000000000000 --- a/results-vision/gpt-4v.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e98609b7b262836c49cdaf5d3dfc02b7037dc9fcc1f75a41d58a984015318759 -size 6363780 diff --git a/results-vision/gpt-4v.png b/results-vision/gpt-4v.png deleted file mode 100644 index 81f9dcd4d4d0f9c21ab1f35259c7bde0fb5a14a1..0000000000000000000000000000000000000000 --- a/results-vision/gpt-4v.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6cf5130388a71fa198ec6094e3709dbfeedb242fb5eb04e823697ad3e4636246 -size 1013085 diff --git a/results/CodeLlama-70b-Instruct-hf.csv b/results/CodeLlama-70b-Instruct-hf.csv deleted file mode 100644 index 210a8fb0395397f78578e149d36a8b1791bc62e5..0000000000000000000000000000000000000000 --- a/results/CodeLlama-70b-Instruct-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3726905a1656174f3c29edfced6f2eec63222f6be8965c0d970264901d8cfc75 -size 16476347 diff --git a/results/CodeLlama-70b-Instruct-hf.jpg b/results/CodeLlama-70b-Instruct-hf.jpg index da6cbe46201c0c2a2c7c51c1edf0d99d3d88f997..00f7e00e1535297ee0bacc7543cb125ff27ce556 100644 --- a/results/CodeLlama-70b-Instruct-hf.jpg +++ b/results/CodeLlama-70b-Instruct-hf.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7abcb23c529db6b65b212085ef2c777b6f1ad509eb4f3e909a03973db6e8f14a -size 1337988 +oid sha256:775abffec22b340287e4135903e47fa07097fd38e26a9d2d11dc9db852bc1edd +size 1322923 diff --git a/results/CodeLlama-70b-Instruct-hf.pkl b/results/CodeLlama-70b-Instruct-hf.pkl index 2d36d5895ee75a2fe9ff7f82b7bc2245cb6d9b68..97f70aa9681a378e58e44e780a5b58b8a5445157 100644 --- a/results/CodeLlama-70b-Instruct-hf.pkl +++ b/results/CodeLlama-70b-Instruct-hf.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c0496d8536b01f37858e37e1b34eedd25efa1f205035868883bfcdc2ae6fb88 -size 16436822 +oid sha256:5f2b9c13f7266d94ffe12c040118a5a0b208e85cbf4a5aab5b12eee4bd0c5384 +size 14963090 diff --git a/results/CodeLlama-70b-Instruct-hf.png b/results/CodeLlama-70b-Instruct-hf.png index 7bb600341d471e2a2c6d1f0cc9650005ef877426..913d89408049f90c7700ec267d45ddd9adbcfd9d 100644 --- a/results/CodeLlama-70b-Instruct-hf.png +++ b/results/CodeLlama-70b-Instruct-hf.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:23475dc62942ec4852d709065b43765092ed44892c0cb4722e19f01260bbe599 -size 1017526 +oid sha256:edd3d40b297c5a8b3292c03c974c2c5705348dd29a763db6ef8651bcdcd5e9c8 +size 1014517 diff --git a/results/GPT-4-0125-preview.jpg b/results/GPT-4-0125-preview.jpg index e1872829473cdb10db2c500c1389be8c715e1e0e..1b7b0319680c5d2cfcf343308b13801a3858f816 100644 --- a/results/GPT-4-0125-preview.jpg +++ b/results/GPT-4-0125-preview.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:14ad1f24a0557c34d2f7841ced2279897dbd06f1211afd8efc1bbf3e7ff976e5 -size 1237830 +oid sha256:9760db664e239b5e05ad002259bb0b2e76c0a9a7c6ad03cdb6a09a2e8c265077 +size 1238797 diff --git a/results/GPT-4-0125-preview.pkl b/results/GPT-4-0125-preview.pkl index e181284876642cdf3fda46d236263222ca007813..c39dbaa38e00d1a1797ced345fcb1759ba90b574 100644 --- a/results/GPT-4-0125-preview.pkl +++ b/results/GPT-4-0125-preview.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a7e07fc568a31f3c7c77b212d271f466dfd2d1bd52fb4702761badd75dbc945 -size 18068025 +oid sha256:46a3b7e6c49fcfb82921451a9f3fdb2c3630eee5b24fcc4ae6ca48c1af777e63 +size 15972703 diff --git a/results/GPT-4-0125-preview.png b/results/GPT-4-0125-preview.png index 7d17efb5542cf8c4deca94ae9317ed00307cdc55..4d2d5f1bae98843abdd78257d534b4ef8fb80636 100644 --- a/results/GPT-4-0125-preview.png +++ b/results/GPT-4-0125-preview.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a73d7c9a27ca500e5dc3ebafe47ab88fa3d520bd1425ec1f0b9dbd97f4f05f6c -size 1007864 +oid sha256:53dd196468fa09306ebd284a7a46fa7c363f4b6e5c768184d049548f2eeca205 +size 1007830 diff --git a/results/Llama-2-70b-chat-hf.csv b/results/Llama-2-70b-chat-hf.csv deleted file mode 100644 index 2b3ec21ec3beeba95bc754ac42b8c2d82f94b134..0000000000000000000000000000000000000000 --- a/results/Llama-2-70b-chat-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42a31de917b05ed5405474a348d072426474a8fb2ce7ff462dbb121e25f4b6ad -size 20760268 diff --git a/results/Llama-2-70b-chat-hf.jpg b/results/Llama-2-70b-chat-hf.jpg index 492105b84d8686009916497ac3212e57f7c24d41..0e8e84953e40368d90c67ae0c71b4211744e7039 100644 --- a/results/Llama-2-70b-chat-hf.jpg +++ b/results/Llama-2-70b-chat-hf.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d92948e30dcbf7b15fcae057cc1aa59c561d003f70bd92ad5f760a829bdc34cf -size 1330948 +oid sha256:4f7e46612db42e32889834bc632ca34fab32216cb39f413d3f058c70681c32e9 +size 1326313 diff --git a/results/Llama-2-70b-chat-hf.pkl b/results/Llama-2-70b-chat-hf.pkl index ef284a2b6882252f1e1ae985064e00dc9de9b524..d054837fbce4aa6b6a9aead032ea87125acd15b5 100644 --- a/results/Llama-2-70b-chat-hf.pkl +++ b/results/Llama-2-70b-chat-hf.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d7bfc053d1fcc58f5f22e03c813d02cd634235c44b9a351fe084d4a1f659186a -size 20685075 +oid sha256:c35b23dcd598daf63fb74f7d21e06a217f4b93cc152666ac93c000a2a9b94808 +size 18381134 diff --git a/results/Llama-2-70b-chat-hf.png b/results/Llama-2-70b-chat-hf.png index d301d85329c93331fc8c43d0dcb9d33884f089e2..e31e17ac143f34556156300f14a727243f88ceec 100644 --- a/results/Llama-2-70b-chat-hf.png +++ b/results/Llama-2-70b-chat-hf.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:9ab73df40148c356ee8c78b03ce3e15b881d1f2f7c81fecef5ce9b394af0fd10 -size 1012047 +oid sha256:47c89899f3372c274cf073fbf451918cf17ff11347fb70b05dca4e2890b35649 +size 1012937 diff --git a/results/Llama-3-70b-chat-hf.jpg b/results/Llama-3-70b-chat-hf.jpg new file mode 100644 index 0000000000000000000000000000000000000000..620ad4612171d18e63cc76535302dfff54381ebb --- /dev/null +++ b/results/Llama-3-70b-chat-hf.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae0df063a8185360cc474e03f1456c09d5f6e111f8d654ec06f857e88e658dc +size 1288099 diff --git a/results/Llama-3-70b-chat-hf.pkl b/results/Llama-3-70b-chat-hf.pkl new file mode 100644 index 0000000000000000000000000000000000000000..8e9356b2a6c7d2efda96608ca9049b22497399fe --- /dev/null +++ b/results/Llama-3-70b-chat-hf.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab6231fa25049b3451ed54256f9c02c5b4235a04595a50aeefe82b33596fd76a +size 15966316 diff --git a/results/Llama-3-70b-chat-hf.png b/results/Llama-3-70b-chat-hf.png new file mode 100644 index 0000000000000000000000000000000000000000..ba45ab6f528a8e3e96f36151a037968cba0393d0 --- /dev/null +++ b/results/Llama-3-70b-chat-hf.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b702600899a9f010b2c9ff8fb1172d6dcda12d787943df61ede830cd21693f +size 1007396 diff --git a/results/Mistral-7B-Instruct-v0.2.csv b/results/Mistral-7B-Instruct-v0.2.csv deleted file mode 100644 index 4d0b979c5831639b55d9cde7ba065517b4a9a453..0000000000000000000000000000000000000000 --- a/results/Mistral-7B-Instruct-v0.2.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:29ad4985661fc41e659a631fc74ba433cd08a571048f11436ccf87ff74f0db09 -size 27242025 diff --git a/results/Mistral-7B-Instruct-v0.2.jpg b/results/Mistral-7B-Instruct-v0.2.jpg index 3d4cc4b426654c84c4e809470090d6c89dcfa0e0..f3c39b0b2812afac77e329f05a0a14f93a901c43 100644 --- a/results/Mistral-7B-Instruct-v0.2.jpg +++ b/results/Mistral-7B-Instruct-v0.2.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:69ad485f2ba53016c37890c304ce2cb13591c5892ca40dd6ad79e5dca7ae0ed0 -size 1330076 +oid sha256:365677f0f3296b85dd3907cccfbd647ac752fc85c7ad24cf780fe7d95f579659 +size 1324731 diff --git a/results/Mistral-7B-Instruct-v0.2.pkl b/results/Mistral-7B-Instruct-v0.2.pkl index 5d7003da3a133fc1f248f70b470aa4e2e0db975b..26a6b915f60d97b28fb16b0689e91e10c89d428b 100644 --- a/results/Mistral-7B-Instruct-v0.2.pkl +++ b/results/Mistral-7B-Instruct-v0.2.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:99fa146558b73614994e83a667371795a7de461aca6e3580ebb72761fa1758bb -size 27226799 +oid sha256:6ee64e642a1e03676eb214dd13e455f77a9c2ab2a699935a3f66aff5e6c2110e +size 25112380 diff --git a/results/Mistral-7B-Instruct-v0.2.png b/results/Mistral-7B-Instruct-v0.2.png index 12d4d3f447260f61ea105cc5dc9ac853ae65cc74..f634e9814c35087f44c0499395d70fcbb9e6eb9f 100644 --- a/results/Mistral-7B-Instruct-v0.2.png +++ b/results/Mistral-7B-Instruct-v0.2.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:56496484301242d64a7d088ca7f04eaafebaebf6199dd9b631deea1e5db3d070 -size 1013710 +oid sha256:1eea8bf43a8c2a8ef01c4dcfbb22c744902b778adcb70d3dd47bb132410a57a6 +size 1013351 diff --git a/results/Mixtral-8x7B-Instruct-v0.1.csv b/results/Mixtral-8x7B-Instruct-v0.1.csv deleted file mode 100644 index 93b75d40f382ee7eea9fcae943041cdae92a90dd..0000000000000000000000000000000000000000 --- a/results/Mixtral-8x7B-Instruct-v0.1.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a93e2b963a5ac8129b5284f3fd7987964ef96fa0e64194de704a3549c611de1f -size 17978176 diff --git a/results/Mixtral-8x7B-Instruct-v0.1.jpg b/results/Mixtral-8x7B-Instruct-v0.1.jpg index 1cd59defaed303e46393d5539da5bb38ef6d59ab..ce247014f4c501ae7e7d20227b0bc2f0d2d2420b 100644 --- a/results/Mixtral-8x7B-Instruct-v0.1.jpg +++ b/results/Mixtral-8x7B-Instruct-v0.1.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b25140825099c0da671f95c86bf318d6ae4385361a35829eefe7c02b4b0ae720 -size 1326662 +oid sha256:8bd1ca2488a739fcdf331326c29a1bed80bb887ca0a4ae304569503712de9310 +size 1322552 diff --git a/results/Mixtral-8x7B-Instruct-v0.1.pkl b/results/Mixtral-8x7B-Instruct-v0.1.pkl index 5dc74a314547d5e6b6ab4b8a8f5853a68a665450..caa22e28301b059a7448b497d1c8a981f93c19d8 100644 --- a/results/Mixtral-8x7B-Instruct-v0.1.pkl +++ b/results/Mixtral-8x7B-Instruct-v0.1.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01909843a40e65ff7e340cedbad451ab2337c7423161a46342e83d384ec24162 -size 17979541 +oid sha256:a91a52a2452f33a7e880f2c93bb3384eef3c09ef245ee3c48280f0a75a18c3bd +size 15622486 diff --git a/results/Mixtral-8x7B-Instruct-v0.1.png b/results/Mixtral-8x7B-Instruct-v0.1.png index 954d9f574203223975362e195d2e181a904f5675..32fe06f83799e247597c38db572b972132bc311d 100644 --- a/results/Mixtral-8x7B-Instruct-v0.1.png +++ b/results/Mixtral-8x7B-Instruct-v0.1.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:776d8e20cf72a39601589f2246ddefdfaef7a5f8988cec975f5fc0089c2966a2 -size 1016086 +oid sha256:76018c7481c7f08d5a622f761620fcdfb60ee6851083a2feffcdcc6d6e231fd4 +size 1015123 diff --git a/results/Qwen1.5-72B-Chat.csv b/results/Qwen1.5-72B-Chat.csv deleted file mode 100644 index 4080e2f019798be13744122344a14ed2e911ae27..0000000000000000000000000000000000000000 --- a/results/Qwen1.5-72B-Chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0ba395c0b55330f689827527831e57e50ae9d824b6635b2bb569713afcf26d4b -size 14219193 diff --git a/results/Qwen1.5-72B-Chat.jpg b/results/Qwen1.5-72B-Chat.jpg index 108e59f1b576ba42f7216b003757616f809f0cad..e30a59df05f7e0519d51b38ccb03581ae22ed7b9 100644 --- a/results/Qwen1.5-72B-Chat.jpg +++ b/results/Qwen1.5-72B-Chat.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8603cbe86499194308a92f5528d2058b7ddf3acae80f189ad4f506d466b9e419 -size 1316054 +oid sha256:3fa1bf7ad4a076a198c9725d65d51814b64e9e2233d7fa21581e6a92913fe1d0 +size 1312846 diff --git a/results/Qwen1.5-72B-Chat.pkl b/results/Qwen1.5-72B-Chat.pkl index 6cb5aeec0996ab4736391b7bc24aef58b483651c..26d6806e09a682e65c43f547e9c276c20e4fd0c5 100644 --- a/results/Qwen1.5-72B-Chat.pkl +++ b/results/Qwen1.5-72B-Chat.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec5494612aaef1b9ac05c580fdac67469fc7bb3129e66b80ab720afeb9c71f22 -size 14196803 +oid sha256:fe09314e36e70f99fc301f0c21b3de4f67257a535be70dc1653a95a5a8da5003 +size 12117010 diff --git a/results/Qwen1.5-72B-Chat.png b/results/Qwen1.5-72B-Chat.png index b3f9c5d39762b8df69e9a50bb3c33c8f6f21f293..578292b39fe318421f7faf619281e980b5e4566a 100644 --- a/results/Qwen1.5-72B-Chat.png +++ b/results/Qwen1.5-72B-Chat.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f8cc6a60934f27dbe80556bee285ecfb243cb30e6a7a3eef4bab10063b081e36 -size 1013966 +oid sha256:af79b199f0084ffcd2edf6da4f3a0718c34da4832911fb59aff8887f9bcb7e3a +size 1014450 diff --git a/results/StripedHyena-Nous-7B.csv b/results/StripedHyena-Nous-7B.csv deleted file mode 100644 index 2932e2894a2b6318a7b8a7349211f6206649738c..0000000000000000000000000000000000000000 --- a/results/StripedHyena-Nous-7B.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f662367ea0d33a368aaa7a72cfeed41d2f3dc05be6289a6fe485a028c7cb98d5 -size 29219512 diff --git a/results/StripedHyena-Nous-7B.jpg b/results/StripedHyena-Nous-7B.jpg deleted file mode 100644 index 2c3bf7a386c3fd7836cf3d324d221c25bb3c0eac..0000000000000000000000000000000000000000 --- a/results/StripedHyena-Nous-7B.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:247580203c4c2a61afce2c81dde0f919e2bb8743b830132551307ba6829afdf3 -size 1375054 diff --git a/results/StripedHyena-Nous-7B.pkl b/results/StripedHyena-Nous-7B.pkl deleted file mode 100644 index 61d7b61a32139fac8cc4c287147955445342c21c..0000000000000000000000000000000000000000 --- a/results/StripedHyena-Nous-7B.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:48481a400fcecb1ddc80f4ebbf4c235f6b645ab28ad57a72ed1688e3cf17c192 -size 29177951 diff --git a/results/StripedHyena-Nous-7B.png b/results/StripedHyena-Nous-7B.png deleted file mode 100644 index 960cdaafbb1b1a661f0a6bd5c81342548d1eb718..0000000000000000000000000000000000000000 --- a/results/StripedHyena-Nous-7B.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:aa9c914dcfa0490f615aa7814c6449465942d31852994644f1213df3b6f25794 -size 1236313 diff --git a/results/Yi-34B-Chat.csv b/results/Yi-34B-Chat.csv deleted file mode 100644 index 55bda75d9cdd0e2383d293c564afedcaa9165551..0000000000000000000000000000000000000000 --- a/results/Yi-34B-Chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7f09fb5f46ca144490bcb42ec89dd27f169680493501c211bf2bcfcd908da1c -size 20485423 diff --git a/results/Yi-34B-Chat.jpg b/results/Yi-34B-Chat.jpg index 6cf0b57fc5b4ae8e698a7c0a33eba69889e9f747..3e297f64e5e26fb1a51cb05aa7b230f6efda51d3 100644 --- a/results/Yi-34B-Chat.jpg +++ b/results/Yi-34B-Chat.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6bebddad6a68daf263ee35cc2cb88f195f6a285ba2ad0c21bf35c1d8a1a716b8 -size 1328365 +oid sha256:2aac2319f74bb4fceeed1a80134e9b2ad7e0ce0cb24d391e63bd822501d202ad +size 1323170 diff --git a/results/Yi-34B-Chat.pkl b/results/Yi-34B-Chat.pkl index 486e10b708ddc65ab9a7e31fec50ba017da90f75..5b8436218f4a10789f4eba9339956ece4de344a5 100644 --- a/results/Yi-34B-Chat.pkl +++ b/results/Yi-34B-Chat.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff7da8d16b7c70f06c2035d65039610b722b3b12cf64cb2f0efa7fcd41e1a82a -size 20489399 +oid sha256:f8e2d70bb8c16f9f28445729bdb58111e4303f103e829e1d5c3f4b01dc701866 +size 18357438 diff --git a/results/Yi-34B-Chat.png b/results/Yi-34B-Chat.png index a8dc1d6b11cf5a91047d6cc2d4773379529a537b..a15169237a8bcce2a3f654dad463ca1d2f3e068b 100644 --- a/results/Yi-34B-Chat.png +++ b/results/Yi-34B-Chat.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:312a621fa8bb31c9692b45aaf25703552a359199127e48cd7db1ae97780dec82 -size 1014882 +oid sha256:06c66a8890d9d44b170c282bf20dbb349b06f84075e55a9baec46a26aa7fb9a3 +size 1013684 diff --git a/results/claude-3-haiku-20240307.csv b/results/claude-3-haiku-20240307.csv deleted file mode 100644 index ef81b2523a8185ca6f0df424a7f6b8bda890e87b..0000000000000000000000000000000000000000 --- a/results/claude-3-haiku-20240307.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c11f91f84b50db9a3046bc1cf9445c46c6547d3f5828d9273ee5674e165477c6 -size 19915152 diff --git a/results/claude-3-haiku-20240307.jpg b/results/claude-3-haiku-20240307.jpg index 39d64edd9328a1699b22b66843c021c4c44217e2..dee1cb635583e4d463a22be8e1c75ff51be853b4 100644 --- a/results/claude-3-haiku-20240307.jpg +++ b/results/claude-3-haiku-20240307.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:c11ecd24a19d0acf8487beea975cddc2ce586298f6e8d4a9865cb8c49ffb20b0 -size 1304087 +oid sha256:1b7ab74bfdd0edc6b004ecf50c2cd6601d8c3ce3dee13d43b53a9e44921fd0cf +size 1303664 diff --git a/results/claude-3-haiku-20240307.pkl b/results/claude-3-haiku-20240307.pkl index 6577671617f5c270a3e59518eb2245883da69a21..7f69728cf6a1ccf7cd6cd1d4526c10360ab75ec7 100644 --- a/results/claude-3-haiku-20240307.pkl +++ b/results/claude-3-haiku-20240307.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:281c6a1b1be9ef8d73978088e49dd4070ee21dcf326d8f6638b83abb394e1860 -size 19884865 +oid sha256:e19e05ac9badf9f39455c366b8bf05ea40c0aae8cd66fd53016c50d085cea647 +size 17770468 diff --git a/results/claude-3-haiku-20240307.png b/results/claude-3-haiku-20240307.png index 438074adbf3a75a425d4db53c8f71ec15e97a863..e9dc0a9499e167d5f3b04ae910132c9e9ba6971a 100644 --- a/results/claude-3-haiku-20240307.png +++ b/results/claude-3-haiku-20240307.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:839090006279a0de9d90de9d11c86803e5bf035fce577408dcb4628b1d611b5e -size 1011782 +oid sha256:aff9411b96a6aaae1d50b352c2e778ff32f26cbbb2bfc5d5ddc572aaa21d986e +size 1012936 diff --git a/results/claude-3-opus-20240229.csv b/results/claude-3-opus-20240229.csv deleted file mode 100644 index 01fe358f8a7f37bc5ce960731db665861f6deff8..0000000000000000000000000000000000000000 --- a/results/claude-3-opus-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4513028095ff497d9594f87af6fdb2f75238c3444bdece4af1b22469c874f7e2 -size 20347963 diff --git a/results/claude-3-opus-20240229.jpg b/results/claude-3-opus-20240229.jpg index 762c646dffada519e81f41508c801fda71bc5d2b..7ad221a0aeb57b3992087d36a60eca381d7c76a9 100644 --- a/results/claude-3-opus-20240229.jpg +++ b/results/claude-3-opus-20240229.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6e85e8e7ad91700f1da3f4697f01e1438993c9b0e5ba4707afc535fed47fd1ab -size 1211081 +oid sha256:1cb2e58994183de9eebda61c1741522466657e5916c543ea8a646afb9133a2c9 +size 1212166 diff --git a/results/claude-3-opus-20240229.pkl b/results/claude-3-opus-20240229.pkl index b664791f02191a975b7ce4af8264a19e9574317e..fe3fc1b7fc65fea8cd5c450638f62c536677b62e 100644 --- a/results/claude-3-opus-20240229.pkl +++ b/results/claude-3-opus-20240229.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5a7c21c7fbad602b1e517dbcca4753b566f940480ce56b2403d31fe3d230ead4 -size 20331492 +oid sha256:7b47707ac006a490b607c1280c1013cb1de466434e5c518fff427a229bab46aa +size 18241965 diff --git a/results/claude-3-opus-20240229.png b/results/claude-3-opus-20240229.png index d3e08311ffdc49689de85e96b69f116a24c7fcf6..580184a49c23e662e81b235ed8266666e191c752 100644 --- a/results/claude-3-opus-20240229.png +++ b/results/claude-3-opus-20240229.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2ffda254a2d55eef6b4868bd5471c10a88abacf214c7876be637f9852e32a0f0 -size 1007451 +oid sha256:13ce4f0ea785209f7c193702c9e41988f6f8e74824cb05a53503264aee626b89 +size 1007456 diff --git a/results/claude-3-sonnet-20240229.csv b/results/claude-3-sonnet-20240229.csv deleted file mode 100644 index f2ad6516ce6ce2c533173a2c88e1239bf58b69a3..0000000000000000000000000000000000000000 --- a/results/claude-3-sonnet-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:24f28e804daecb522f6f29ca4774a5178bbf18738ac9a433a2c9f98652de6f2f -size 23432638 diff --git a/results/claude-3-sonnet-20240229.jpg b/results/claude-3-sonnet-20240229.jpg index ef321f274d511c701a93218c9fc48339b899b7f2..ade4e00f07072aa8472ac14ccf0d083382698565 100644 --- a/results/claude-3-sonnet-20240229.jpg +++ b/results/claude-3-sonnet-20240229.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:58a9d375bc0cc391cd3c849f60e1323864670a7ca295647a27f64357a80f7927 -size 1290090 +oid sha256:57c1109826861ee9b4e16ff1abd28cf58afb2e49495305257a3c466c6db21d5c +size 1290164 diff --git a/results/claude-3-sonnet-20240229.pkl b/results/claude-3-sonnet-20240229.pkl index cb8300362b82a2c8aa4fcc29a03dd225312e2fb4..7ec8cd8e0a3c58841484160e0a47f1339f1201a9 100644 --- a/results/claude-3-sonnet-20240229.pkl +++ b/results/claude-3-sonnet-20240229.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea6e079352f2c37842cc53fc626d3aa51a462c486119d1b7b425b1209d654320 -size 23403073 +oid sha256:b80e820a34180cdb4e2393c170b08aa7046312f01a5bfbbbf7024ef7e1c94e22 +size 20950616 diff --git a/results/claude-3-sonnet-20240229.png b/results/claude-3-sonnet-20240229.png index efa4be608ed386833ff16988695b5a24133e6170..629481ec1c1f3b765b6ebb949f2f787369d356de 100644 --- a/results/claude-3-sonnet-20240229.png +++ b/results/claude-3-sonnet-20240229.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:d570a6a80f9c88e21bcce2c55b09c91efcbd8748261576ccbed3f1b806756b8f -size 1007061 +oid sha256:07eb457f7638ced88b2fd2f8880b821360963b72d2ecdc408d84bf059f32ab75 +size 1007664 diff --git a/results/dbrx-instruct.jpg b/results/dbrx-instruct.jpg new file mode 100644 index 0000000000000000000000000000000000000000..9a50e7b6d4fe985dbebbe369c30aca7df3890481 --- /dev/null +++ b/results/dbrx-instruct.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9eaa342dbdb13cb51a3f2584cc56720a37fc0a785e1e939a910ab3bf5384983 +size 1321040 diff --git a/results/dbrx-instruct.pkl b/results/dbrx-instruct.pkl new file mode 100644 index 0000000000000000000000000000000000000000..25e069ca4fc5d230ee7259b615d0813dbdf848fe --- /dev/null +++ b/results/dbrx-instruct.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c33881cb23d9859f5ab0b3e8c7df9218ae2046dce2f5ac20b418ce6ced19989 +size 15797034 diff --git a/results/dbrx-instruct.png b/results/dbrx-instruct.png new file mode 100644 index 0000000000000000000000000000000000000000..2a2fdbe990f39ff1121b4b5314ecc4a19fa19052 --- /dev/null +++ b/results/dbrx-instruct.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6394ede90549565b9df5256fc8b2779d6c05484d41b2b21ac725051b1368c782 +size 1016807 diff --git a/results/deepseek-llm-67b-chat.csv b/results/deepseek-llm-67b-chat.csv deleted file mode 100644 index a80e47c76536ce65983446b06f49b54da59aa96a..0000000000000000000000000000000000000000 --- a/results/deepseek-llm-67b-chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e25ee66a4db8dcc610cb835ddb05cbca2b6249c279d50b92043a6f27182653e8 -size 14589480 diff --git a/results/deepseek-llm-67b-chat.jpg b/results/deepseek-llm-67b-chat.jpg index ec83124928aca491f7878875298aac6c1295d5e0..ed5e43692fac7946fad783c8ba853d0e6ab9be4e 100644 --- a/results/deepseek-llm-67b-chat.jpg +++ b/results/deepseek-llm-67b-chat.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7d87ad7c1562889be8d76e441efefc5785d9ee803c68be1f32c5050ded893c7f -size 1326050 +oid sha256:ffe70df8cd91f20066816e092131ea5896121b689f02292b1ddbe21661963077 +size 1325680 diff --git a/results/deepseek-llm-67b-chat.pkl b/results/deepseek-llm-67b-chat.pkl index bf948b8306f06a9f8701b7c935c45f692458e5bb..ac219e477c1e646237803cccd9ba1deb18a8cadd 100644 --- a/results/deepseek-llm-67b-chat.pkl +++ b/results/deepseek-llm-67b-chat.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:1601f6ca2972ccae7d173d7aafa41ecd2ef25ab7b088b8cdd1e60dca4b2831fc -size 14572334 +oid sha256:7f4f015cc17f9b5ccd1c2367fac5b9fabc7a7917283c17ad875e87dea7e07f53 +size 12932443 diff --git a/results/deepseek-llm-67b-chat.png b/results/deepseek-llm-67b-chat.png index 86e52560a837afb10bed5b96ba107bbae937b42c..4a2223bf39f734024c840b7c6c6b509b7fad0150 100644 --- a/results/deepseek-llm-67b-chat.png +++ b/results/deepseek-llm-67b-chat.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8711f5a00452406a8a53d688ce263c8cedfd52138e688e41d030744c85da06f6 -size 1014516 +oid sha256:84fdb1e33c83ed49a6187abd140969372c8eda7f5d3f5cbb57d486ce7c530d75 +size 1013925 diff --git a/results/gemma-7b-it.csv b/results/gemma-7b-it.csv deleted file mode 100644 index 353ab2348cce4a6f1904e336cd62a50b2003b833..0000000000000000000000000000000000000000 --- a/results/gemma-7b-it.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3bdc088d6c7eb18257ac35c1d2b2ee9f9849a69950016f6e9a0bf04be48a5ae2 -size 12624700 diff --git a/results/gemma-7b-it.jpg b/results/gemma-7b-it.jpg index 27b45686d230b7760b9638f1a2674a486d724f6c..cb20abd3754304349715f22dd8e39171031d123c 100644 --- a/results/gemma-7b-it.jpg +++ b/results/gemma-7b-it.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:802f9edb1b79dbaf3aa907ebc83c10b8a04a60a9fec7a781d8b12b3c3c2303c2 -size 1324886 +oid sha256:b20f35a5909d458db4a08f13c6524977a5f0bf51c25c811adbd7dbbdd4c4bae3 +size 1324126 diff --git a/results/gemma-7b-it.pkl b/results/gemma-7b-it.pkl index 5dc21f5cbdc94fdd03e61d8abc8baf0b34d410c1..31d90c1b5bea0a45b3846fbfa3a9a7b019da4405 100644 --- a/results/gemma-7b-it.pkl +++ b/results/gemma-7b-it.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f02f45ce5d312717f22676deec9d4a321cce8b74321059620056f99064ee7a15 -size 12654079 +oid sha256:213281bf9dca05136d64b61e2d8920e947bb6c908c8ea989b006eb73ad0d9b92 +size 11293622 diff --git a/results/gemma-7b-it.png b/results/gemma-7b-it.png index 5c147983cdd982736811ef18a75da04b746f4c4c..cb1e7b512c05d13448830fa756adfdf41ce1dcc6 100644 --- a/results/gemma-7b-it.png +++ b/results/gemma-7b-it.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2e078db5926500a64bed1dcd4483b7780a0e2ffcd625fabb70f096eb8f9769cf -size 1010821 +oid sha256:95c811dc77e6afe9079e577060f3710668326c021d0775e44da11b48b1766488 +size 1010924 diff --git a/results/gpt-3.5-0613.jpg b/results/gpt-3.5-0613.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cb3e9360847c539173fa820750b73e0fca3e0622 --- /dev/null +++ b/results/gpt-3.5-0613.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cda6a309b0e8361a8741e233c8dd747dec9d50f66cc56b99f6f3b22335d9824 +size 1321225 diff --git a/results/gpt-3.5-0613.pkl b/results/gpt-3.5-0613.pkl new file mode 100644 index 0000000000000000000000000000000000000000..b93e0f43172ebf4bce23edd193679006176c7117 --- /dev/null +++ b/results/gpt-3.5-0613.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e10584a651923db281667540e54cb56d86a97a74676a95f4a97e6575800574e +size 8655742 diff --git a/results/gpt-3.5-0613.png b/results/gpt-3.5-0613.png new file mode 100644 index 0000000000000000000000000000000000000000..762edac14ebb6b581e3eeacaad5a38b4a9e9036b --- /dev/null +++ b/results/gpt-3.5-0613.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93badcf64e215daf80be4d75ebbd32d8ae8afc0541a9a480558fa30580165f4c +size 1012969 diff --git a/results/gpt-3.5-turbo-0125.csv b/results/gpt-3.5-turbo-0125.csv deleted file mode 100644 index 5f50409538b0260b33c3f8c9c6f343975331a1e5..0000000000000000000000000000000000000000 --- a/results/gpt-3.5-turbo-0125.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f700f8e2914178a69513e96176c69e67acd51bb14ac12ab174d5e2df60f56179 -size 9472558 diff --git a/results/gpt-3.5-turbo-0125.jpg b/results/gpt-3.5-turbo-0125.jpg index 12ac333fce8ea9a0e87d93d476b70bfe24854aff..2be8d64c7737ccda4e65df0027a5ebf0a853db5c 100644 --- a/results/gpt-3.5-turbo-0125.jpg +++ b/results/gpt-3.5-turbo-0125.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:61024aa36a2f5840f9b6b4121603f742e42d97499fe2488c8bce58aca9973110 -size 1331029 +oid sha256:0b8b383520abab7a0f1f8c2194f7597a879d2e5462d602f54f598518d6038f01 +size 1322712 diff --git a/results/gpt-3.5-turbo-0125.pkl b/results/gpt-3.5-turbo-0125.pkl index cd62e04628a3b5245ab2963678c6339743906909..5a2756f3b97fadc014f8e1b58b2f048f17901af3 100644 --- a/results/gpt-3.5-turbo-0125.pkl +++ b/results/gpt-3.5-turbo-0125.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8038b3c4d84ba654e8d2aebb41bd803efc0d477051a81f6fd2be95302e4a0c1d -size 9470933 +oid sha256:6248651aa308b7e1a4c903b0e0054ea8e1d3c643c5d1335d2d79c13a9cc68ddc +size 8052825 diff --git a/results/gpt-3.5-turbo-0125.png b/results/gpt-3.5-turbo-0125.png index 323c608d6674c1c653913eb09eeb4a0201cde71e..fbe3937c339ccfa7ae43e96a74a0951eaf302f6a 100644 --- a/results/gpt-3.5-turbo-0125.png +++ b/results/gpt-3.5-turbo-0125.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53e35193260f45c44c7d401c462f04876186747f79f666107ce74e7be3ff9e40 -size 1013723 +oid sha256:19906ee7210d494f2099dc7228b7bdf6963c399507fec85658f496c79f50cfd3 +size 1014303 diff --git a/results/gpt-4-0125-preview.csv b/results/gpt-4-0125-preview.csv deleted file mode 100644 index f63d7a302f9225c558d1239538fce78d6a8abd8e..0000000000000000000000000000000000000000 --- a/results/gpt-4-0125-preview.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d4cbbacdff8172888d8d5e8917680f524d7cd73dcbcc7aa8d0e54c0246a752c -size 18088521 diff --git a/results/gpt-4-1106.jpg b/results/gpt-4-1106.jpg new file mode 100644 index 0000000000000000000000000000000000000000..713b04381b3362205b7b05271d9910ab85f60be7 --- /dev/null +++ b/results/gpt-4-1106.jpg @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00b3589bff4da14aac5503f0b83331d3ea1b515ea900e57d40b555693b9ec1e4 +size 1238681 diff --git a/results/gpt-4-1106.pkl b/results/gpt-4-1106.pkl new file mode 100644 index 0000000000000000000000000000000000000000..100d225e3b4881c5ffd825a889ce4aafafaeda1f --- /dev/null +++ b/results/gpt-4-1106.pkl @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66aa682ff3b8862a20e86a1383733f231f97817735b5360c664ae96b6010f056 +size 16856594 diff --git a/results/gpt-4-1106.png b/results/gpt-4-1106.png new file mode 100644 index 0000000000000000000000000000000000000000..f5600321f2a6cecfda372303651efd7ad03ea9cc --- /dev/null +++ b/results/gpt-4-1106.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab1b0db31db1b9945e9aa0ae68cd95c0a572570f1814efc504d63493894f292d +size 1007682 diff --git a/results/gpt-4-turbo-2024-04-09.csv b/results/gpt-4-turbo-2024-04-09.csv deleted file mode 100644 index a49dd0e4e8bb60f5f44a1ee30cb8276af7c80330..0000000000000000000000000000000000000000 --- a/results/gpt-4-turbo-2024-04-09.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b629e534617928e013c363a0ec009faa184a700b04439278e2aab5d950251f60 -size 18513633 diff --git a/results/gpt-4-turbo-2024-04-09.jpg b/results/gpt-4-turbo-2024-04-09.jpg index 52d091c3240c123c8b919ceeeaf36de73a93b456..c12832ecabdcc41386e77efc033f4bd91d1863e2 100644 --- a/results/gpt-4-turbo-2024-04-09.jpg +++ b/results/gpt-4-turbo-2024-04-09.jpg @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5615d7a429fa93eeaa2dbbd7c2e65c77fea60652acd3c6bd9804ce4e0f26e01f -size 1229298 +oid sha256:1d68537f00e9c3c1a4f886114e6f24390d2467fe91a08f37d3cc91a779179221 +size 1229961 diff --git a/results/gpt-4-turbo-2024-04-09.pkl b/results/gpt-4-turbo-2024-04-09.pkl index 7b80ce7761b90389bc18b7b7523a9d50293f7401..b7c8baaf351257c84f3129495eacc83e7e5b9437 100644 --- a/results/gpt-4-turbo-2024-04-09.pkl +++ b/results/gpt-4-turbo-2024-04-09.pkl @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ff69f7166762b52c0012371fa8ece9fcd3c72be3e091f5aded2b67e0be3c7955 -size 18498743 +oid sha256:fe679c7d7fd96a8f0e62c6993795d8b5039ab70523c0f21ba851c3e95c33b9b7 +size 16528892 diff --git a/results/gpt-4-turbo-2024-04-09.png b/results/gpt-4-turbo-2024-04-09.png index 2c48c818876b99b10f7aa9187c20bcb078a4a8fe..7ffe3c400f708cab5a7f9ea54451a6e363577424 100644 --- a/results/gpt-4-turbo-2024-04-09.png +++ b/results/gpt-4-turbo-2024-04-09.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:68e783c4a61aef6f4ecc06943a8a8d8a00d15841dc1e5c489a885a259b64bf1f -size 1008010 +oid sha256:c212b5d2f5c413940ba464adacef8fcec7d83f1abdacc9548b520879ef4bdf1b +size 1007985 diff --git a/results_qwen/CodeLlama-70b-Instruct-hf.csv b/results_qwen/CodeLlama-70b-Instruct-hf.csv deleted file mode 100644 index 33b89ea7ec4e67d7115433fe61dab3c08d8f547e..0000000000000000000000000000000000000000 --- a/results_qwen/CodeLlama-70b-Instruct-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8eb9e387ef5e3ec72c0cf7f9fb69721135481977c3b2277cee5511312b8975cd -size 14962036 diff --git a/results_qwen/CodeLlama-70b-Instruct-hf.jpg b/results_qwen/CodeLlama-70b-Instruct-hf.jpg deleted file mode 100644 index 1dce2127e77de0c85f9eeccfc54d54decbd2a2f8..0000000000000000000000000000000000000000 --- a/results_qwen/CodeLlama-70b-Instruct-hf.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ec863021f1f34f0dcb94379e600a79f87aff5364f35db4bf18319cfed9ace7cf -size 1312081 diff --git a/results_qwen/CodeLlama-70b-Instruct-hf.pkl b/results_qwen/CodeLlama-70b-Instruct-hf.pkl deleted file mode 100644 index e42b9e7cb258f83de6b928d555655914ba1f3032..0000000000000000000000000000000000000000 --- a/results_qwen/CodeLlama-70b-Instruct-hf.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:203ab4a96fdca08c9155d6cec16d72eac15e35506e4ef551fa0fe8cf867c96a5 -size 14974843 diff --git a/results_qwen/CodeLlama-70b-Instruct-hf.png b/results_qwen/CodeLlama-70b-Instruct-hf.png deleted file mode 100644 index 8151fc61b95d3b84e603857805512870dc113f7a..0000000000000000000000000000000000000000 --- a/results_qwen/CodeLlama-70b-Instruct-hf.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5fce36a6f9e4eb220a3fcf00c3b81089f53447c16729cba8cdd08eff6aa2796b -size 1005203 diff --git a/results_qwen/Llama-2-70b-chat-hf.csv b/results_qwen/Llama-2-70b-chat-hf.csv deleted file mode 100644 index 94b95f6534930c78e4e5985d0766c84b8ceed2aa..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-2-70b-chat-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f5e5a2bcd63b330efb3c92c9d2bfc3a708cb14348dec1bf4e7eb34e604348efa -size 18452553 diff --git a/results_qwen/Llama-2-70b-chat-hf.jpg b/results_qwen/Llama-2-70b-chat-hf.jpg deleted file mode 100644 index 09bb0b5f14796b2ed09a2e4ee78462f95a73b00a..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-2-70b-chat-hf.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:0730118c903a7e8a0eed56186c3b5aab3978ad6b1a51990413b3086cd8be726c -size 1330830 diff --git a/results_qwen/Llama-2-70b-chat-hf.pkl b/results_qwen/Llama-2-70b-chat-hf.pkl deleted file mode 100644 index bcb7467791422877ff7d7247d6d6805b8289ef9c..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-2-70b-chat-hf.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d74e63ba62f3f074e16b0731f5d3f53ecd6f6d431ca6579a46fb95e8e0fc0494 -size 18434995 diff --git a/results_qwen/Llama-2-70b-chat-hf.png b/results_qwen/Llama-2-70b-chat-hf.png deleted file mode 100644 index 9ca3cd895f312c6b60a4f3253a4868406f9fd449..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-2-70b-chat-hf.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ccf3f5cf4b62f2e6eddd9b1e34991e92c8a503ca71201c5ef209d4e97d69df08 -size 1011649 diff --git a/results_qwen/Llama-3-70b-chat-hf.csv b/results_qwen/Llama-3-70b-chat-hf.csv deleted file mode 100644 index 987b00bacfad69f4bf260a2cd9f22b171d4e98d9..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-3-70b-chat-hf.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:06cb2c239ab5bdbcda4463cc6c86f5fef197d73b3bdfa559481140f1a16b0fa6 -size 15924195 diff --git a/results_qwen/Llama-3-70b-chat-hf.jpg b/results_qwen/Llama-3-70b-chat-hf.jpg deleted file mode 100644 index c5522d7d6e45b9102aba01f817db00b3e98b89fe..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-3-70b-chat-hf.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f3d7c86afdd14c7ba16adb5c90cd2395549be370318607c1bf90474a0cbb0d9a -size 1283581 diff --git a/results_qwen/Llama-3-70b-chat-hf.pkl b/results_qwen/Llama-3-70b-chat-hf.pkl deleted file mode 100644 index 4419aafa14fbaca01c8150e6805d812e51e25607..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-3-70b-chat-hf.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:335776c0af8167d18b79d9fe05cb38eee5a6c56f4107c2d1d147c0b37a47af46 -size 15975547 diff --git a/results_qwen/Llama-3-70b-chat-hf.png b/results_qwen/Llama-3-70b-chat-hf.png deleted file mode 100644 index 142cf72f5111564c48df2ba50a587d0d830dc5b6..0000000000000000000000000000000000000000 --- a/results_qwen/Llama-3-70b-chat-hf.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c80e6eff6fd4ae1930de8fefae4c4aa73c40d649bba821f2b979f1f9f409c881 -size 1007229 diff --git a/results_qwen/Mistral-7B-Instruct-v0.2.csv b/results_qwen/Mistral-7B-Instruct-v0.2.csv deleted file mode 100644 index 8a8eb76ed5ff10b0c1f8f66990bf3c536233ce7d..0000000000000000000000000000000000000000 --- a/results_qwen/Mistral-7B-Instruct-v0.2.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7fc1fd3720541d6da41e0b3a8ba222576cf9deddc09483adeff44233c43e52b0 -size 25120060 diff --git a/results_qwen/Mistral-7B-Instruct-v0.2.jpg b/results_qwen/Mistral-7B-Instruct-v0.2.jpg deleted file mode 100644 index 977f7355cea0b7303549439b0ebc6aa035e6477f..0000000000000000000000000000000000000000 --- a/results_qwen/Mistral-7B-Instruct-v0.2.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a90102f516fba692b21fda394db6ffa35c625230e7e22d36022bfd850e17f8ec -size 1324782 diff --git a/results_qwen/Mistral-7B-Instruct-v0.2.pkl b/results_qwen/Mistral-7B-Instruct-v0.2.pkl deleted file mode 100644 index e284b4ecfaaf7eba7a974095ce9f855904aa065d..0000000000000000000000000000000000000000 --- a/results_qwen/Mistral-7B-Instruct-v0.2.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e5c9386da642fcba5d2d83da27bcb9c43324ade40332fe1f0d449391c49e95bd -size 25132544 diff --git a/results_qwen/Mistral-7B-Instruct-v0.2.png b/results_qwen/Mistral-7B-Instruct-v0.2.png deleted file mode 100644 index a692dfe3845ecc90262a0c6fb349ced6d760e7a9..0000000000000000000000000000000000000000 --- a/results_qwen/Mistral-7B-Instruct-v0.2.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f62d7b511052a8b79f53251bc719eb90b5dcc9b6cab3848b1d532ddef0c3665 -size 1009476 diff --git a/results_qwen/Mixtral-8x7B-Instruct-v0.1.csv b/results_qwen/Mixtral-8x7B-Instruct-v0.1.csv deleted file mode 100644 index a9d42484ef0a1a3d770843df147a22b035f06c17..0000000000000000000000000000000000000000 --- a/results_qwen/Mixtral-8x7B-Instruct-v0.1.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1b4ad69d274c473073093be7fb46a2d1070a3db35cfdfbb196a927efc2df204f -size 15659205 diff --git a/results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg b/results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg deleted file mode 100644 index 7036a37961570857606fc15c664d09faefb08509..0000000000000000000000000000000000000000 --- a/results_qwen/Mixtral-8x7B-Instruct-v0.1.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:99f126d1e67029d44ab0978fa8f60bac2a1066c7728074168edf763659b8cbfa -size 1323283 diff --git a/results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl b/results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl deleted file mode 100644 index d740e56c38b730c9b27f7ccbb87508d1e70d2ebf..0000000000000000000000000000000000000000 --- a/results_qwen/Mixtral-8x7B-Instruct-v0.1.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:579a96d609f2bd4650a078944535c7bf2d348f9304bf627248c1a40910815452 -size 15660660 diff --git a/results_qwen/Mixtral-8x7B-Instruct-v0.1.png b/results_qwen/Mixtral-8x7B-Instruct-v0.1.png deleted file mode 100644 index 95a20f9806c6a1d618f8a2655da589193fc58801..0000000000000000000000000000000000000000 --- a/results_qwen/Mixtral-8x7B-Instruct-v0.1.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5c3e1b87d8fecd69735bb14a9f50b29e9dad36c134df633e246e65b6946de14a -size 1011243 diff --git a/results_qwen/Qwen1.5-72B-Chat.csv b/results_qwen/Qwen1.5-72B-Chat.csv deleted file mode 100644 index faeb8d7d6a40168e63a7a5b3e28cbc8a84f3860c..0000000000000000000000000000000000000000 --- a/results_qwen/Qwen1.5-72B-Chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd7d69ce42103b008ad375df143d73d9022725be435bb1585a392df01d588d4d -size 12095649 diff --git a/results_qwen/Qwen1.5-72B-Chat.jpg b/results_qwen/Qwen1.5-72B-Chat.jpg deleted file mode 100644 index 7c74228efe477f0e64544f8506d019033c2c6560..0000000000000000000000000000000000000000 --- a/results_qwen/Qwen1.5-72B-Chat.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:40dc0aa6e910a124457a65f8d7a936704959b86313029798a59d1293d637b3ef -size 1311516 diff --git a/results_qwen/Qwen1.5-72B-Chat.pkl b/results_qwen/Qwen1.5-72B-Chat.pkl deleted file mode 100644 index 1634dd716027d1b607fecd94538f2892fb4f596f..0000000000000000000000000000000000000000 --- a/results_qwen/Qwen1.5-72B-Chat.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6716ad73c760159b278364b9c67e1171cf44075148e306575cf57a4e14faf9d7 -size 12128493 diff --git a/results_qwen/Qwen1.5-72B-Chat.png b/results_qwen/Qwen1.5-72B-Chat.png deleted file mode 100644 index fe7897e295e73ef7a5e89d49a0a219758c8a07e3..0000000000000000000000000000000000000000 --- a/results_qwen/Qwen1.5-72B-Chat.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:42da492bed81b2ab155365a33dc7fc29e3c20a79994653bfaab00b183013548d -size 1010447 diff --git a/results_qwen/StripedHyena-Nous-7B.csv b/results_qwen/StripedHyena-Nous-7B.csv deleted file mode 100644 index d37bd4286bc85f20dfd1b640043a93ee5f3d48ec..0000000000000000000000000000000000000000 --- a/results_qwen/StripedHyena-Nous-7B.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8699ac2a760851df8b3ce3b8531f5185e28cbad084494b71f758d6ede787f365 -size 33824580 diff --git a/results_qwen/StripedHyena-Nous-7B.jpg b/results_qwen/StripedHyena-Nous-7B.jpg deleted file mode 100644 index 5616da8e5d6aa485f52b31052e5e792b3c2ad7dd..0000000000000000000000000000000000000000 --- a/results_qwen/StripedHyena-Nous-7B.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3b24d9cb0d193eba3fdb203d7aedb491772910a41b09a7eef19b53d24fa62b26 -size 1321555 diff --git a/results_qwen/StripedHyena-Nous-7B.pkl b/results_qwen/StripedHyena-Nous-7B.pkl deleted file mode 100644 index 85054610bd3678cc1b9a0a8e4922c67753c34771..0000000000000000000000000000000000000000 --- a/results_qwen/StripedHyena-Nous-7B.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5e17907d0209ed8478772675566defd213ecc6cb96106225225b842466fad986 -size 33818513 diff --git a/results_qwen/StripedHyena-Nous-7B.png b/results_qwen/StripedHyena-Nous-7B.png deleted file mode 100644 index 976511c1a74bdf0ef2c20c4a11cd4df85321a30e..0000000000000000000000000000000000000000 --- a/results_qwen/StripedHyena-Nous-7B.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cfda7aa4aa09815ce8cc4e39cd2045b70b36e383d7281af270f1bdd039c0a229 -size 1007332 diff --git a/results_qwen/Yi-34B-Chat.csv b/results_qwen/Yi-34B-Chat.csv deleted file mode 100644 index bd075d893c12e1d69952de7d32fef176c4d2f7b2..0000000000000000000000000000000000000000 --- a/results_qwen/Yi-34B-Chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:1e65a08895fd3a369c29db7ed8e4a58399bc579689f33a6845594632d4d16346 -size 18312597 diff --git a/results_qwen/Yi-34B-Chat.jpg b/results_qwen/Yi-34B-Chat.jpg deleted file mode 100644 index 15c7a4e62ad3ec202d78cd2177a7fc1137e331b4..0000000000000000000000000000000000000000 --- a/results_qwen/Yi-34B-Chat.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:00a49b5e6920db0fddea904bcc5c34aab5f2ebf1e5e47f875f4a861481b54b9b -size 1330458 diff --git a/results_qwen/Yi-34B-Chat.pkl b/results_qwen/Yi-34B-Chat.pkl deleted file mode 100644 index 2c1ed0b8ec92e034296e649fe91a8142fe85ac90..0000000000000000000000000000000000000000 --- a/results_qwen/Yi-34B-Chat.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5468af21ae979604dfadd5de9f3c85c550b520deb17bb03130564b47b21334a9 -size 18366214 diff --git a/results_qwen/Yi-34B-Chat.png b/results_qwen/Yi-34B-Chat.png deleted file mode 100644 index 9bea806f9c5b590869686e7c8888eeb00af9a026..0000000000000000000000000000000000000000 --- a/results_qwen/Yi-34B-Chat.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:6de0b5f7cf86bbc26eb3ee89c52845b134239090fb1ca2f4f36ceb491b2c741b -size 1016037 diff --git a/results_qwen/claude-3-haiku-20240307.csv b/results_qwen/claude-3-haiku-20240307.csv deleted file mode 100644 index 239a6afd056cecf0b0696b5fde0aa1c95d014f53..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-haiku-20240307.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:eb5ca14f6f17cd4c3422e071a6e03dfc5504cb3709f5422a4b44c01daa00f57f -size 17778799 diff --git a/results_qwen/claude-3-haiku-20240307.jpg b/results_qwen/claude-3-haiku-20240307.jpg deleted file mode 100644 index d652d3b67d7a4ded88d6ce5a31782da14c5f3cd1..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-haiku-20240307.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4ca3f9ae806b5160a7ee69d55d4b1e607a0f2fa8018abfd178a345d4539703d -size 1297621 diff --git a/results_qwen/claude-3-haiku-20240307.pkl b/results_qwen/claude-3-haiku-20240307.pkl deleted file mode 100644 index 5e6adabc9ea34750edeb225d6db873faceea5616..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-haiku-20240307.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e4a38e2e7e8f22ec5c7c55561a25cd51cfe1e261f4c171e9995993b9d41bb028 -size 17780948 diff --git a/results_qwen/claude-3-haiku-20240307.png b/results_qwen/claude-3-haiku-20240307.png deleted file mode 100644 index 3a3f3330abf10ea556963a4d54d2d821de086b62..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-haiku-20240307.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:279f1efc22c5bf50306b8f675394e72fe763c3e345e8d8ad2b2a5e0faf6a79ac -size 1007586 diff --git a/results_qwen/claude-3-opus-20240229.csv b/results_qwen/claude-3-opus-20240229.csv deleted file mode 100644 index 115d11f9a55dc9e859dcb020ed1a3c29e94d9f95..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-opus-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:413814671b1a00fe6b8fa308ab8fc4e76a046799d2e275c5fe51a1606d0a5062 -size 18235109 diff --git a/results_qwen/claude-3-opus-20240229.jpg b/results_qwen/claude-3-opus-20240229.jpg deleted file mode 100644 index a55e04793440afeb6c2b8a8acefcda83a35cd073..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-opus-20240229.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5a02ca8e953854fbb5538d070597e878a6af5bd202c93ad1c35ce2c6ded363bc -size 1207092 diff --git a/results_qwen/claude-3-opus-20240229.pkl b/results_qwen/claude-3-opus-20240229.pkl deleted file mode 100644 index 715e7a3f86e461782d2491a664a879d4bc71e430..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-opus-20240229.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be62c58b6b5e21ae49bdf5ba96ad4628d959b9843a2eba691bddd9c5bd717f1d -size 18253951 diff --git a/results_qwen/claude-3-opus-20240229.png b/results_qwen/claude-3-opus-20240229.png deleted file mode 100644 index 3606f0783c1d8bb7fcf42658bc5849ca018542b8..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-opus-20240229.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c199b8c8e681c485b8972025ed3488464dd166642dcac9ee150989a9c650961c -size 1007764 diff --git a/results_qwen/claude-3-sonnet-20240229.csv b/results_qwen/claude-3-sonnet-20240229.csv deleted file mode 100644 index 7722e5f257a58b41c710f8a29d473d84ef279eed..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-sonnet-20240229.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:04d09742be7bb2f08133917006a0b1df70233566b4e2eb87393965beaedf37c5 -size 20960824 diff --git a/results_qwen/claude-3-sonnet-20240229.jpg b/results_qwen/claude-3-sonnet-20240229.jpg deleted file mode 100644 index b83cf9fbee9b64f2950121a9fb1cfd7aee744295..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-sonnet-20240229.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:11c153b8dacfa82a38ba9f9ebe1734f2e704889c8ae17975da79795a0e68e578 -size 1279987 diff --git a/results_qwen/claude-3-sonnet-20240229.pkl b/results_qwen/claude-3-sonnet-20240229.pkl deleted file mode 100644 index c4dad091a7b46695c9485004fc57af5b799a2b2a..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-sonnet-20240229.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:9a75b7af6e5dede497165284750fffb7acf9bf287d53b447f75e734c0e69c306 -size 20960376 diff --git a/results_qwen/claude-3-sonnet-20240229.png b/results_qwen/claude-3-sonnet-20240229.png deleted file mode 100644 index 43f8b449d60665bd5653d51a8bec11d7f3e46ba7..0000000000000000000000000000000000000000 --- a/results_qwen/claude-3-sonnet-20240229.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:54079ec77137dd645d491681bf7e01d25b946048e4a2cbf4e9a439cdc0649881 -size 1005948 diff --git a/results_qwen/dbrx-instruct.csv b/results_qwen/dbrx-instruct.csv deleted file mode 100644 index 75eafd629fea2d874661682be05864af23688a4b..0000000000000000000000000000000000000000 --- a/results_qwen/dbrx-instruct.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:56ee4a0903ab2b5c5d1c478ccaee9063c93a6e82602aead01aa0c83ea75ab17a -size 15793228 diff --git a/results_qwen/dbrx-instruct.jpg b/results_qwen/dbrx-instruct.jpg deleted file mode 100644 index 231f181c70aba65887efd44f935b709ab8f9eaae..0000000000000000000000000000000000000000 --- a/results_qwen/dbrx-instruct.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8f8fa78756565fcba4f50ccce07c9f190457def26e8e47bae5818470c1e398c8 -size 1309652 diff --git a/results_qwen/dbrx-instruct.pkl b/results_qwen/dbrx-instruct.pkl deleted file mode 100644 index e67d441319e45ced857d4a950c7a668147c55bee..0000000000000000000000000000000000000000 --- a/results_qwen/dbrx-instruct.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2ce392c07fc7c1c7c64d941f7b77a06614f1b51f76ce5d5947dafa0191ddf8ee -size 15820291 diff --git a/results_qwen/dbrx-instruct.png b/results_qwen/dbrx-instruct.png deleted file mode 100644 index ca68e3858593a36b4ab2407814a6a49588276527..0000000000000000000000000000000000000000 --- a/results_qwen/dbrx-instruct.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d9215a754055c9dd8c6f6eaf9efb9c9365defcc7cfaad33be68deb5df223f34e -size 1006873 diff --git a/results_qwen/deepseek-llm-67b-chat.csv b/results_qwen/deepseek-llm-67b-chat.csv deleted file mode 100644 index 17a5a2f2987b003c5f9da7dd831ea0f85b48573d..0000000000000000000000000000000000000000 --- a/results_qwen/deepseek-llm-67b-chat.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:668c30672d315df2c899df7627039723fc35bba099ae410792729a087a65b9fb -size 12916783 diff --git a/results_qwen/deepseek-llm-67b-chat.jpg b/results_qwen/deepseek-llm-67b-chat.jpg deleted file mode 100644 index 0622f5e0387a9387249e20a8cb8778a45f17c621..0000000000000000000000000000000000000000 --- a/results_qwen/deepseek-llm-67b-chat.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c31e2ff0f6672617639ed92f33d41b96ea82d3c81590ad8f869555f7d8cbd5b6 -size 1325320 diff --git a/results_qwen/deepseek-llm-67b-chat.pkl b/results_qwen/deepseek-llm-67b-chat.pkl deleted file mode 100644 index 6cee8d189b8c52958f1ef02f86d4e2e6b8ff5a93..0000000000000000000000000000000000000000 --- a/results_qwen/deepseek-llm-67b-chat.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e26477861326f0784a70903beadad6a4ca7dc61e18e3a76db9cc713d196dab0 -size 12957786 diff --git a/results_qwen/deepseek-llm-67b-chat.png b/results_qwen/deepseek-llm-67b-chat.png deleted file mode 100644 index dabcd85656e1486dbb10b3a7745db6ff9ea3226b..0000000000000000000000000000000000000000 --- a/results_qwen/deepseek-llm-67b-chat.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e7028fadc4c5172a89ff58a8074e0aa3d14eb7c343cb9dcb90a9266a6e090008 -size 1012489 diff --git a/results_qwen/gemma-7b-it.csv b/results_qwen/gemma-7b-it.csv deleted file mode 100644 index 600907250da3f7ad46eccb963ff633006d3b1d1a..0000000000000000000000000000000000000000 --- a/results_qwen/gemma-7b-it.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:abcf0738cde1e241535a90aae98338d1b9505a844c9561877e245b604676e489 -size 11246490 diff --git a/results_qwen/gemma-7b-it.jpg b/results_qwen/gemma-7b-it.jpg deleted file mode 100644 index 744b7731b70b363f2d5494eff846eefd38d71f3a..0000000000000000000000000000000000000000 --- a/results_qwen/gemma-7b-it.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bc4ca1f65ed04e50e3402823b7812d9f7a8080d1a2b022c2e8204ca82cbd2624 -size 1321678 diff --git a/results_qwen/gemma-7b-it.pkl b/results_qwen/gemma-7b-it.pkl deleted file mode 100644 index 19907336c232c1301e01a17aaeb5fa5af2396d2b..0000000000000000000000000000000000000000 --- a/results_qwen/gemma-7b-it.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2cd2216230dadc2adf5a73a1b64c799f27913cf4033e7333290962c8926b9557 -size 11301306 diff --git a/results_qwen/gemma-7b-it.png b/results_qwen/gemma-7b-it.png deleted file mode 100644 index 55faa0f4f7eb8085ed8c167fa1aa4f3e6f9621c6..0000000000000000000000000000000000000000 --- a/results_qwen/gemma-7b-it.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ed89e875f18947cd237e312dc5293e2947a1287bd63dd15197689e0bff2614a7 -size 1009926 diff --git a/results_qwen/gpt-3.5-turbo-0125.csv b/results_qwen/gpt-3.5-turbo-0125.csv deleted file mode 100644 index 3d16a053073b4dda300935112bedbe77a4b2ff2e..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-3.5-turbo-0125.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3da618470a9256a02824d308cab84e80400ded429b9157c20b89ba720f708bb1 -size 8041854 diff --git a/results_qwen/gpt-3.5-turbo-0125.jpg b/results_qwen/gpt-3.5-turbo-0125.jpg deleted file mode 100644 index 2f49cc441936da9ddb49b45aa2f994a810384537..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-3.5-turbo-0125.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:707a882b8cdb3da1d75bfc68be6fa0de2f694a75c09e4d752d43da4a5576c6ba -size 1325537 diff --git a/results_qwen/gpt-3.5-turbo-0125.pkl b/results_qwen/gpt-3.5-turbo-0125.pkl deleted file mode 100644 index e32577cf290c3b74083756d5a24dc9116eff3445..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-3.5-turbo-0125.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e224821031c0d87b38a113efff8a9100fe770d067d2d94662c81ad6207718963 -size 8069783 diff --git a/results_qwen/gpt-3.5-turbo-0125.png b/results_qwen/gpt-3.5-turbo-0125.png deleted file mode 100644 index 9ce5c1e6edc259942b6054f60eaf30f96d7a90e4..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-3.5-turbo-0125.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:445c3dd96c469ec350c63ac2c057eea10b836eb44ae2311b487a10e82ca2ed44 -size 1014246 diff --git a/results_qwen/gpt-35-turbo.csv b/results_qwen/gpt-35-turbo.csv deleted file mode 100644 index 050b25f5032a0abf7a3b9fc74943567ab064ed03..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-35-turbo.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:3789603ef21192c8267df2bfc434e536c06bb36387ee753cfc079ca5ca062367 -size 8664643 diff --git a/results_qwen/gpt-35-turbo.jpg b/results_qwen/gpt-35-turbo.jpg deleted file mode 100644 index add7a185ff7cd9921de2ae6f56ad9983cbdf63a7..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-35-turbo.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8e112d9e8a5b755c4c001a3c57cf9a3d7b46fb99582ddfcf779c92220d37fb44 -size 1331099 diff --git a/results_qwen/gpt-35-turbo.pkl b/results_qwen/gpt-35-turbo.pkl deleted file mode 100644 index 1e825b14e92afaa5a3c00d3373a0bed511241e32..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-35-turbo.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c505f8733712a8076c79a5d0e7c78773eb558364ba3baa08c540673bb4de3bdc -size 8672346 diff --git a/results_qwen/gpt-35-turbo.png b/results_qwen/gpt-35-turbo.png deleted file mode 100644 index d3b759d41a4451537f9a5a9434fbc47c445d0cf7..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-35-turbo.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b171b8c2cb964b6714feee9121eeb077269fdafe40e66401a6e0e700cef26273 -size 1016788 diff --git a/results_qwen/gpt-4-0125-preview.csv b/results_qwen/gpt-4-0125-preview.csv deleted file mode 100644 index 9597ed367f81ba1b3f4582492a35debadc462549..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-0125-preview.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd1c1982308ad0b1f510001f407bf50a9378dc94391993f4116c02beaa55c27f -size 15996843 diff --git a/results_qwen/gpt-4-0125-preview.jpg b/results_qwen/gpt-4-0125-preview.jpg deleted file mode 100644 index 87a5a71339d88bec26228796a27a3270de33e3ca..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-0125-preview.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d24927f07dc58a6f8b4d96ba33d3bd5eb608648fbf09310ee6b899b071eb6705 -size 1226225 diff --git a/results_qwen/gpt-4-0125-preview.pkl b/results_qwen/gpt-4-0125-preview.pkl deleted file mode 100644 index 5ee21b8960586610b6edf8925a47e2f61a6e274e..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-0125-preview.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2d31b186ce750657cbddcabbf6998069ecf2f6631954a8718c88313564063b30 -size 15985596 diff --git a/results_qwen/gpt-4-0125-preview.png b/results_qwen/gpt-4-0125-preview.png deleted file mode 100644 index e3ad06b0a5310d67053ab02a7b1ea1ba20c07dc6..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-0125-preview.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5f0eb9f8af0da0eb7cf438ce275255121b5ff4bedd2481ff54b5c340456621e1 -size 1008173 diff --git a/results_qwen/gpt-4-turbo-2024-04-09.csv b/results_qwen/gpt-4-turbo-2024-04-09.csv deleted file mode 100644 index 3f20cc93498710603fabe7334a6b6245baed4ca9..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-turbo-2024-04-09.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d50952850b16d84f951949aa629db60375aa2100cf86f1b9a143cdfd83fc1c30 -size 16522373 diff --git a/results_qwen/gpt-4-turbo-2024-04-09.jpg b/results_qwen/gpt-4-turbo-2024-04-09.jpg deleted file mode 100644 index 3e684cf279e584e44ac3e5305420c84f0c5c33c6..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-turbo-2024-04-09.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a7596d4256d2516d20e9962a3600a60b9eeee806af6370a13e6cf05b544744e6 -size 1219815 diff --git a/results_qwen/gpt-4-turbo-2024-04-09.pkl b/results_qwen/gpt-4-turbo-2024-04-09.pkl deleted file mode 100644 index 347d5ff0e5cbf21d2658012fb281efe16cbebea7..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-turbo-2024-04-09.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:661211c67ac6a0e6168e79df8d616b4abbc7e5d0fda335736cbe22912fade1f4 -size 16542716 diff --git a/results_qwen/gpt-4-turbo-2024-04-09.png b/results_qwen/gpt-4-turbo-2024-04-09.png deleted file mode 100644 index 16da11e82dd5211b79703df5169d1a3fbb7c93a3..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4-turbo-2024-04-09.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:22fd48ef25affc68411b79985c889b9db8884083ca17688831feca9f78e03d0c -size 1008070 diff --git a/results_qwen/gpt-4.csv b/results_qwen/gpt-4.csv deleted file mode 100644 index 6e881ad1e4cdde5b67a1f4e84d6627fe6e97f9f5..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4.csv +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:907089a79d61db23479270b245ee8d5a1f09f542ab433eb3091444194a1a2df3 -size 16848674 diff --git a/results_qwen/gpt-4.jpg b/results_qwen/gpt-4.jpg deleted file mode 100644 index 7a826708761b7c38bd30fe16cfa6e306617701aa..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4.jpg +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:be74b004d198bf84b83c8bbe91310e788590758d536f2eccc103ebeedc661300 -size 1227454 diff --git a/results_qwen/gpt-4.pkl b/results_qwen/gpt-4.pkl deleted file mode 100644 index eff331827bb92a7e1ba57ec6ff7e49e4c23d9075..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4.pkl +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:965993608291efa85de297840aa5f1c562534df157e7ad4532022c6fe58f4335 -size 16870611 diff --git a/results_qwen/gpt-4.png b/results_qwen/gpt-4.png deleted file mode 100644 index 0134731e6cb5b97eb524f4d0857fef40ff6205f7..0000000000000000000000000000000000000000 --- a/results_qwen/gpt-4.png +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:fff9e3d6a4293745ca2e67074a1dde2d12877af1b6f2b5cbb99c73d7956b11fa -size 1007936