Spaces:

LLM-Beetle
/

azerbaijani-llm-leaderboard

Running

App Files Files Community

azerbaijani-llm-leaderboard / app.py

MirakramAghalarov

Added Llama pic to repo

dd91d6d 13 days ago

raw

history blame

15.2 kB

	import gradio as gr
	import pandas as pd
	from apscheduler.schedulers.background import BackgroundScheduler
	from huggingface_hub import snapshot_download
	import os
	# NOTE(review): an empty CURL_CA_BUNDLE is a commonly used workaround that
	# makes some versions of `requests` skip TLS certificate verification for
	# outgoing HTTPS calls. Presumably intentional here -- confirm it is still
	# required, since it weakens transport security for every request.
	os.environ['CURL_CA_BUNDLE'] = ''

	from src.display.about import (
	EVALUATION_QUEUE_TEXT,
	INTRODUCTION_TEXT,
	LLM_BENCHMARKS_TEXT,
	LLM_DATASET_TEXT,
	TITLE,
	)
	from src.display.css_html_js import custom_css
	from src.display.utils import (
	BENCHMARK_COLS,
	COLS,
	EVAL_COLS,
	EVAL_TYPES,
	TYPES,
	AutoEvalColumn,
	fields,
	BENCHMARK_COLS_GROUP,
	COLS_GROUP,
	EVAL_COLS_GROUP,
	EVAL_TYPES_GROUP,
	TYPES_GROUP,
	AutoEvalColumnGroup,
	)
	from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, TOKEN, QUEUE_REPO, REPO_ID, RESULTS_REPO, EVAL_RESULTS_GROUP_PATH, RESULTS_GROUP_REPO
	from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_evaluation_queue_df_group, get_leaderboard_group_df
	from src.submission.submit import add_new_eval


	def restart_space():
	    """Call `API.restart_space` for REPO_ID, authenticating with TOKEN."""
	    API.restart_space(token=TOKEN, repo_id=REPO_ID)

	def _download_dataset(repo_id, local_dir):
	    """Download the dataset repo `repo_id` into `local_dir`.

	    All three startup downloads share the same options, so they are kept in
	    one place: a forced download, no progress bar class, a 30 second etag
	    timeout, and TOKEN for authentication.
	    """
	    snapshot_download(
	        repo_id=repo_id,
	        local_dir=local_dir,
	        repo_type="dataset",
	        tqdm_class=None,
	        etag_timeout=30,
	        force_download=True,
	        token=TOKEN,
	    )


	# Startup sync, step 1: the evaluation request queue.
	try:
	    print(EVAL_REQUESTS_PATH)
	    _download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
	except Exception as exc:
	    # Report why the download failed before restarting; the error used to be
	    # discarded silently, which made restart loops impossible to diagnose.
	    print(f"Failed to download {QUEUE_REPO}: {exc!r}. Restarting the Space.")
	    restart_space()

	# Startup sync, step 2: fine-grained results, then grouped results. A failure
	# in the first download skips the second one, as before.
	try:
	    print(EVAL_RESULTS_PATH)
	    _download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
	    _download_dataset(RESULTS_GROUP_REPO, EVAL_RESULTS_GROUP_PATH)
	except Exception as exc:
	    print(f"Failed to download evaluation results: {exc!r}. Restarting the Space.")
	    restart_space()


	# Build the fine-grained leaderboard first, then the grouped one, from the
	# result directories populated at startup.
	raw_data, original_df = get_leaderboard_df(
	    EVAL_RESULTS_PATH,
	    COLS,
	    BENCHMARK_COLS,
	)
	raw_data_grouped, original_df_grouped = get_leaderboard_group_df(
	    EVAL_RESULTS_GROUP_PATH,
	    COLS_GROUP,
	    BENCHMARK_COLS_GROUP,
	)

	# Independent copies used for the visible tables; the originals feed the
	# hidden search tables further down.
	leaderboard_df = original_df.copy()
	leaderboard_grouped_df = original_df_grouped.copy()

	# Evaluation queue split by status (finished / running / pending).
	finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
	    EVAL_REQUESTS_PATH, EVAL_COLS
	)

	# Same split, produced by the group variant of the queue loader.
	finished_eval_queue_g_df, running_eval_queue_g_df, pending_eval_queue_g_df = get_evaluation_queue_df_group(
	    EVAL_REQUESTS_PATH, EVAL_COLS_GROUP
	)

	# Searching and filtering
	def update_table(
	    hidden_df: pd.DataFrame,
	    columns: list,
	    query: str,
	):
	    """Return `hidden_df` restricted to rows matching `query` and to `columns`.

	    Rows are filtered first with `filter_queries`; the surviving rows are then
	    projected with `select_columns`.
	    """
	    return select_columns(filter_queries(query, hidden_df), columns)


	def search_table(df: pd.DataFrame, query: str) -> pd.DataFrame:
	    """Return the rows of `df` whose dummy (search) column contains `query`.

	    Matching is case-insensitive. `query` is first interpreted as a regular
	    expression, as before. If it is not a valid pattern (for example ``"("``
	    or ``"*"``), it is matched as a literal substring instead of raising.
	    Missing values in the searched column never match.

	    Args:
	        df: Frame containing the column named by `AutoEvalColumn.dummy.name`.
	        query: Non-empty search text typed by the user.

	    Returns:
	        The matching subset of `df`.
	    """
	    import re  # local import: only needed for the invalid-pattern fallback

	    text = df[AutoEvalColumn.dummy.name].str
	    try:
	        # na=False keeps the mask strictly boolean even when cells are NaN.
	        mask = text.contains(query, case=False, na=False)
	    except re.error:
	        # User input is not guaranteed to be a valid regex; fall back to a
	        # plain substring search rather than failing the whole UI callback.
	        mask = text.contains(query, case=False, na=False, regex=False)
	    return df[mask]


	def select_columns(df: pd.DataFrame, columns: list) -> pd.DataFrame:
	    """Project `df` onto the pinned columns, the requested ones, and the dummy column.

	    The submission-date and model columns always come first, followed by every
	    entry of COLS that exists in `df` and was requested in `columns`, followed
	    by the dummy column.
	    """
	    pinned = [
	        AutoEvalColumn.model_submission_date.name,
	        AutoEvalColumn.model.name,
	    ]
	    # Iterate COLS rather than `columns` so the output keeps COLS ordering.
	    requested = [name for name in COLS if name in df.columns and name in columns]
	    trailing = [AutoEvalColumn.dummy.name]
	    return df[pinned + requested + trailing]


	def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
	    """Keep the rows of `filtered_df` that match any `;`-separated term in `query`.

	    Each term is stripped and blank terms are skipped. Every remaining term is
	    searched against the original frame with `search_table`; the non-empty
	    results are concatenated and de-duplicated on (model, submission date).
	    If `query` is empty, or no term produces a match, the input frame is
	    returned unchanged.
	    """
	    if query == "":
	        return filtered_df

	    matches = []
	    for term in (piece.strip() for piece in query.split(";")):
	        if term == "":
	            continue
	        hits = search_table(filtered_df, term)
	        if len(hits) > 0:
	            matches.append(hits)

	    if not matches:
	        return filtered_df

	    combined = pd.concat(matches)
	    return combined.drop_duplicates(
	        subset=[AutoEvalColumn.model.name, AutoEvalColumn.model_submission_date.name]
	    )


	# Gradio UI: header, two leaderboard tabs, a submission tab and a datasets tab.
	demo = gr.Blocks(css=custom_css)
	with demo:
	    # Page header: title, then the introduction text next to the logo image.
	    gr.HTML(TITLE)
	    with gr.Row():
	        with gr.Column(scale=9):
	            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
	        with gr.Column(scale=2, min_width=1):
	            gr.Image(
	                'src/display/BirLLama.jpeg',
	                scale=2,
	                show_label=False,
	                interactive=False,
	                show_share_button=False,
	                show_download_button=False,
	            )

	    with gr.Tabs(elem_classes="tab-buttons") as tabs:
	        # ---- Tab 0: grouped leaderboard --------------------------------------
	        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
	            with gr.Row():
	                with gr.Row():
	                    search_bar = gr.Textbox(
	                        placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
	                        show_label=False,
	                        elem_id="search-bar",
	                    )
	                with gr.Row():
	                    shown_columns = gr.CheckboxGroup(
	                        choices=[
	                            c.name
	                            for c in fields(AutoEvalColumnGroup)
	                            if not c.hidden and not c.never_hidden and not c.dummy
	                        ],
	                        value=[
	                            c.name
	                            for c in fields(AutoEvalColumnGroup)
	                            if c.displayed_by_default and not c.hidden and not c.never_hidden
	                        ],
	                        label="Select columns to show",
	                        elem_id="column-select",
	                        interactive=True,
	                    )

	            # Columns shown on first render: the never-hidden ones, the default
	            # checkbox selection, then the dummy column. Computed once and used
	            # for both the data slice and the headers.
	            initial_columns = (
	                [c.name for c in fields(AutoEvalColumnGroup) if c.never_hidden]
	                + shown_columns.value
	                + [AutoEvalColumnGroup.dummy.name]
	            )
	            leaderboard_table = gr.components.Dataframe(
	                value=leaderboard_grouped_df[initial_columns],
	                headers=initial_columns,
	                datatype=TYPES_GROUP,
	                elem_id="leaderboard-table",
	                interactive=False,
	                visible=True,
	                column_widths=["15%", "30%"],
	            )

	            # Dummy leaderboard for handling the case when the user uses backspace key
	            hidden_leaderboard_table_for_search = gr.components.Dataframe(
	                value=original_df_grouped[COLS_GROUP],
	                headers=COLS_GROUP,
	                datatype=TYPES_GROUP,
	                visible=False,
	            )
	            # NOTE(review): update_table -> select_columns/filter_queries are
	            # written against AutoEvalColumn and COLS, while this tab is built
	            # from the *Group variants -- confirm the column names line up.
	            search_bar.submit(
	                update_table,
	                [
	                    hidden_leaderboard_table_for_search,
	                    shown_columns,
	                    search_bar,
	                ],
	                leaderboard_table,
	            )
	            shown_columns.change(
	                update_table,
	                [
	                    hidden_leaderboard_table_for_search,
	                    shown_columns,
	                    search_bar,
	                ],
	                leaderboard_table,
	                queue=True,
	            )

	        # ---- Tab 1: fine-grained leaderboard ---------------------------------
	        with gr.TabItem("🏅 LLM Benchmark FineGrained", elem_id="llm-benchmark-tab-table-1", id=1):
	            with gr.Row():
	                with gr.Row():
	                    search_bar = gr.Textbox(
	                        placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
	                        show_label=False,
	                        elem_id="search-bar",
	                    )
	                with gr.Row():
	                    shown_columns = gr.CheckboxGroup(
	                        choices=[
	                            c.name
	                            for c in fields(AutoEvalColumn)
	                            if not c.hidden and not c.never_hidden and not c.dummy
	                        ],
	                        value=[
	                            c.name
	                            for c in fields(AutoEvalColumn)
	                            if c.displayed_by_default and not c.hidden and not c.never_hidden
	                        ],
	                        label="Select columns to show",
	                        elem_id="column-select",
	                        interactive=True,
	                    )

	            # Same first-render column logic as the grouped tab, for AutoEvalColumn.
	            initial_columns = (
	                [c.name for c in fields(AutoEvalColumn) if c.never_hidden]
	                + shown_columns.value
	                + [AutoEvalColumn.dummy.name]
	            )
	            leaderboard_table = gr.components.Dataframe(
	                value=leaderboard_df[initial_columns],
	                headers=initial_columns,
	                datatype=TYPES,
	                elem_id="leaderboard-table",
	                interactive=False,
	                visible=True,
	                column_widths=["15%", "30%"],
	            )

	            # Dummy leaderboard for handling the case when the user uses backspace key
	            hidden_leaderboard_table_for_search = gr.components.Dataframe(
	                value=original_df[COLS],
	                headers=COLS,
	                datatype=TYPES,
	                visible=False,
	            )
	            search_bar.submit(
	                update_table,
	                [
	                    hidden_leaderboard_table_for_search,
	                    shown_columns,
	                    search_bar,
	                ],
	                leaderboard_table,
	            )
	            shown_columns.change(
	                update_table,
	                [
	                    hidden_leaderboard_table_for_search,
	                    shown_columns,
	                    search_bar,
	                ],
	                leaderboard_table,
	                queue=True,
	            )

	        # ---- Tab 2: evaluation queue overview and submission form -------------
	        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=2):
	            with gr.Column():
	                with gr.Row():
	                    gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

	                with gr.Column():
	                    with gr.Accordion(
	                        f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            finished_eval_table = gr.components.Dataframe(
	                                value=finished_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )
	                    with gr.Accordion(
	                        f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            running_eval_table = gr.components.Dataframe(
	                                value=running_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )

	                    with gr.Accordion(
	                        f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
	                        open=False,
	                    ):
	                        with gr.Row():
	                            pending_eval_table = gr.components.Dataframe(
	                                value=pending_eval_queue_df,
	                                headers=EVAL_COLS,
	                                datatype=EVAL_TYPES,
	                                row_count=5,
	                            )

	            with gr.Row():
	                gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")

	            with gr.Row():
	                with gr.Column():
	                    with gr.Row():
	                        model_name_textbox = gr.Textbox(label="Model name")

	                with gr.Column():
	                    with gr.Row():
	                        weight_type = gr.Dropdown(
	                            choices=['safetensors', 'gguf'],
	                            label="Weights type",
	                            multiselect=False,
	                            # Fixed: the default was the stray text
	                            # 'safgit petensors', which is not one of the
	                            # choices above.
	                            value='safetensors',
	                            interactive=True,
	                        )

	                with gr.Column():
	                    with gr.Row():
	                        gguf_filename_textbox = gr.Textbox(label="GGUF filename")

	            submit_button = gr.Button("Submit Eval")
	            submission_result = gr.Markdown()
	            # add_new_eval receives (model name, weights type, GGUF filename);
	            # its return value is rendered in `submission_result`.
	            submit_button.click(
	                add_new_eval,
	                [
	                    model_name_textbox,
	                    weight_type,
	                    gguf_filename_textbox,
	                ],
	                submission_result,
	            )

	        # ---- Tab 4: dataset description and contributor logos -----------------
	        with gr.TabItem("📝 Evaluation Datasets", elem_id="llm-benchmark-tab-table", id=4):
	            gr.Markdown(LLM_DATASET_TEXT, elem_classes="markdown-text")
	            gr.HTML("""<h1 align="center" id="space-title"> Contributor Companies and Teams </h1>""")
	            with gr.Row():
	                # Empty wide columns on both sides act as spacers around the logos.
	                with gr.Column(scale=35):
	                    pass
	                with gr.Column(scale=10, min_width=1, elem_classes='center-column'):
	                    gr.Image(
	                        'src/display/localdocs.jpeg',
	                        scale=1,
	                        height=160,
	                        show_label=False,
	                        interactive=False,
	                        show_share_button=False,
	                        show_download_button=False,
	                    )
	                    gr.HTML("""<h1 align="center" id="company tile"> LocalDocs </h1>""")
	                with gr.Column(scale=10, min_width=1, elem_classes='center-column'):
	                    gr.Image(
	                        'src/display/prodata.png',
	                        scale=1,
	                        height=160,
	                        show_label=False,
	                        interactive=False,
	                        show_share_button=False,
	                        show_download_button=False,
	                    )
	                    gr.HTML("""<h1 align="center" id="company tile"> PRODATA </h1>""")
	                with gr.Column(scale=10, min_width=1, elem_classes='center-column'):
	                    gr.Image(
	                        'src/display/bhosai.jpeg',
	                        scale=1,
	                        height=160,
	                        show_label=False,
	                        interactive=False,
	                        show_share_button=False,
	                        show_download_button=False,
	                    )
	                    gr.HTML("""<h1 align="center" id="company tile"> BHOSAI </h1>""")
	                with gr.Column(scale=35):
	                    pass

	# Schedule restart_space to run every 1000 seconds in the background
	# (presumably so the startup dataset downloads are re-run), then start
	# serving the UI with request queuing enabled.
	scheduler = BackgroundScheduler()
	scheduler.add_job(restart_space, trigger="interval", seconds=1000)
	scheduler.start()

	queued_demo = demo.queue(default_concurrency_limit=40)
	queued_demo.launch()