# kaz-llm-lb / app.py
# kz-transformers — "Update app.py" (commit ed9137f, verified)
import logging
import os

# Create a local scratch directory and advertise it via the environment so
# downstream code that honours TMP_DIR writes temp files here. This must run
# before the later imports that may read TMP_DIR at import time.
os.makedirs("tmp", exist_ok=True)
os.environ['TMP_DIR'] = "tmp"

import subprocess  # NOTE(review): appears unused in the visible code (only referenced in comments)
import shutil      # NOTE(review): appears unused in the visible code (only referenced in comments)
import glob
import gradio as gr
import numpy as np  # NOTE(review): appears unused in the visible code
from src.radial.radial import create_plot
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, SelectColumns
from gradio_space_ci import enable_space_ci
import json
from io import BytesIO
def handle_file_upload(file):
    """Read an uploaded JSON file and return its parsed content and basename.

    Args:
        file: Gradio file object exposing a ``.name`` attribute that holds
            the local path of the uploaded file.

    Returns:
        Tuple ``(parsed_json, file_basename)`` — the decoded JSON payload and
        the bare file name (no directory components).

    Raises:
        json.JSONDecodeError: If the uploaded file is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # os.path.basename handles both '/' and the OS-specific separator,
    # unlike the previous manual split on '/' only.
    file_path = os.path.basename(file.name)
    logging.info("File uploaded: %s", file_path)
    with open(file.name, "r", encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
def submit_file(v, file_path, mn, profile: gr.OAuthProfile | None):
    """Flatten a user-submitted eval result and upload it to the eval dataset repo.

    Args:
        v: Parsed JSON payload from the uploaded file; expects the keys
            ``'results'`` (per-benchmark ``{'acc,none': score}`` dicts) and
            ``'config'`` (with ``'model_dtype'``).
        file_path: Basename of the uploaded file (currently unused here).
        mn: Model name entered by the submitter.
        profile: OAuth profile of the logged-in Hub user, or ``None`` when
            not logged in (Gradio injects this from the LoginButton session).

    Returns:
        A status string that is shown in the UI textbox.
    """
    print('START SUBMITTING!!!')
    if profile is None:
        return "Hub Login Required"
    print('PROFILE: ', profile.__dict__)
    print('MN:', mn)
    new_file = v['results']
    # Submissions from the org account keep the raw model name; everyone
    # else gets namespaced under their username to avoid collisions.
    if profile.username == 'kz-transformers':
        new_file['model'] = mn
    else:
        new_file['model'] = profile.username + "/" + mn
    columns = [
        'mmlu_translated_kk', 'kk_constitution_mc', 'kk_dastur_mc', 'kazakh_and_literature_unt_mc', 'kk_geography_unt_mc',
        'kk_world_history_unt_mc', 'kk_history_of_kazakhstan_unt_mc', 'kk_english_unt_mc', 'kk_biology_unt_mc',
        'kk_human_society_rights_unt_mc'
    ]
    # Flatten lm-eval-harness style {"acc,none": score} entries to bare floats.
    for column in columns:
        new_file[column] = new_file[column]['acc,none']
    new_file['model_dtype'] = v['config']["model_dtype"]
    new_file['ppl'] = 0
    print('WE READ FILE: ', new_file)
    buf = BytesIO()
    buf.write(json.dumps(new_file).encode('utf-8'))
    # BUGFIX: rewind the buffer before handing it to upload_file — the stream
    # position is at EOF after write(), so without seek(0) the uploaded file
    # would be empty (upload_file reads from the current position).
    buf.seek(0)
    API.upload_file(
        path_or_fileobj=buf,
        path_in_repo="model_data/external/" + profile.username + '__' + mn.replace('/', '__') + ".json",
        repo_id="kz-transformers/s-openbench-eval",
        repo_type="dataset",
    )
    # Signal the scheduler job that fresh data should be rebuilt.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    return "Success!"
# Project-local imports: UI texts, CSS, leaderboard column metadata, Hub
# credentials/handles, and the leaderboard-building helpers.
from src.display.about import (
    INTRODUCTION_TEXT,
    TITLE,
    LLM_BENCHMARKS_TEXT
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    AutoEvalColumn,
    fields,
)
from src.envs import API, H4_TOKEN, HF_HOME, REPO_ID, RESET_JUDGEMENT_ENV
from src.leaderboard.build_leaderboard import build_leadearboard_df, download_openbench, download_dataset
import huggingface_hub
# huggingface_hub.login(token=H4_TOKEN)

# Opt out of Gradio usage analytics for this process.
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Start ephemeral Spaces on PRs (see config in README.md)
enable_space_ci()
# download_openbench()
def restart_space():
    """Restart this Space via the Hub API, then re-fetch the openbench data.

    Order preserved from the original: the restart request is issued first,
    the data download second.
    """
    API.restart_space(repo_id=REPO_ID)
    download_openbench()
def update_plot(selected_models):
    """Regenerate the radial analytics plot for the chosen models.

    Thin adapter so the dropdown's change-event can drive ``create_plot``.
    """
    figure = create_plot(selected_models)
    return figure
def build_demo():
    """Construct and return the Gradio Blocks app (leaderboard, submit, analytics tabs)."""
    # Make sure the leaderboard data is present before building the dataframe.
    download_openbench()
    demo = gr.Blocks(title="Kaz LLM LB", css=custom_css)
    leaderboard_df = build_leadearboard_df()
    with demo:
        gr.HTML(TITLE)
        gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
        with gr.Tabs(elem_classes="tab-buttons"):
            # --- Tab 0: the main leaderboard table ---
            with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                Leaderboard(
                    value=leaderboard_df,
                    datatype=[c.type for c in fields(AutoEvalColumn)],
                    select_columns=SelectColumns(
                        default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
                        cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden or c.dummy],
                        label="Select Columns to Display:",
                    ),
                    search_columns=[
                        AutoEvalColumn.model.name,
                        # AutoEvalColumn.fullname.name,
                        # AutoEvalColumn.license.name
                    ],
                )
            # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            # with gr.TabItem("❗FAQ", elem_id="llm-benchmark-tab-table", id=2):
            #     gr.Markdown(FAQ_TEXT, elem_classes="markdown-text")
            # --- Tab 3: submission form for external eval results ---
            with gr.TabItem("🚀 Submit ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Row():
                    gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                with gr.Row():
                    gr.Markdown("# ✨ Submit your model here!", elem_classes="markdown-text")
                with gr.Column():
                    model_name_textbox = gr.Textbox(label="Model name")
                    # submitter_username = gr.Textbox(label="Username")
                    # def toggle_upload_button(model_name, username):
                    #     return bool(model_name) and bool(username)
                    file_output = gr.File(label="Drag and drop JSON file judgment here", type="filepath")
                    # upload_button = gr.Button("Click to Upload & Submit Answers", elem_id="upload_button",variant='primary')
                    # State holders for the parsed JSON and its basename,
                    # filled by the upload handler below.
                    uploaded_file = gr.State()
                    file_path = gr.State()
                with gr.Row():
                    with gr.Column():
                        out = gr.Textbox("Статус отправки")
                    with gr.Column():
                        login_button = gr.LoginButton(elem_id="oauth-button")
                        submit_button = gr.Button("Submit File", elem_id="submit_button", variant='primary')
                # Parse the file as soon as it is dropped; results land in the
                # two State components.
                file_output.upload(
                    handle_file_upload,
                    file_output,
                    [uploaded_file, file_path]
                )
                # submit_file takes a 4th OAuthProfile parameter — Gradio
                # injects it automatically from the login session because of
                # the type annotation, so only 3 inputs are wired here.
                submit_button.click(
                    submit_file,
                    [uploaded_file, file_path, model_name_textbox],
                    [out]
                )
            # --- Tab 4: radial-plot analytics over selected models ---
            with gr.TabItem("📊 Analytics", elem_id="llm-benchmark-tab-table", id=4):
                with gr.Column():
                    model_dropdown = gr.Dropdown(
                        choices=leaderboard_df["model"].tolist(),
                        label="Models",
                        value=leaderboard_df["model"].tolist(),
                        multiselect=True,
                        info="Select models"
                    )
                with gr.Column():
                    # Initial plot shows every model (the dropdown default).
                    plot = gr.Plot(update_plot(model_dropdown.value))
                    # plot = gr.Plot()
                    model_dropdown.change(
                        fn=update_plot,
                        inputs=[model_dropdown],
                        outputs=[plot]
                    )
    return demo
# print(os.system('cd src/gen && ../../.venv/bin/python gen_judgment.py'))
# print(os.system('cd src/gen/ && python show_result.py --output'))
def update_board():
    """Rebuild the leaderboard JSON and restart the Space (scheduler job).

    Merges the hard-coded baseline results with every external submission
    found under ``m_data/model_data/external``, uploads the merged list as
    ``leaderboard.json`` to the metainfo dataset repo, then restarts the
    Space so the UI picks up fresh data.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    # NOTE(review): the reset flag is read but deliberately not acted on —
    # the early `return` was commented out upstream, so the board is rebuilt
    # on every scheduler tick regardless of the flag's value.
    os.environ[RESET_JUDGEMENT_ENV] = "0"
    download_dataset("kz-transformers/s-openbench-eval", "m_data")
    # Baseline scores for reference models (one dict per model; keys are
    # per-benchmark accuracies).
    data_list = [
        {"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713},
        {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744},
        {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704},
        {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933},
        {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224},
        {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619},
        {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412},
        {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265},
    ]
    files_list = glob.glob("./m_data/model_data/external/*.json")
    logging.info("FILES LIST: %s", files_list)
    for file in files_list:
        logging.info("trying to read external submit file: %s", file)
        try:
            with open(file, encoding="utf-8") as f:
                data = json.load(f)
            logging.info("succeed to read: %s, got %d entries", file, len(data))
            data_list.append(data)
        except Exception:
            # Badly formatted submissions are skipped on purpose so one bad
            # file can't break the whole rebuild — but log instead of
            # silently swallowing the error.
            logging.exception("skipping badly formatted submit file: %s", file)
    logging.info("DATALIST: %s", data_list)
    with open("genned.json", "w", encoding="utf-8") as f:
        json.dump(data_list, f)
    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
    restart_space()
def update_board_():
    """Rebuild the leaderboard JSON once at startup (no Space restart).

    Same merge-and-upload logic as ``update_board``, but run synchronously
    at process start and without the ``restart_space()`` call at the end.
    """
    need_reset = os.environ.get(RESET_JUDGEMENT_ENV)
    logging.info("Updating the judgement: %s", need_reset)
    # NOTE(review): the reset flag is read but deliberately not acted on —
    # the early `return` was commented out upstream, so the rebuild always
    # proceeds.
    os.environ[RESET_JUDGEMENT_ENV] = "0"
    download_dataset("kz-transformers/s-openbench-eval", "m_data")
    # Baseline scores for reference models (one dict per model; keys are
    # per-benchmark accuracies).
    data_list = [
        {"model_dtype": "torch.float16", "model": "dummy-random-baseline", "ppl": 0, "mmlu_translated_kk": 0.22991508817766165, "kk_constitution_mc": 0.25120772946859904, "kk_dastur_mc": 0.24477611940298508, "kazakh_and_literature_unt_mc": 0.2090443686006826, "kk_geography_unt_mc": 0.2019790454016298, "kk_world_history_unt_mc": 0.1986970684039088, "kk_history_of_kazakhstan_unt_mc": 0.19417177914110428, "kk_english_unt_mc": 0.189804278561675, "kk_biology_unt_mc": 0.22330729166666666, "kk_human_society_rights_unt_mc": 0.242152466367713},
        {"model_dtype": "torch.float16", "model": "gpt-4o-mini", "ppl": 0, "mmlu_translated_kk": 0.5623775310254735, "kk_constitution_mc": 0.79, "kk_dastur_mc": 0.755, "kazakh_and_literature_unt_mc": 0.4953071672354949, "kk_geography_unt_mc": 0.5675203725261933, "kk_world_history_unt_mc": 0.6091205211726385, "kk_history_of_kazakhstan_unt_mc": 0.47883435582822087, "kk_english_unt_mc": 0.6763768775603095, "kk_biology_unt_mc": 0.607421875, "kk_human_society_rights_unt_mc": 0.7309417040358744},
        {"model_dtype": "api", "model": "gpt-4o", "ppl": 0, "mmlu_translated_kk": 0.7419986936642717, "kk_constitution_mc": 0.841, "kk_dastur_mc": 0.798, "kazakh_and_literature_unt_mc": 0.6785409556313993, "kk_geography_unt_mc": 0.629802095459837, "kk_world_history_unt_mc": 0.6783387622149837, "kk_history_of_kazakhstan_unt_mc": 0.6785276073619632, "kk_english_unt_mc": 0.7410104688211198, "kk_biology_unt_mc": 0.6979166666666666, "kk_human_society_rights_unt_mc": 0.7937219730941704},
        {"model_dtype": "torch.float16", "model": "nova-pro-v1", "ppl": 0, "mmlu_translated_kk": 0.6792945787067276, "kk_constitution_mc": 0.7753623188405797, "kk_dastur_mc": 0.718407960199005, "kazakh_and_literature_unt_mc": 0.4656569965870307, "kk_geography_unt_mc": 0.5541327124563445, "kk_world_history_unt_mc": 0.6425081433224755, "kk_history_of_kazakhstan_unt_mc": 0.5, "kk_english_unt_mc": 0.6845698680018206, "kk_biology_unt_mc": 0.6197916666666666, "kk_human_society_rights_unt_mc": 0.7713004484304933},
        {"model_dtype": "torch.float16", "model": "gemini-1.5-pro", "ppl": 0, "mmlu_translated_kk": 0.7380796864794252, "kk_constitution_mc": 0.8164251207729468, "kk_dastur_mc": 0.7383084577114428, "kazakh_and_literature_unt_mc": 0.5565273037542662, "kk_geography_unt_mc": 0.6065192083818394, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.5791411042944785, "kk_english_unt_mc": 0.7114246700045517, "kk_biology_unt_mc": 0.6673177083333334, "kk_human_society_rights_unt_mc": 0.7623318385650224},
        {"model_dtype": "torch.float16", "model": "gemini-1.5-flash", "ppl": 0, "mmlu_translated_kk": 0.6335728282168517, "kk_constitution_mc": 0.748792270531401, "kk_dastur_mc": 0.7054726368159204, "kazakh_and_literature_unt_mc": 0.4761092150170648, "kk_geography_unt_mc": 0.5640279394644936, "kk_world_history_unt_mc": 0.5838762214983714, "kk_history_of_kazakhstan_unt_mc": 0.43374233128834355, "kk_english_unt_mc": 0.6681838871187984, "kk_biology_unt_mc": 0.6217447916666666, "kk_human_society_rights_unt_mc": 0.7040358744394619},
        {"model_dtype": "torch.float16", "model": "claude-3-5-sonnet", "ppl": 0, "mmlu_translated_kk": 0.7335075114304376, "kk_constitution_mc": 0.8623188405797102, "kk_dastur_mc": 0.7950248756218905, "kazakh_and_literature_unt_mc": 0.6548634812286689, "kk_geography_unt_mc": 0.6431897555296857, "kk_world_history_unt_mc": 0.6669381107491856, "kk_history_of_kazakhstan_unt_mc": 0.6251533742331289, "kk_english_unt_mc": 0.7291761492944925, "kk_biology_unt_mc": 0.6686197916666666, "kk_human_society_rights_unt_mc": 0.8026905829596412},
        {"model_dtype": "torch.float16", "model": "yandex-gpt", "ppl": 0, "mmlu_translated_kk": 0.39777922926192033, "kk_constitution_mc": 0.7028985507246377, "kk_dastur_mc": 0.6159203980099502, "kazakh_and_literature_unt_mc": 0.3914249146757679, "kk_geography_unt_mc": 0.4912689173457509, "kk_world_history_unt_mc": 0.5244299674267101, "kk_history_of_kazakhstan_unt_mc": 0.4030674846625767, "kk_english_unt_mc": 0.5844333181611289, "kk_biology_unt_mc": 0.4368489583333333, "kk_human_society_rights_unt_mc": 0.6995515695067265},
    ]
    files_list = glob.glob("./m_data/model_data/external/*.json")
    logging.info("FILES LIST: %s", files_list)
    for file in files_list:
        logging.info("trying to read external submit file: %s", file)
        try:
            with open(file, encoding="utf-8") as f:
                data = json.load(f)
            logging.info("succeed to read: %s, got %d entries", file, len(data))
            data_list.append(data)
        except Exception:
            # Badly formatted submissions are skipped on purpose so one bad
            # file can't break the whole rebuild — but log instead of
            # silently swallowing the error.
            logging.exception("skipping badly formatted submit file: %s", file)
    logging.info("DATALIST: %s", data_list)
    with open("genned.json", "w", encoding="utf-8") as f:
        json.dump(data_list, f)
    API.upload_file(
        path_or_fileobj="genned.json",
        path_in_repo="leaderboard.json",
        repo_id="kz-transformers/kaz-llm-lb-metainfo",
        repo_type="dataset",
    )
if __name__ == "__main__":
    # Mark the board as needing a rebuild, do one synchronous rebuild now,
    # then schedule periodic rebuilds every 10 minutes in the background.
    os.environ[RESET_JUDGEMENT_ENV] = "1"
    scheduler = BackgroundScheduler()
    update_board_()
    scheduler.add_job(update_board, "interval", minutes=10)
    scheduler.start()
    # Build the Gradio UI and serve it (blocking call).
    demo_app = build_demo()
    demo_app.launch(debug=True,share=True)