Spaces:

longvideobench
/

LongVideoBench

Running

LongVideoBench / app.py

Teo Wu

update

3852af6 8 months ago

3.14 kB

	import gradio as gr
	import pandas as pd

	# Top-level Gradio Blocks container for the leaderboard page; the UI is
	# declared inside the `with block:` section further down and the app is
	# started by `block.launch()` at the end of the script.
	block = gr.Blocks(title="LongVideoBench Leaderboard", theme='gradio/soft')

	def sort_data(key, show_duration, show_category):
	    """Return the leaderboard rows ranked by a column, limited to the visible columns.

	    ``result.csv`` is re-read (relative to the working directory) on every
	    call, sorted in descending order, and then narrowed to the columns
	    selected by the two flags.

	    Args:
	        key: Name of the column to rank by. An exact column name is used
	            as-is; otherwise surrounding whitespace and letter case are
	            ignored when looking for a match, because the value comes
	            from a free-text box. Empty, ``None`` or unknown values fall
	            back to ``'Test Total'``.
	        show_duration: When truthy, include the per-duration-group columns.
	        show_category: When truthy, include the per-question-category columns.

	    Returns:
	        A ``pandas.DataFrame`` holding ``'Model'`` and ``'Test Total'``,
	        then the optional duration and category columns, then
	        ``'Val Total'``, ``'LMM Type'``, ``'Interleaved?'`` and
	        ``'#Max Frames'``.
	    """
	    data = pd.read_csv("result.csv")

	    duration_columns = ['8s-15s', '15s-60s', '180s-600s', '900s-3600s']
	    category_columns = ['S2E', 'S2O', 'S2A', 'E2O', 'SSS', 'SOS', 'SAA', 'T3E', 'T3O', 'TOS', 'TAA']

	    columns_to_show = ['Model', 'Test Total']
	    if show_duration:
	        columns_to_show += duration_columns
	    if show_category:
	        columns_to_show += category_columns
	    columns_to_show += ['Val Total', 'LMM Type', 'Interleaved?', "#Max Frames"]

	    # Resolve the ranking column: exact match first (original behaviour),
	    # then a forgiving match so that input such as " val total " still
	    # works, and finally the default column.
	    sort_column = 'Test Total'
	    if key in data.columns:
	        sort_column = key
	    elif isinstance(key, str) and key.strip():
	        wanted = key.strip().casefold()
	        sort_column = next(
	            (name for name in data.columns if str(name).strip().casefold() == wanted),
	            sort_column,
	        )

	    df_sorted = data.sort_values(by=sort_column, ascending=False)
	    return df_sorted[columns_to_show]

	# ---------------------------------------------------------------------
	# UI definition. Components are created in the order they appear on the
	# page; the script ends by starting the Gradio server.
	# ---------------------------------------------------------------------
	with block:

	    # Link the stylesheet that accompanies the app.
	    gr.HTML("<link rel='stylesheet' type='text/css' href='style.css'>")

	    # Centered page title with a link to the project website.
	    with gr.Row():
	        gr.Markdown("""
	<div style='text-align: center;'>
	<h1>LongVideoBench Leaderboard</h1>
	Website: <a href="https://longvideobench.github.io" target="_blank">longvideobench.github.io</a>
	</div>
	""")

	    # --- Tab 1: the leaderboard table and the controls that reshape it ---
	    with gr.Tab("Existing Results"):
	        # Toggles for the optional column groups.
	        with gr.Row():
	            show_duration = gr.Checkbox(
	                label="Show Test Set Accuracy by Duration Groups",
	                value=False,
	            )
	            show_category = gr.Checkbox(
	                label="Show Test Set Accuracy by Question Categories",
	                value=False,
	            )

	        # Free-text name of the column to rank by.
	        key_input = gr.Textbox(
	            label="Rank LMMs by column:",
	            placeholder="Test Total (default)",
	        )

	        # Initial table: ranked by 'Test Total', optional groups hidden.
	        data_frame = gr.DataFrame(value=sort_data('Test Total', False, False))

	        def update_data_frame(key, show_duration, show_category):
	            """Recompute the table for the current control values."""
	            # Kept as a named wrapper around sort_data so the callback
	            # registered with Gradio keeps its original name.
	            return sort_data(key, show_duration, show_category)

	        # Any change to one of the three controls refreshes the table,
	        # always passing the current value of all three.
	        controls = (key_input, show_duration, show_category)
	        for control in controls:
	            control.change(fn=update_data_frame, inputs=list(controls), outputs=data_frame)

	        gr.Markdown("Models are evaluated using their optimal #max frames, capped at 256 frames.")

	    # --- Tab 2: static submission instructions ---
	    with gr.Tab("Submit!"):
	        # NOTE(review): the address in the text below reads "[email protected]",
	        # which looks like a scrubbed placeholder rather than a real
	        # mailbox -- confirm the intended contact address.
	        gr.Markdown(
	            '''The answer of validation set of LongVideoBench is public now. Please see our [released dataset](https://huggingface.co/datasets/longvideobench/LongVideoBench) for more information.

	For test set, please prepare your output as follows:
	```python
	{VIDEO_ID_0: "A", VIDEO_ID_1: "D", ...} # Please make sure your submission only contains the letter of model's choice, or starts with the letter of model's choice.
	```
	and submit to us as a JSON file.

	Please prepare an email to `[email protected]` titled [LongVideoBench-Submission-YOURNAME] to submit and obtain your results.

	_We will launch an automatic submission server soon._'''
	        )

	# Start serving the app.
	block.launch()