import os
import gradio as gr
import pandas as pd
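
# Leaderboard app for the LOVEU-TGVE (Text-Guided Video Editing) competition @ CVPR 2023.
# Column sets and Gradio datatypes for the human-eval and automated-metric leaderboards.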
BASELINE = '<a target="_blank" href="https://github.com/showlab/loveu-tgve-2023" style="color: blue; text-decoration: underline; text-decoration-style: dotted;">Tune-A-Video (Baseline)</a>'
COLS = ["Method", "Human Eval (Text Alignment) ⬆️", "Human Eval (Structure) ⬆️", "Human Eval (Quality) ⬆️", "Human Eval (Avg.) ⬆️",
"References"]
COLS_AUTO = ["Method", "CLIPScore (Text Alignment) ⬆️", "CLIPScore (Frame Consistency) ⬆️", "PickScore ⬆️", "References"]
TYPES = ["markdown", "number", "number", "number", "number", "markdown"]
TYPES_AUTO = ["markdown", "number", "number", "number", "markdown"]
def get_leaderboard():
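    """Build the human-eval and automated-metrics leaderboards as pandas DataFrames."""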
from result import submission_results
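    # submission_results is expected to be a list of dicts keyed by the column names in COLS / COLS_AUTO.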
all_data = []
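    # Reference results for the two baseline systems; they are shown only on the automated-metrics board.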
baseline_0 = {
"Method": '**Tune-A-Video**',
"CLIPScore (Frame Consistency) ⬆️":92.40,
"CLIPScore (Text Alignment) ⬆️":27.12,
"PickScore ⬆️":20.36,
"References": ','.join([f'<a target="_blank" href="https://arxiv.org/abs/2212.11565" style="color: blue">Paper</a>',
f'<a target="_blank" href="https://github.com/showlab/Tune-A-Video" style="color: blue">Code</a>',
f'<a target="_blank" href="https://tuneavideo.github.io/" style="color: blue">Website</a>',
f'<a target="_blank" href="https://huggingface.co/spaces/Tune-A-Video-library/Tune-A-Video-inference" style="color: blue">Demo</a>'])
}
baseline_1 = {
"Method": 'VideoCrafter',
"CLIPScore (Frame Consistency) ⬆️":88.51,
"CLIPScore (Text Alignment) ⬆️":25.55,
"PickScore ⬆️":19.17,
"References": ','.join([f'<a target="_blank" href="https://github.com/VideoCrafter/VideoCrafter" style="color: blue">Code</a>',
f'<a target="_blank" href="https://huggingface.co/spaces/VideoCrafter/VideoCrafter" style="color: blue">Demo</a>'])
}
all_data += submission_results
dataframe = pd.DataFrame.from_records(all_data)
dataframe = dataframe.sort_values(by=['Human Eval (Avg.) ⬆️'], ascending=False)
print(dataframe)
dataframe_human = dataframe[COLS]
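    # Append the baselines so they appear on the automated-metrics board.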
all_data += [baseline_0, baseline_1]
dataframe = pd.DataFrame.from_records(all_data)
dataframe_auto = dataframe[COLS_AUTO]
dataframe_auto = dataframe_auto.sort_values(by=['CLIPScore (Text Alignment) ⬆️'], ascending=False)
return dataframe_human, dataframe_auto
leaderboard, leaderboard_auto = get_leaderboard()
def refresh():
return get_leaderboard()
def load_edited_video(source_video, *args):
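    """Map an example source video to its pre-rendered edited counterpart in files/ ("<name>-edit.mp4")."""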
result = source_video.split('/')[-1].split('.mp4')[0] + '-edit.mp4'
return os.path.join(os.path.dirname(__file__), f"files/{result}")
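
# Illustrative only (not used by this app): a minimal sketch of how the two
# CLIPScore metrics described on the leaderboard tab could be computed with
# Hugging Face `transformers`. The checkpoint name and the exact score scaling
# are assumptions for illustration; the official evaluation scripts may differ.
# PickScore would be computed analogously with the PickScore reward model.
def _example_clip_metrics(frames, edited_prompt):
    """Return (frame consistency, text alignment) for one edited video.

    `frames` is a list of PIL images; `edited_prompt` is the edited caption.
    """
    import torch
    from transformers import CLIPModel, CLIPProcessor

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    inputs = processor(text=[edited_prompt], images=frames,
                       return_tensors="pt", padding=True)
    with torch.no_grad():
        image_emb = model.get_image_features(pixel_values=inputs["pixel_values"])
        text_emb = model.get_text_features(input_ids=inputs["input_ids"],
                                           attention_mask=inputs["attention_mask"])
    image_emb = image_emb / image_emb.norm(dim=-1, keepdim=True)
    text_emb = text_emb / text_emb.norm(dim=-1, keepdim=True)

    # Frame consistency: mean cosine similarity over all pairs of frame embeddings.
    pairwise = image_emb @ image_emb.T
    n = pairwise.shape[0]
    frame_consistency = ((pairwise.sum() - n) / (n * (n - 1))).item() * 100

    # Text alignment: mean cosine similarity between each frame and the edited prompt.
    text_alignment = (image_emb @ text_emb.T).mean().item() * 100

    return frame_consistency, text_alignment
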
block = gr.Blocks()
with block:
with gr.Tab("Leaderboard"):
with gr.Row():
gr.Markdown(f"""
# 🤗 LOVEU-TGVE @ CVPR 2023 Leaderboard
<font size="4">
<b>Welcome to the <a href="https://sites.google.com/view/loveucvpr23/track4" target="_blank">Text-Guided Video Editing (TGVE)</a> competition leaderboard of <a href="https://sites.google.com/view/loveucvpr23/home" target="_blank">LOVEU Workshop @ CVPR 2023</a>!</b>
Leveraging AI for video editing has the potential to unleash creativity for artists across all skill levels. The rapidly-advancing field of Text-Guided Video Editing (TGVE) is here to address this challenge. Recent works in this field include <a href="https://tuneavideo.github.io/" target="_blank">Tune-A-Video</a>, <a href="https://research.runwayml.com/gen2" target="_blank">Gen-2</a>, and <a href="https://dreamix-video-editing.github.io/" target="_blank">Dreamix</a>.
In this competition track, we provide a standard set of videos and prompts. As a researcher, you will develop a model that takes a video and a prompt for how to edit it, and your model will produce an edited video. For instance, you might be given a video of “a man is surfing inside the barrel of a wave,” and your model will edit the video to “a man is surfing on a wave made of aurora borealis.”
To participate in the contest, you will submit the videos generated by your model. As you develop your model, you may want to visually evaluate your results and use automated metrics such as the <a href="https://arxiv.org/abs/2104.08718" target="_blank">CLIPScore</a> and <a href="https://arxiv.org/abs/2305.01569" target="_blank">PickScore</a> to track your progress:
- CLIPScore (Frame Consistency) - the average cosine similarity between all pairs of CLIP image embeddings computed on all frames of output videos.
- CLIPScore (Text Alignment) - the average CLIP score between all frames of output videos and corresponding edited prompts.
- PickScore - the average PickScore between all frames of output videos.
After all submissions are uploaded, we will run a human-evaluation of all submitted videos. Specifically, we will have human labelers compare all submitted videos to the baseline videos that were edited with the Tune-A-Video model. Labelers will evaluate videos on the following criteria:
- Text alignment: Which video better matches the caption?
- Structure: Which video better preserves the structure of the input video?
- Quality: Aesthetically, which video is better?
We will choose a winner and a runner-up based on the human evaluation results.
</font>
The **bold** method name indicates that the implementation is **official** (by the author / developer of the original method).""")
with gr.Row():
leaderboard_table = gr.components.Dataframe(value=leaderboard, headers=COLS,
datatype=TYPES, max_rows=10)
with gr.Accordion("Expand for automated metrics results", open=False):
with gr.Row():
leaderboard_table_auto = gr.components.Dataframe(value=leaderboard_auto, headers=COLS_AUTO,
datatype=TYPES_AUTO, max_rows=10)
with gr.Row():
refresh_button = gr.Button("Refresh")
refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table, leaderboard_table_auto])
block.load(refresh, inputs=[], outputs=[leaderboard_table, leaderboard_table_auto])
with gr.Tab("Baseline Demo"):
with gr.Row():
gr.Markdown(f"""Some examples generated by {BASELINE} are shown below.""")
with gr.Row():
with gr.Column():
source_video = gr.Video(type="file", label='Source Video', format="mp4", interactive=True)
source_prompt = gr.Textbox(label='Source Prompt',
# info='A good prompt describes each frame and most objects in video. Especially, it has the object or attribute that we want to edit or preserve.',
max_lines=2,
placeholder='Example: "A cat in the grass in the sun."',
# value='A cat in the grass in the sun.'
)
with gr.Column():
result = gr.Video(type="file", label='Edited Video', format="mp4", interactive=True)
editing_prompt = gr.Textbox(label='Editing Prompt',
# info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
max_lines=2,
placeholder='Example: "A dog in the grass in the sun."',
# value='A dog in the grass in the sun.'
)
with gr.Row():
from example import examples
gr.Examples(examples=examples,
inputs=[source_video, source_prompt, editing_prompt],
outputs=result,
fn=load_edited_video,
cache_examples=True,
)
block.launch()