Spaces:

fcakyon
/

timesformer

Runtime error

App Files Files Community

fcakyon committed on Dec 2, 2022

Commit

09295f0

1 Parent(s): 80ae000

initial upload

Browse files

Files changed (4) hide show

README.md +7 -9
app.py +98 -0
requirements.txt +7 -0
utils.py +37 -0

README.md CHANGED Viewed

@@ -1,13 +1,11 @@
 ---
-title: Timesformer
-emoji: 🐨
-colorFrom: gray
-colorTo: purple
 sdk: gradio
-sdk_version: 3.12.0
 app_file: app.py
 pinned: false
-license: openrail
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Zero Shot Video Classification
+emoji: 👀
+colorFrom: blue
+colorTo: pink
 sdk: gradio
+sdk_version: 2.9.1
 app_file: app.py
 pinned: false
+license: apache-2.0
+---

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import os
+import torch
+import gradio as gr
+from video_transformers import VideoModel
+from utils import (
+    convert_frames_to_gif,
+    download_youtube_video,
+    sample_frames_from_video_file,
+)
+video_model = VideoModel.from_transformers("facebook/timesformer-base-finetuned-k400")
+examples = [
+    ["https://www.youtube.com/watch?v=huAJ9dC5lmI"],
+    ["https://www.youtube.com/watch?v=wvcWt6u5HTg"],
+    ["https://www.youtube.com/watch?v=-3kZSi5qjRM"],
+    ["https://www.youtube.com/watch?v=-6usjfP8hys"],
+    ["https://www.youtube.com/watch?v=B8OdMwVwyXc"],
+    ["https://www.youtube.com/watch?v=B9ea7YyCP6E"],
+    ["https://www.youtube.com/watch?v=BBkpaeJBKmk"],
+    ["https://www.youtube.com/watch?v=BBqU8Apee_g"],
+    ["https://www.youtube.com/watch?v=BDHub0gBGtc"],
+    ["https://www.youtube.com/watch?v=I7cwq6_4QtM"],
+    ["https://www.youtube.com/watch?v=Z0mJDXpNhYA"],
+    ["https://www.youtube.com/watch?v=QkQQjFGnZlg"],
+    ["https://www.youtube.com/watch?v=IQaoRUQif14"],
+]
+def predict(youtube_url):
+    video_path = download_youtube_video(youtube_url)
+    frames = sample_frames_from_video_file(video_path, num_frames=16)
+    gif_path = convert_frames_to_gif(frames)
+    result = video_model.predict(video_or_folder_path=video_path)
+    os.remove(video_path)
+    return result["predictions"], gif_path
+# Build the Gradio Blocks UI: intro text, a three-column row (URL input,
+# preview of the sampled clip, predictions), clickable examples and a footer.
+app = gr.Blocks()
+with app:
+    gr.Markdown("# **<p align='center'>Video Classification with Timesformer</p>**")
+    gr.Markdown(
+        """
+        <p style='text-align: center'>
+        Timesformer is a video model that uses a Transformer architecture to process video frames.
+        <br>It is released by Facebook AI Research in ICML 2021.
+        <br>This version is trained on Kinetics-400 dataset and can classify videos into 400 classes.
+        </p>
+        """
+    )
+    gr.Markdown(
+        """
+        <p style='text-align: center'>
+        Follow me for more!
+        <br> <a href='https://twitter.com/fcakyon' target='_blank'>twitter</a> | <a href='https://github.com/fcakyon' target='_blank'>github</a> | <a href='https://www.linkedin.com/in/fcakyon/' target='_blank'>linkedin</a> | <a href='https://fcakyon.medium.com/' target='_blank'>medium</a>
+        </p>
+        """
+    )
+    # Three columns in one row: input controls, sampled-clip preview, predictions.
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("Provide a Youtube video URL.")
+            youtube_url = gr.Textbox(label="Youtube URL:", show_label=True)
+            predict_btn = gr.Button(value="Predict")
+        with gr.Column():
+            video_gif = gr.Image(
+                label="Input Clip",
+                show_label=True,
+            )
+        with gr.Column():
+            predictions = gr.Label(
+                label="Predictions:", show_label=True, num_top_classes=5
+            )
+    gr.Markdown("**Examples:**")
+    # predict returns (predictions, gif_path), so the outputs are listed in
+    # that order.
+    # NOTE(review): with cache_examples=True Gradio presumably runs predict on
+    # every example URL up front (download + inference) - confirm against the
+    # Gradio version pinned for this Space.
+    gr.Examples(
+        examples,
+        youtube_url,
+        [predictions, video_gif],
+        fn=predict,
+        cache_examples=True,
+    )
+    # The button uses the same function and output order as the examples.
+    predict_btn.click(predict, inputs=youtube_url, outputs=[predictions, video_gif])
+    gr.Markdown(
+        """
+        \n Demo created by: <a href=\"https://github.com/fcakyon\">fcakyon</a>
+        <br> Based on this <a href=\"https://huggingface.co/facebook/timesformer-base-finetuned-k400\">HuggingFace model</a>
+        """
+    )
+# Launch runs unconditionally when this module is executed.
+app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+gradio
+torch
+decord
+pytube
+imageio
+transformers @ git+https://github.com/huggingface/transformers.git@c54646b13d468b7a21fd6ee18f943ad69daab48e
+video_transformers == 0.0.8

utils.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from pytube import YouTube
+import numpy as np
+from decord import VideoReader, cpu
+import imageio
+def download_youtube_video(url: str):
+    yt = YouTube(url)
+    streams = yt.streams.filter(file_extension="mp4")
+    file_path = streams[0].download()
+    return file_path
+def sample_frame_indices(clip_len, frame_sample_rate):
+    converted_len = int(clip_len * frame_sample_rate)
+    start_idx = 0
+    end_idx = converted_len
+    indices = np.linspace(start_idx, end_idx, num=clip_len)
+    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
+    return indices
+def sample_frames_from_video_file(file_path: str, num_frames: int = 16):
+    videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+    # sample frames
+    videoreader.seek(0)
+    indices = sample_frame_indices(clip_len=num_frames, frame_sample_rate=4)
+    frames = videoreader.get_batch(indices).asnumpy()
+    return frames
+def convert_frames_to_gif(frames):
+    converted_frames = frames.astype(np.uint8)
+    imageio.mimsave("frames.gif", converted_frames, fps=8)
+    return "frames.gif"