import os
import time

import cv2
import numpy as np
import torch
import gradio as gr
import spaces  # Hugging Face `spaces` package; provides the @spaces.GPU decorator


def parse_video(video_file):
    """Read a video file and return its frames as a (T, H, W, 3) uint8 RGB array."""
    vs = cv2.VideoCapture(video_file)
    frames = []
    while True:
        gotit, frame = vs.read()
        if frame is not None:
            # OpenCV decodes to BGR; convert to RGB for the model and visualizer.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
        if not gotit:
            break
    vs.release()
    if not frames:
        raise ValueError(f"Could not read any frames from {video_file!r}")
    return np.stack(frames)


@spaces.GPU
def cotracker_demo(
    input_video,
    grid_size: int = 10,
    tracks_leave_trace: bool = False,
):
    # Load the frames and reshape them into the (B, T, C, H, W) float tensor
    # that CoTracker expects.
    load_video = parse_video(input_video)
    load_video = torch.from_numpy(load_video).permute(0, 3, 1, 2)[None].float()

    # Use a millisecond timestamp as a unique name for the rendered result.
    filename = str(round(time.time() * 1000))
    return os.path.join(os.path.dirname(__file__), "results", f"{filename}.mp4")
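
# A minimal sketch (an assumption, not part of the demo above) of the tracking
# and visualization step that would render the file cotracker_demo returns.
# It uses CoTracker's published torch.hub entry point ("cotracker2") and the
# Visualizer class from https://github.com/facebookresearch/co-tracker; the
# helper name run_tracking and its exact wiring are hypothetical, and argument
# names may differ between CoTracker versions.
def run_tracking(load_video, grid_size, tracks_leave_trace, save_dir, filename):
    from cotracker.utils.visualizer import Visualizer  # shipped with the CoTracker repo

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = torch.hub.load("facebookresearch/co-tracker", "cotracker2").to(device)

    # CoTracker samples grid_size * grid_size query points on a regular grid of
    # the first frame, so N = grid_size ** 2 points are tracked jointly.
    # load_video: (B, T, C, H, W); pred_tracks: (B, T, N, 2); pred_visibility: (B, T, N)
    pred_tracks, pred_visibility = model(load_video.to(device), grid_size=grid_size)

    # tracks_leave_trace=-1 keeps the full trail behind each point; 0 hides it.
    vis = Visualizer(save_dir=save_dir, tracks_leave_trace=-1 if tracks_leave_trace else 0)
    vis.visualize(load_video, pred_tracks, pred_visibility, filename=filename)
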
app = gr.Interface(
    title="🎨 CoTracker: It is Better to Track Together",
    description=(
        "Welcome to CoTracker! This space demonstrates point (pixel) tracking "
        "in videos. Points are sampled on a regular grid and are tracked jointly.\n\n"
        "To get started, simply upload your .mp4 video in landscape orientation "
        "or click on one of the example videos to load them. The shorter the "
        "video, the faster the processing. We recommend submitting short videos "
        "of length 2-7 seconds.\n\n"
        "For more details, check out our "
        "[GitHub repo](https://github.com/facebookresearch/co-tracker)."
    ),
    fn=cotracker_demo,
    inputs=[
        gr.Video(type="file", label="Input video", interactive=True),
        gr.Slider(minimum=10, maximum=80, step=1, value=10, label="Grid size"),
    ],
    outputs=gr.Video(label="Video with predicted tracks"),
    cache_examples=True,
    allow_flagging="never",
)

app.queue(max_size=20, concurrency_count=1).launch(debug=True)