Spaces:
Runtime error
Runtime error
feat: implement detect accident from video (not yet done)
Browse files- app.py +110 -11
- requirements.txt +4 -1
- video/README.md +0 -0
app.py
CHANGED
@@ -7,6 +7,10 @@ import cv2
|
|
7 |
import torch
|
8 |
import supervision as sv
|
9 |
import numpy as np
|
|
|
|
|
|
|
|
|
10 |
|
11 |
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
12 |
CHECKPOINT = 'facebook/detr-resnet-50'
|
@@ -14,6 +18,8 @@ CHECKPOINT_ACCIDENT_DETECTION = 'hilmantm/detr-traffic-accident-detection'
|
|
14 |
CONFIDENCE_TRESHOLD = 0.5
|
15 |
IOU_TRESHOLD = 0.8
|
16 |
NMS_TRESHOLD = 0.5
|
|
|
|
|
17 |
fdic = {
|
18 |
"family" : "Impact",
|
19 |
"style" : "italic",
|
@@ -26,9 +32,6 @@ image_processor = DetrImageProcessor.from_pretrained(CHECKPOINT)
|
|
26 |
model = DetrForObjectDetection.from_pretrained(CHECKPOINT_ACCIDENT_DETECTION)
|
27 |
model.to(DEVICE)
|
28 |
|
29 |
-
# use this function only for DETR Algorithm
|
30 |
-
# def detect_object(model, test_image_path, nms_treshold = 0.5):
|
31 |
-
|
32 |
def inference_from_image(pil_image):
|
33 |
|
34 |
box_annotator = sv.BoxAnnotator()
|
@@ -65,6 +68,101 @@ def inference_from_image(pil_image):
|
|
65 |
print("No object detected")
|
66 |
return None
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
with gr.Blocks() as demo:
|
69 |
gr.Markdown(
|
70 |
"""
|
@@ -76,21 +174,22 @@ with gr.Blocks() as demo:
|
|
76 |
with gr.Row():
|
77 |
with gr.Column():
|
78 |
input_image = gr.Image(label="Input image", type="pil")
|
79 |
-
inp = gr.Textbox(label="Image URL", placeholder="You have image from URL? Drop here")
|
80 |
with gr.Column():
|
81 |
output_image = gr.Image(label="Output image with predicted accident", type="pil")
|
82 |
|
83 |
detect_image_btn = gr.Button(value="Detect Accident")
|
84 |
detect_image_btn.click(fn=inference_from_image, inputs=[input_image], outputs=[output_image])
|
85 |
|
86 |
-
|
87 |
-
|
88 |
-
with gr.
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
92 |
|
93 |
-
|
|
|
94 |
|
95 |
|
96 |
demo.launch(debug=True)
|
|
|
7 |
import torch
|
8 |
import supervision as sv
|
9 |
import numpy as np
|
10 |
+
from pytube import YouTube
|
11 |
+
import uuid
|
12 |
+
import os
|
13 |
+
from moviepy.editor import VideoFileClip
|
14 |
|
15 |
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
|
16 |
CHECKPOINT = 'facebook/detr-resnet-50'
|
|
|
18 |
CONFIDENCE_TRESHOLD = 0.5
|
19 |
IOU_TRESHOLD = 0.8
|
20 |
NMS_TRESHOLD = 0.5
|
21 |
+
VIDEO_PATH = os.path.join("video")
|
22 |
+
VIDEO_INFRENCE = False
|
23 |
fdic = {
|
24 |
"family" : "Impact",
|
25 |
"style" : "italic",
|
|
|
32 |
model = DetrForObjectDetection.from_pretrained(CHECKPOINT_ACCIDENT_DETECTION)
|
33 |
model.to(DEVICE)
|
34 |
|
|
|
|
|
|
|
35 |
def inference_from_image(pil_image):
|
36 |
|
37 |
box_annotator = sv.BoxAnnotator()
|
|
|
68 |
print("No object detected")
|
69 |
return None
|
70 |
|
71 |
+
def convert_to_h264(file_path, output_file):
|
72 |
+
clip = VideoFileClip(file_path)
|
73 |
+
clip.write_videofile(output_file, codec="libx264")
|
74 |
+
clip.close()
|
75 |
+
|
76 |
+
def inference_from_video(url):
|
77 |
+
box_annotator = sv.BoxAnnotator()
|
78 |
+
|
79 |
+
# Define the YouTube video URL
|
80 |
+
video_url = url
|
81 |
+
|
82 |
+
# Create a YouTube object and get the video stream
|
83 |
+
yt = YouTube(video_url)
|
84 |
+
yt_stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
|
85 |
+
|
86 |
+
# Download the video to a file
|
87 |
+
unique_id = uuid.uuid4().hex[:6].upper()
|
88 |
+
video_folder = os.path.join(VIDEO_PATH, unique_id)
|
89 |
+
video_filename = os.path.join(video_folder, f"{unique_id}.mp4")
|
90 |
+
result_video_filename = os.path.join(video_folder, f"{unique_id}_result.mp4")
|
91 |
+
result_video_filename_temp = os.path.join(video_folder, f"{unique_id}_result_temp.mp4")
|
92 |
+
|
93 |
+
os.mkdir(video_folder)
|
94 |
+
yt_stream.download(filename=video_filename)
|
95 |
+
|
96 |
+
# Load the video
|
97 |
+
cap = cv2.VideoCapture(video_filename)
|
98 |
+
|
99 |
+
# Get the video frame dimensions
|
100 |
+
frame_width = int(cap.get(3))
|
101 |
+
frame_height = int(cap.get(4))
|
102 |
+
|
103 |
+
# Define the codec and create a VideoWriter object
|
104 |
+
out = cv2.VideoWriter(result_video_filename_temp, cv2.VideoWriter_fourcc(*'mp4v'), 30, (frame_width, frame_height))
|
105 |
+
|
106 |
+
while True:
|
107 |
+
ret, image = cap.read()
|
108 |
+
if not ret:
|
109 |
+
break
|
110 |
+
|
111 |
+
# inference
|
112 |
+
with torch.no_grad():
|
113 |
+
|
114 |
+
# load image and predict
|
115 |
+
inputs = image_processor(images=image, return_tensors='pt').to(DEVICE)
|
116 |
+
outputs = model(**inputs)
|
117 |
+
|
118 |
+
# post-process
|
119 |
+
target_sizes = torch.tensor([image.shape[:2]]).to(DEVICE)
|
120 |
+
results = image_processor.post_process_object_detection(
|
121 |
+
outputs=outputs,
|
122 |
+
threshold=CONFIDENCE_TRESHOLD,
|
123 |
+
target_sizes=target_sizes
|
124 |
+
)[0]
|
125 |
+
|
126 |
+
print("transformer result", results)
|
127 |
+
|
128 |
+
if results['scores'].shape[0] != 0 or results['labels'].shape[0] != 0:
|
129 |
+
# annotate
|
130 |
+
detections = sv.Detections.from_transformers(transformers_results=results).with_nms(threshold=NMS_TRESHOLD)
|
131 |
+
labels = [
|
132 |
+
f"{model.config.id2label[class_id]} {confidence:0.2f}"
|
133 |
+
for _, confidence, class_id, _
|
134 |
+
in detections
|
135 |
+
]
|
136 |
+
frame = box_annotator.annotate(scene=image.copy(), detections=detections, labels=labels)
|
137 |
+
out.write(frame)
|
138 |
+
else:
|
139 |
+
out.write(image)
|
140 |
+
|
141 |
+
cap.release()
|
142 |
+
out.release()
|
143 |
+
|
144 |
+
convert_to_h264(result_video_filename_temp, result_video_filename)
|
145 |
+
|
146 |
+
# delete temp file
|
147 |
+
os.remove(result_video_filename_temp)
|
148 |
+
|
149 |
+
return result_video_filename
|
150 |
+
|
151 |
+
|
152 |
+
def testing(file):
|
153 |
+
unique_id = "39EE5A"
|
154 |
+
video_folder = os.path.join(VIDEO_PATH, unique_id)
|
155 |
+
video_filename = os.path.join(video_folder, f"{unique_id}.mp4")
|
156 |
+
result_video_filename = os.path.join(video_folder, f"{unique_id}_result.mp4")
|
157 |
+
result_video_filename_temp = os.path.join(video_folder, f"{unique_id}_result_temp.mp4")
|
158 |
+
|
159 |
+
convert_to_h264(result_video_filename_temp, result_video_filename)
|
160 |
+
|
161 |
+
os.remove(result_video_filename_temp)
|
162 |
+
|
163 |
+
return result_video_filename
|
164 |
+
|
165 |
+
|
166 |
with gr.Blocks() as demo:
|
167 |
gr.Markdown(
|
168 |
"""
|
|
|
174 |
with gr.Row():
|
175 |
with gr.Column():
|
176 |
input_image = gr.Image(label="Input image", type="pil")
|
|
|
177 |
with gr.Column():
|
178 |
output_image = gr.Image(label="Output image with predicted accident", type="pil")
|
179 |
|
180 |
detect_image_btn = gr.Button(value="Detect Accident")
|
181 |
detect_image_btn.click(fn=inference_from_image, inputs=[input_image], outputs=[output_image])
|
182 |
|
183 |
+
if VIDEO_INFRENCE:
|
184 |
+
gr.Markdown("## Detect Accident from Video")
|
185 |
+
with gr.Row():
|
186 |
+
with gr.Column():
|
187 |
+
inp = gr.Textbox(label="Youtube URL", placeholder="You should upload video to youtube and drop the link here")
|
188 |
+
with gr.Column():
|
189 |
+
output_video = gr.Video(label="Output image with predicted accident", format="mp4")
|
190 |
|
191 |
+
detect_video_btn = gr.Button(value="Detect Accident")
|
192 |
+
detect_video_btn.click(fn=inference_from_video, inputs=[inp], outputs=[output_video])
|
193 |
|
194 |
|
195 |
demo.launch(debug=True)
|
requirements.txt
CHANGED
@@ -4,4 +4,7 @@ supervision==0.3.0
|
|
4 |
pytorch-lightning
|
5 |
roboflow
|
6 |
timm
|
7 |
-
numpy
|
|
|
|
|
|
|
|
4 |
pytorch-lightning
|
5 |
roboflow
|
6 |
timm
|
7 |
+
numpy
|
8 |
+
pytube
|
9 |
+
ffmpeg
|
10 |
+
moviepy
|
video/README.md
ADDED
File without changes
|