youssef
committed on
Commit
·
c9f0527
1
Parent(s):
3ad5e22
fix dockerfile
Browse files
- Dockerfile +1 -0
- src/video_processor/processor.py +31 -23
Dockerfile
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
|
|
|
2 |
ENV DEBIAN_FRONTEND=noninteractive
|
3 |
RUN apt-get update && \
|
4 |
apt-get upgrade -y && \
|
|
|
1 |
FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
|
2 |
+
|
3 |
ENV DEBIAN_FRONTEND=noninteractive
|
4 |
RUN apt-get update && \
|
5 |
apt-get upgrade -y && \
|
src/video_processor/processor.py
CHANGED
@@ -65,34 +65,42 @@ class VideoAnalyzer:
|
|
65 |
|
66 |
def analyze_segment(self, video_path: str, start_time: float) -> str:
|
67 |
"""Analyze a single video segment."""
|
|
|
68 |
messages = [
|
|
|
|
|
|
|
69 |
{
|
70 |
-
"
|
71 |
-
"
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
{
|
81 |
-
"
|
82 |
-
"
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
- What is the environment and weather like?
|
87 |
-
- What objects or items are visible?
|
88 |
-
- Is there any text visible on screen?
|
89 |
-
- What actions or events are occurring?
|
90 |
-
- Note any significant visual details
|
91 |
-
Be specific about all visual elements to enable searching later."""}
|
92 |
-
]
|
93 |
}
|
94 |
]
|
95 |
-
|
|
|
|
|
|
|
96 |
inputs = self.processor.apply_chat_template(
|
97 |
messages,
|
98 |
add_generation_prompt=True,
|
|
|
65 |
|
66 |
def analyze_segment(self, video_path: str, start_time: float) -> str:
|
67 |
"""Analyze a single video segment."""
|
68 |
+
|
69 |
messages = [
|
70 |
+
{
|
71 |
+
"role": "system",
|
72 |
+
"content": [
|
73 |
{
|
74 |
+
"type": "text",
|
75 |
+
"text": (
|
76 |
+
"You are an AI specialized in video content analysis. "
|
77 |
+
"Your task is to watch the provided video segment and generate a detailed, structured description focusing on the following elements:\n"
|
78 |
+
"1. **People and Their Actions:** Identify all individuals, their appearances, and describe their activities or interactions.\n"
|
79 |
+
"2. **Environment and Setting:** Describe the location, time of day, weather conditions, and any notable background details.\n"
|
80 |
+
"3. **Objects and Their Positions:** List prominent objects, their attributes, and spatial relationships within the scene.\n"
|
81 |
+
"4. **On-Screen Text:** Transcribe any visible text, including signs, labels, or subtitles, and specify their locations.\n"
|
82 |
+
"5. **Key Events and Timing:** Outline significant events, actions, or changes, along with their timestamps.\n\n"
|
83 |
+
"Provide the information in a clear and concise manner, using bullet points or numbered lists where appropriate."
|
84 |
+
)
|
85 |
+
}
|
86 |
+
]
|
87 |
+
},
|
88 |
+
{
|
89 |
+
"role": "user",
|
90 |
+
"content": [
|
91 |
+
{"type": "video", "path": video_path},
|
92 |
{
|
93 |
+
"type": "text",
|
94 |
+
"text": (
|
95 |
+
"Please analyze the attached video segment and provide a structured description as per the guidelines above. "
|
96 |
+
"If certain elements are not present in the video, you may omit them from your response."
|
97 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
}
|
99 |
]
|
100 |
+
}
|
101 |
+
]
|
102 |
+
|
103 |
+
|
104 |
inputs = self.processor.apply_chat_template(
|
105 |
messages,
|
106 |
add_generation_prompt=True,
|