youssef committed on
Commit d40303a · 1 Parent(s): 43dbecd
Files changed (4)
  1. example.py +458 -0
  2. requirements.txt +8 -7
  3. src/app.py +94 -17
  4. src/video_processor/processor.py +0 -1
example.py ADDED
@@ -0,0 +1,458 @@
+ import os
+ import json
+ import gradio as gr
+ import tempfile
+ import torch
+ import spaces
+ from pathlib import Path
+ from transformers import AutoProcessor, AutoModelForImageTextToText
+ import subprocess
+ import logging
+
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ def load_examples(json_path: str) -> dict:
+     with open(json_path, 'r') as f:
+         return json.load(f)
+
+ def format_duration(seconds: int) -> str:
+     hours = seconds // 3600
+     minutes = (seconds % 3600) // 60
+     secs = seconds % 60
+     if hours > 0:
+         return f"{hours}:{minutes:02d}:{secs:02d}"
+     return f"{minutes}:{secs:02d}"
+
+ def get_video_duration_seconds(video_path: str) -> float:
+     """Use ffprobe to get video duration in seconds."""
+     cmd = [
+         "ffprobe",
+         "-v", "quiet",
+         "-print_format", "json",
+         "-show_format",
+         video_path
+     ]
+     result = subprocess.run(cmd, capture_output=True, text=True)
+     info = json.loads(result.stdout)
+     return float(info["format"]["duration"])
+
+ class VideoHighlightDetector:
+     def __init__(
+         self,
+         model_path: str,
+         device: str = "cuda",
+         batch_size: int = 8
+     ):
+         self.device = device
+         self.batch_size = batch_size
+
+         # Initialize model and processor
+         self.processor = AutoProcessor.from_pretrained(model_path)
+         self.model = AutoModelForImageTextToText.from_pretrained(
+             model_path,
+             torch_dtype=torch.bfloat16,
+             # _attn_implementation="flash_attention_2"
+         ).to(device)
+
+     def analyze_video_content(self, video_path: str) -> str:
+         """Analyze video content to determine its type and description."""
+         system_message = "You are a helpful assistant that can understand videos. Describe what type of video this is and what's happening in it."
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": system_message}]
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "video", "path": video_path},
+                     {"type": "text", "text": "What type of video is this and what's happening in it? Be specific about the content type and general activities you observe."}
+                 ]
+             }
+         ]
+
+         inputs = self.processor.apply_chat_template(
+             messages,
+             add_generation_prompt=True,
+             tokenize=True,
+             return_dict=True,
+             return_tensors="pt"
+         ).to(self.device)
+
+         outputs = self.model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+         return self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
+
+     def determine_highlights(self, video_description: str, prompt_num: int = 1) -> str:
+         """Determine what constitutes highlights based on video description with different prompts."""
+         system_prompts = {
+             1: "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in other videos of this type.",
+             2: "You are a helpful visual-language assistant that can understand videos and edit. You are tasked helping the user to create highlight reels for videos. Highlights should be rare and important events in the video in question."
+         }
+         user_prompts = {
+             1: "List potential highlight moments to look for in this video:",
+             2: "List dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type:"
+         }
+
+
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": system_prompts[prompt_num]}]
+             },
+             {
+                 "role": "user",
+                 "content": [{"type": "text", "text": f"""Here is a description of a video:\n\n{video_description}\n\n{user_prompts[prompt_num]}"""}]
+             }
+         ]
+
+         print(f"Using prompt {prompt_num} for highlight detection")
+         print(messages)
+
+         inputs = self.processor.apply_chat_template(
+             messages,
+             add_generation_prompt=True,
+             tokenize=True,
+             return_dict=True,
+             return_tensors="pt"
+         ).to(self.device)
+
+         outputs = self.model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.7)
+         return self.processor.decode(outputs[0], skip_special_tokens=True).split("Assistant: ")[1]
+
+     def process_segment(self, video_path: str, highlight_types: str) -> bool:
+         """Process a video segment and determine if it contains highlights."""
+         messages = [
+             {
+                 "role": "system",
+                 "content": [{"type": "text", "text": "You are a video highlight analyzer. Your role is to identify moments that have high dramatic value, focusing on displays of skill, emotion, personality, or tension. Compare video segments against provided example highlights to find moments with similar emotional impact and visual interest, even if the specific actions differ."}]
+             },
+             {
+                 "role": "user",
+                 "content": [
+                     {"type": "video", "path": video_path},
+                     {"type": "text", "text": f"""Given these highlight examples:\n{highlight_types}\n\nDoes this video contain a moment that matches the core action of one of the highlights? Answer with:\n'yes' or 'no'\nIf yes, justify it"""}]
+             }
+         ]
+
+
+         print(messages)
+
+
+         inputs = self.processor.apply_chat_template(
+             messages,
+             add_generation_prompt=True,
+             tokenize=True,
+             return_dict=True,
+             return_tensors="pt"
+         ).to(self.device)
+
+         outputs = self.model.generate(**inputs, max_new_tokens=64, do_sample=False)
+         response = self.processor.decode(outputs[0], skip_special_tokens=True).lower().split("assistant: ")[1]
+         print(f"Segment response {response}")
+         return "yes" in response
+
+     def _concatenate_scenes(
+         self,
+         video_path: str,
+         scene_times: list,
+         output_path: str
+     ):
+         """Concatenate selected scenes into final video."""
+         if not scene_times:
+             logger.warning("No scenes to concatenate, skipping.")
+             return
+
+         filter_complex_parts = []
+         concat_inputs = []
+         for i, (start_sec, end_sec) in enumerate(scene_times):
+             filter_complex_parts.append(
+                 f"[0:v]trim=start={start_sec}:end={end_sec},"
+                 f"setpts=PTS-STARTPTS[v{i}];"
+             )
+             filter_complex_parts.append(
+                 f"[0:a]atrim=start={start_sec}:end={end_sec},"
+                 f"asetpts=PTS-STARTPTS[a{i}];"
+             )
+             concat_inputs.append(f"[v{i}][a{i}]")
+
+         concat_filter = f"{''.join(concat_inputs)}concat=n={len(scene_times)}:v=1:a=1[outv][outa]"
+         filter_complex = "".join(filter_complex_parts) + concat_filter
+
+         cmd = [
+             "ffmpeg",
+             "-y",
+             "-i", video_path,
+             "-filter_complex", filter_complex,
+             "-map", "[outv]",
+             "-map", "[outa]",
+             "-c:v", "libx264",
+             "-c:a", "aac",
+             output_path
+         ]
+
+         logger.info(f"Running ffmpeg command: {' '.join(cmd)}")
+         subprocess.run(cmd, check=True)
+
+ def create_ui(examples_path: str, model_path: str):
+     examples_data = load_examples(examples_path)
+
+     with gr.Blocks() as app:
+         gr.Markdown("# Video Highlight Generator")
+         gr.Markdown("Upload a video and get an automated highlight reel!")
+
+         with gr.Row():
+             gr.Markdown("## Example Results")
+
+         with gr.Row():
+             for example in examples_data["examples"]:
+                 with gr.Column():
+                     gr.Video(
+                         value=example["original"]["url"],
+                         label=f"Original ({format_duration(example['original']['duration_seconds'])})",
+                         interactive=False
+                     )
+                     gr.Markdown(f"### {example['title']}")
+
+                 with gr.Column():
+                     gr.Video(
+                         value=example["highlights"]["url"],
+                         label=f"Highlights ({format_duration(example['highlights']['duration_seconds'])})",
+                         interactive=False
+                     )
+                     with gr.Accordion("Chain of thought details", open=False):
+                         gr.Markdown(f"### Summary:\n{example['analysis']['video_description']}")
+                         gr.Markdown(f"### Highlights to search for:\n{example['analysis']['highlight_types']}")
+
+         gr.Markdown("## Try It Yourself!")
+         with gr.Row():
+             with gr.Column(scale=1):
+                 input_video = gr.Video(
+                     label="Upload your video (max 30 minutes)",
+                     interactive=True
+                 )
+                 process_btn = gr.Button("Process Video", variant="primary")
+
+             with gr.Column(scale=1):
+                 output_video = gr.Video(
+                     label="Highlight Video",
+                     visible=False,
+                     interactive=False,
+                 )
+
+                 status = gr.Markdown()
+
+                 analysis_accordion = gr.Accordion(
+                     "Chain of thought details",
+                     open=True,
+                     visible=False
+                 )
+
+                 with analysis_accordion:
+                     video_description = gr.Markdown("", elem_id="video_desc")
+                     highlight_types = gr.Markdown("", elem_id="highlight_types")
+
+         @spaces.GPU
+         def on_process(video):
+             # Clear all components when starting new processing
+             yield [
+                 "", # Clear status
+                 "", # Clear video description
+                 "", # Clear highlight types
+                 gr.update(value=None, visible=False), # Clear video
+                 gr.update(visible=False) # Hide accordion
+             ]
+
+             if not video:
+                 yield [
+                     "Please upload a video",
+                     "",
+                     "",
+                     gr.update(visible=False),
+                     gr.update(visible=False)
+                 ]
+                 return
+
+             try:
+                 duration = get_video_duration_seconds(video)
+                 if duration > 1800: # 30 minutes
+                     yield [
+                         "Video must be shorter than 30 minutes",
+                         "",
+                         "",
+                         gr.update(visible=False),
+                         gr.update(visible=False)
+                     ]
+                     return
+
+                 yield [
+                     "Initializing video highlight detector...",
+                     "",
+                     "",
+                     gr.update(visible=False),
+                     gr.update(visible=False)
+                 ]
+
+                 detector = VideoHighlightDetector(
+                     model_path=model_path,
+                     batch_size=16
+                 )
+
+                 yield [
+                     "Analyzing video content...",
+                     "",
+                     "",
+                     gr.update(visible=False),
+                     gr.update(visible=True)
+                 ]
+
+                 video_desc = detector.analyze_video_content(video)
+                 formatted_desc = f"### Summary:\n {video_desc[:500] + '...' if len(video_desc) > 500 else video_desc}"
+
+                 yield [
+                     "Determining highlight types (2 variations)...",
+                     formatted_desc,
+                     "",
+                     gr.update(visible=False),
+                     gr.update(visible=True)
+                 ]
+
+                 # Get two different sets of highlights
+                 highlights1 = detector.determine_highlights(video_desc, prompt_num=1)
+                 highlights2 = detector.determine_highlights(video_desc, prompt_num=2)
+                 formatted_highlights = f"### Highlights to search for:\nSet 1:\n{highlights1[:500] + '...' if len(highlights1) > 500 else highlights1}\n\nSet 2:\n{highlights2[:500] + '...' if len(highlights2) > 500 else highlights2}"
+
+                 # Split video into segments
+                 temp_dir = "temp_segments"
+                 os.makedirs(temp_dir, exist_ok=True)
+
+                 segment_length = 10.0
+                 duration = get_video_duration_seconds(video)
+                 kept_segments1 = []
+                 kept_segments2 = []
+                 segments_processed = 0
+                 total_segments = int(duration / segment_length)
+
+                 for start_time in range(0, int(duration), int(segment_length)):
+                     progress = int((segments_processed / total_segments) * 100)
+
+                     yield [
+                         f"Processing segments... {progress}% complete",
+                         formatted_desc,
+                         formatted_highlights,
+                         gr.update(visible=False),
+                         gr.update(visible=True)
+                     ]
+
+                     # Create segment
+                     segment_path = f"{temp_dir}/segment_{start_time}.mp4"
+                     end_time = min(start_time + segment_length, duration)
+
+                     cmd = [
+                         "ffmpeg",
+                         "-y",
+                         "-i", video,
+                         "-ss", str(start_time),
+                         "-t", str(segment_length),
+                         "-c:v", "libx264",
+                         "-preset", "ultrafast", # Use ultrafast preset for speed
+                         "-pix_fmt", "yuv420p", # Ensure compatible pixel format
+                         segment_path
+                     ]
+                     subprocess.run(cmd, check=True)
+
+                     # Process segment with both highlight sets
+                     if detector.process_segment(segment_path, highlights1):
+                         print("KEEPING SEGMENT FOR SET 1")
+                         kept_segments1.append((start_time, end_time))
+
+                     if detector.process_segment(segment_path, highlights2):
+                         print("KEEPING SEGMENT FOR SET 2")
+                         kept_segments2.append((start_time, end_time))
+
+                     # Clean up segment file
+                     os.remove(segment_path)
+                     segments_processed += 1
+
+
+                 # Remove temp directory
+                 os.rmdir(temp_dir)
+
+                 # Calculate percentages of video kept for each highlight set
+                 total_duration = duration
+                 duration1 = sum(end - start for start, end in kept_segments1)
+                 duration2 = sum(end - start for start, end in kept_segments2)
+
+                 percent1 = (duration1 / total_duration) * 100
+                 percent2 = (duration2 / total_duration) * 100
+
+                 print(f"Highlight set 1: {percent1:.1f}% of video")
+                 print(f"Highlight set 2: {percent2:.1f}% of video")
+
+                 # Choose the set with lower percentage unless it's zero
+                 final_segments = kept_segments2 if (0 < percent2 <= percent1 or percent1 == 0) else kept_segments1
+
+                 # Create final video
+                 if final_segments:
+                     with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmp_file:
+                         temp_output = tmp_file.name
+                         detector._concatenate_scenes(video, final_segments, temp_output)
+
+                     selected_set = "2" if final_segments == kept_segments2 else "1"
+                     percent_used = percent2 if final_segments == kept_segments2 else percent1
+
+                     completion_message = f"Processing complete! Used highlight set {selected_set} ({percent_used:.1f}% of video)"
+
+                     yield [
+                         completion_message,
+                         formatted_desc,
+                         formatted_highlights,
+                         gr.update(value=temp_output, visible=True),
+                         gr.update(visible=True)
+                     ]
+                 else:
+                     yield [
+                         "No highlights detected in the video with either set of criteria.",
+                         formatted_desc,
+                         formatted_highlights,
+                         gr.update(visible=False),
+                         gr.update(visible=True)
+                     ]
+
+             except Exception as e:
+                 logger.exception("Error processing video")
+                 yield [
+                     f"Error processing video: {str(e)}",
+                     "",
+                     "",
+                     gr.update(visible=False),
+                     gr.update(visible=False)
+                 ]
+             finally:
+                 # Clean up
+                 torch.cuda.empty_cache()
+
+
+         process_btn.click(
+             on_process,
+             inputs=[input_video],
+             outputs=[
+                 status,
+                 video_description,
+                 highlight_types,
+                 output_video,
+                 analysis_accordion
+             ],
+             queue=True,
+         )
+
+     return app
+
+ if __name__ == "__main__":
+     # subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+     # Initialize CUDA
+     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+
+     app = create_ui("video_spec.json", "HuggingFaceTB/SmolVLM2-2.2B-Instruct")
+     app.launch()
requirements.txt CHANGED
@@ -1,8 +1,9 @@
- torch==2.1.2
- torchvision==0.16.2
+ torch
+ torchvision
  git+https://github.com/huggingface/transformers.git
- num2words==0.5.13
- gradio==4.19.2
- av==10.0.0
- numpy==1.24.3
- Pillow==10.0.0
+ gradio
+ huggingface_hub
+ spaces
+ av
+ numpy
+ Pillow
src/app.py CHANGED
@@ -1,31 +1,108 @@
  import gradio as gr
  from video_processor.processor import VideoAnalyzer
  import logging
+ import torch
+ import spaces

  # Configure logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- analyzer = VideoAnalyzer()
-
- def process_video(video_path):
-     """Process video and return description"""
+ @spaces.GPU
+ def on_process(video):
+     # Clear all components when starting new processing
+     yield [
+         "", # Clear status
+         "", # Clear description
+         gr.update(visible=False) # Hide accordion
+     ]
+
+     if not video:
+         yield [
+             "Please upload a video",
+             "",
+             gr.update(visible=False)
+         ]
+         return
+
      try:
-         logger.info(f"Processing video: {video_path}")
-         results = analyzer.process_video(video_path)
-         return results[0]["description"]
+         # Initialize analyzer
+         yield [
+             "Initializing video analyzer...",
+             "",
+             gr.update(visible=False)
+         ]
+
+         analyzer = VideoAnalyzer()
+
+         # Process video
+         yield [
+             "Analyzing video content...",
+             "",
+             gr.update(visible=True)
+         ]
+
+         logger.info(f"Processing video: {video}")
+         result = analyzer.process_video(video)
+         description = result[0]["description"]
+
+         # Format output
+         formatted_desc = f"### Analysis:\n{description}"
+
+         yield [
+             "Processing complete!",
+             formatted_desc,
+             gr.update(visible=True)
+         ]
+
      except Exception as e:
-         logger.error(f"Error processing video: {e}")
-         return str(e)
+         logger.exception("Error processing video")
+         yield [
+             f"Error processing video: {str(e)}",
+             "",
+             gr.update(visible=False)
+         ]
+     finally:
+         # Clean up
+         torch.cuda.empty_cache()

  # Create Gradio interface
- demo = gr.Interface(
-     fn=process_video,
-     inputs=gr.Video(label="Upload Video"),
-     outputs=gr.Textbox(label="Video Description"),
-     title="SmolVLM Video Analyzer",
-     description="Upload a video to get a detailed description of its contents."
- )
+ with gr.Blocks() as demo:
+     gr.Markdown("# SmolVLM Video Analyzer")
+     gr.Markdown("Upload a video to get a detailed analysis of its content.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             input_video = gr.Video(
+                 label="Upload your video",
+                 interactive=True
+             )
+             process_btn = gr.Button("Process Video", variant="primary")
+
+         with gr.Column(scale=1):
+             status = gr.Markdown()
+             analysis_accordion = gr.Accordion(
+                 "Analysis Details",
+                 open=True,
+                 visible=False
+             )
+             with analysis_accordion:
+                 video_description = gr.Markdown("")
+
+     process_btn.click(
+         on_process,
+         inputs=[input_video],
+         outputs=[
+             status,
+             video_description,
+             analysis_accordion
+         ],
+         queue=True,
+     )

  if __name__ == "__main__":
-     demo.launch()
+     demo.launch(
+         server_name="0.0.0.0",
+         server_port=7860,
+         share=False
+     )
src/video_processor/processor.py CHANGED
@@ -31,7 +31,6 @@ class VideoAnalyzer:
              torch_dtype=torch.bfloat16
          )

-
          self.model = AutoModelForImageTextToText.from_pretrained(
              self.model_path,
              torch_dtype=torch.bfloat16,