mfarre HF staff committed on
Commit
d7863e4
·
1 Parent(s): c587eed

prompts update + transformers update

Browse files
Files changed (1) hide show
  1. app.py +12 -8
app.py CHANGED
@@ -9,7 +9,7 @@ import subprocess
9
  import logging
10
  import xml.etree.ElementTree as ET
11
  from xml.dom import minidom
12
- from transformers import AutoProcessor, AutoModelForVision2Seq
13
 
14
 
15
  logging.basicConfig(level=logging.INFO)
@@ -49,7 +49,7 @@ class VideoHighlightDetector:
49
 
50
  # Initialize model and processor
51
  self.processor = AutoProcessor.from_pretrained(model_path)
52
- self.model = AutoModelForVision2Seq.from_pretrained(
53
  model_path,
54
  torch_dtype=torch.bfloat16
55
  ).to(device)
@@ -86,13 +86,13 @@ class VideoHighlightDetector:
86
  messages = [
87
  {
88
  "role": "system",
89
- "content": [{"type": "text", "text": "Describe what is happening in this specific video segment in a brief, concise way."}]
90
  },
91
  {
92
  "role": "user",
93
  "content": [
94
  {"type": "video", "path": video_path},
95
- {"type": "text", "text": "What is happening in this segment? Provide a very brief and concise description."}
96
  ]
97
  }
98
  ]
@@ -109,14 +109,15 @@ class VideoHighlightDetector:
109
  return self.processor.decode(outputs[0], skip_special_tokens=True).split("Assistant: ")[1]
110
 
111
  def determine_highlights(self, video_description: str) -> str:
 
112
  messages = [
113
  {
114
  "role": "system",
115
- "content": [{"type": "text", "text": "You are a professional video editor specializing in creating viral highlight reels."}]
116
  },
117
  {
118
  "role": "user",
119
- "content": [{"type": "text", "text": f"Based on this description, list which segments should be included in highlights: {video_description}"}]
120
  }
121
  ]
122
 
@@ -133,12 +134,15 @@ class VideoHighlightDetector:
133
 
134
  def process_segment(self, video_path: str, highlight_types: str) -> bool:
135
  messages = [
 
 
 
 
136
  {
137
  "role": "user",
138
  "content": [
139
  {"type": "video", "path": video_path},
140
- {"type": "text", "text": f"Do you see any of these elements in the video: {highlight_types}? Answer yes or no."}
141
- ]
142
  }
143
  ]
144
 
 
9
  import logging
10
  import xml.etree.ElementTree as ET
11
  from xml.dom import minidom
12
+ from transformers import AutoProcessor, AutoModelForImageTextToText
13
 
14
 
15
  logging.basicConfig(level=logging.INFO)
 
49
 
50
  # Initialize model and processor
51
  self.processor = AutoProcessor.from_pretrained(model_path)
52
+ self.model = AutoModelForImageTextToText.from_pretrained(
53
  model_path,
54
  torch_dtype=torch.bfloat16
55
  ).to(device)
 
86
  messages = [
87
  {
88
  "role": "system",
89
+ "content": [{"type": "text", "text": "Focus only on describing the key dramatic action or notable event occurring in this video segment. Skip general context or scene-setting details unless they are crucial to understanding the main action."}]
90
  },
91
  {
92
  "role": "user",
93
  "content": [
94
  {"type": "video", "path": video_path},
95
+ {"type": "text", "text": "WWhat is the main action or notable event happening in this segment? Describe it in one brief sentence."}
96
  ]
97
  }
98
  ]
 
109
  return self.processor.decode(outputs[0], skip_special_tokens=True).split("Assistant: ")[1]
110
 
111
  def determine_highlights(self, video_description: str) -> str:
112
+ """Determine what constitutes highlights based on video description."""
113
  messages = [
114
  {
115
  "role": "system",
116
+ "content": [{"type": "text", "text": "You are a highlight editor. List archetypal dramatic moments that would make compelling highlights if they appear in the video. Each moment should be specific enough to be recognizable but generic enough to potentially exist in any video of this type."}]
117
  },
118
  {
119
  "role": "user",
120
+ "content": [{"type": "text", "text": f"""Here is a description of a video:\n\n{video_description}\n\nList potential highlight moments to look for in this video:"""}]
121
  }
122
  ]
123
 
 
134
 
135
  def process_segment(self, video_path: str, highlight_types: str) -> bool:
136
  messages = [
137
+ {
138
+ "role": "system",
139
+ "content": [{"type": "text", "text": "You are a video highlight analyzer. Your role is to identify moments that have high dramatic value, focusing on displays of skill, emotion, personality, or tension. Compare video segments against provided example highlights to find moments with similar emotional impact and visual interest, even if the specific actions differ."}]
140
+ },
141
  {
142
  "role": "user",
143
  "content": [
144
  {"type": "video", "path": video_path},
145
+ {"type": "text", "text": f"""Given these highlight examples:\n{highlight_types}\n\nDoes this video contain a moment that matches the core action of one of the highlights? Answer with:\n'yes' or 'no'\nIf yes, justify it"""}]
 
146
  }
147
  ]
148