MingGatsby committed on
Commit
2c00ad9
1 Parent(s): 1115e59

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+
3
+ # 過濾 SyntaxWarning 警告
4
+ warnings.filterwarnings("ignore", category=SyntaxWarning)
5
+
6
+ import gradio as gr
7
+ from moviepy.editor import VideoFileClip
8
+ import cv2
9
+ import base64
10
+ from openai import OpenAI
11
+ import os
12
+
13
def extract_frames(video_path, seconds_per_frame):
    """Sample frames from a video and return them as base64-encoded JPEGs.

    Frames are read at a fixed stride starting from frame 0. The stride is
    ``fps * seconds_per_frame`` truncated to an integer, and sampling stops
    before the last frame index reported by OpenCV.

    Args:
        video_path: Path of the video, passed to ``cv2.VideoCapture``.
        seconds_per_frame: Sampling interval in seconds. An interval that
            truncates to less than one frame (including 0) is clamped to a
            stride of one frame so that the loop always advances.

    Returns:
        A list of base64 strings, one per successfully encoded frame.
    """
    base64_frames = []
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # A stride of 0 would re-read the same frame forever and grow the
        # result list without bound, so never step by less than one frame.
        frames_to_skip = max(1, int(fps * seconds_per_frame))

        for curr_frame in range(0, total_frames - 1, frames_to_skip):
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            encoded, buffer = cv2.imencode(".jpg", frame)
            if not encoded:
                # Skip a frame that could not be JPEG-encoded rather than
                # emitting an unusable payload.
                continue
            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
    finally:
        # Release the capture handle even if reading or encoding raised.
        video.release()
    return base64_frames
31
+
32
def extract_audio(video_path):
    """Write the audio track of a video to an MP3 file beside the video.

    The output path is the video path with its extension replaced by
    ``.mp3``; the audio is encoded with the ``mp3`` codec at ``32k`` bitrate.

    Args:
        video_path: Path of the video file opened with ``VideoFileClip``.

    Returns:
        The path of the written MP3 file.

    Raises:
        ValueError: If the clip exposes no audio track.
    """
    base_video_path, _ = os.path.splitext(video_path)
    audio_path = f"{base_video_path}.mp3"
    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            # Fail with a clear message instead of an AttributeError on None.
            raise ValueError(f"No audio track found in {video_path!r}")
        clip.audio.write_audiofile(filename=audio_path, codec="mp3", bitrate="32k")
    finally:
        # Always close the clip, including when the export fails.
        clip.close()
    return audio_path
40
+
41
def process_video(video_path, seconds_per_frame=2):
    """Extract the sampled frames and the audio track of one video.

    Args:
        video_path: Path of the video to process.
        seconds_per_frame: Sampling interval forwarded to ``extract_frames``.

    Returns:
        A ``(frames, audio_path)`` tuple: the result of ``extract_frames``
        followed by the result of ``extract_audio``.
    """
    # Tuple elements are evaluated left to right, so frames are extracted
    # before the audio is written.
    return (
        extract_frames(video_path, seconds_per_frame),
        extract_audio(video_path),
    )
45
+
46
def get_summary_prompt(summary_detail):
    """Return the prompt sentence matching the requested summary detail.

    Args:
        summary_detail: The detail level label chosen by the user.

    Returns:
        The short or medium prompt when the label matches one of those
        levels; otherwise the detailed prompt.
    """
    prompts_by_level = (
        ("簡短", "生成一個簡短的摘要,不超過50字。"),
        ("中等", "生成一個中等長度的摘要,概述影片的主要內容。"),
    )
    for level, prompt in prompts_by_level:
        if summary_detail == level:
            return prompt
    # Any other value falls back to the detailed prompt.
    return "生成一個詳細的摘要,包括影片的主要內容和關鍵細節。"
53
+
54
def summarize_video(file_path, summary_detail):
    """Summarize an uploaded video with Whisper transcription and GPT-4o.

    The video is sampled into frames and its audio is exported to MP3. The
    audio is transcribed with ``whisper-1``, then the frames and transcript
    are sent to ``gpt-4o`` together with a prompt chosen by
    ``get_summary_prompt``.

    Args:
        file_path: The uploaded file from the ``gr.File`` input: either a
            filesystem path or an object exposing the path as ``.name``.
        summary_detail: Detail level label forwarded to
            ``get_summary_prompt``.

    Returns:
        The content of the first chat completion choice.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if file_path is None:
        raise gr.Error("請先上傳影片檔案。")

    # Some Gradio versions hand over a tempfile wrapper instead of a path;
    # normalise to a plain path before giving it to OpenCV / moviepy.
    if isinstance(file_path, (str, os.PathLike)):
        video_path = file_path
    else:
        video_path = file_path.name

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # Sample one frame every 0.5 seconds. The interval must stay a float:
    # int(0.5) is 0, which yields a zero frame stride.
    base64_frames, audio_path = process_video(video_path, seconds_per_frame=0.5)

    # Transcribe the audio with Whisper, closing the file handle afterwards.
    try:
        with open(audio_path, "rb") as audio_file:
            transcription = client.audio.transcriptions.create(
                model="whisper-1", file=audio_file
            )
    finally:
        # The MP3 is only an intermediate artifact; remove it on a
        # best-effort basis so repeated requests do not fill the disk.
        if audio_path != video_path:
            try:
                os.remove(audio_path)
            except OSError:
                pass

    # Pick the instruction sentence for the requested level of detail.
    summary_prompt = get_summary_prompt(summary_detail)

    frame_parts = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpg;base64,{encoded_frame}",
                "detail": "low",
            },
        }
        for encoded_frame in base64_frames
    ]

    # Generate the summary with GPT-4o. Every content part is an explicit
    # typed object, including the leading caption.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": f"""您是一名優秀的摘要專家,請根據提供的影片和其轉錄內容生成Markdown格式的摘要。{summary_prompt}""",
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "以下是從影片中提取的幀畫面"},
                    *frame_parts,
                    {
                        "type": "text",
                        "text": f"這是影片的轉錄內容: {transcription.text}",
                    },
                ],
            },
        ],
        temperature=0,
    )

    return response.choices[0].message.content
102
+
103
# Gradio UI: a file upload plus a detail-level selector, wired to
# summarize_video, with the result rendered as Markdown.
demo = gr.Interface(
    fn=summarize_video,
    inputs=[
        gr.File(label="上傳影片 (mp4)"),
        gr.Radio(["簡短", "中等", "詳細"], label="摘要詳細程度", value="中等"),
    ],
    outputs="markdown",
    title="影片摘要生成器",
    description="上傳影片,將生成影片的摘要。",
    # ``flagging_dir`` only selects where flagged samples are stored; the
    # Flag button itself is disabled through ``allow_flagging="never"``.
    # NOTE(review): recent Gradio releases rename this option to
    # ``flagging_mode`` — confirm against the installed version.
    allow_flagging="never",
)

if __name__ == "__main__":
    demo.launch()