gpt-4o-video-summarizer-demo

Sleeping

App Files Files Community

MingGatsby committed on Sep 3

Commit

2c00ad9

•

1 Parent(s): 1115e59

Create app.py

Browse files

Files changed (1) hide show

app.py +116 -0

app.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import warnings
+# 過濾 SyntaxWarning 警告
+warnings.filterwarnings("ignore", category=SyntaxWarning)
+import gradio as gr
+from moviepy.editor import VideoFileClip
+import cv2
+import base64
+from openai import OpenAI
+import os
+def extract_frames(video_path, seconds_per_frame):
+    """Sample frames from a video and return them as base64-encoded JPEGs.
+
+    Args:
+        video_path: Path of the video file opened with ``cv2.VideoCapture``.
+        seconds_per_frame: Spacing between sampled frames, in seconds.
+            A spacing shorter than one frame (including 0) falls back to
+            sampling every frame.
+
+    Returns:
+        A list of base64 strings, one per sampled frame, in playback order.
+        The list is empty when no frame can be read.
+    """
+    base64_frames = []
+    video = cv2.VideoCapture(video_path)
+    try:
+        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
+        fps = video.get(cv2.CAP_PROP_FPS)
+        # A step of 0 would never advance the frame index, so the loop
+        # would run forever re-reading the first frame; clamp it to 1.
+        frames_to_skip = max(1, int(fps * seconds_per_frame))
+        # Sampling stops before the final frame index (total_frames - 1).
+        for frame_index in range(0, total_frames - 1, frames_to_skip):
+            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+            success, frame = video.read()
+            if not success:
+                break
+            encoded, buffer = cv2.imencode(".jpg", frame)
+            if not encoded:
+                # Skip a frame that could not be JPEG-encoded.
+                continue
+            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
+    finally:
+        # Release the capture handle even when reading or encoding raises.
+        video.release()
+    return base64_frames
+def extract_audio(video_path):
+    """Write the audio track of a video to an MP3 file beside the video.
+
+    Args:
+        video_path: Path of the source video.
+
+    Returns:
+        Path of the written file: ``video_path`` with its extension
+        replaced by ``.mp3``.
+
+    Raises:
+        ValueError: If the video has no audio track.
+    """
+    base_video_path, _ = os.path.splitext(video_path)
+    audio_path = f"{base_video_path}.mp3"
+    clip = VideoFileClip(video_path)
+    try:
+        if clip.audio is None:
+            # Fail with a clear message instead of an AttributeError on None.
+            raise ValueError(f"No audio track found in {video_path!r}")
+        clip.audio.write_audiofile(
+            filename=audio_path, codec="mp3", bitrate="32k"
+        )
+    finally:
+        # Close the clip even when the export fails.
+        clip.close()
+    return audio_path
+def process_video(video_path, seconds_per_frame=2):
+    """Extract the sampled frames and the audio track of one video.
+
+    Args:
+        video_path: Path of the video to process.
+        seconds_per_frame: Frame sampling interval in seconds, forwarded
+            to ``extract_frames``.
+
+    Returns:
+        A ``(frames, audio_path)`` tuple: the result of ``extract_frames``
+        followed by the result of ``extract_audio``.
+    """
+    # Tuple elements are evaluated left to right: frames first, then audio.
+    return (
+        extract_frames(video_path, seconds_per_frame),
+        extract_audio(video_path),
+    )
+def get_summary_prompt(summary_detail):
+    """Return the prompt sentence for the requested summary detail level.
+
+    Args:
+        summary_detail: The selected detail level label.
+
+    Returns:
+        The short prompt for "簡短", the medium prompt for "中等", and the
+        detailed prompt for every other value.
+    """
+    prompts_by_level = (
+        ("簡短", "生成一個簡短的摘要，不超過50字。"),
+        ("中等", "生成一個中等長度的摘要，概述影片的主要內容。"),
+    )
+    for level, prompt in prompts_by_level:
+        if summary_detail == level:
+            return prompt
+    # Fallback covers "詳細" as well as any unrecognised value.
+    return "生成一個詳細的摘要，包括影片的主要內容和關鍵細節。"
+def summarize_video(file_path, summary_detail):
+    """Summarize an uploaded video as Markdown using Whisper and GPT-4o.
+
+    Frames and audio are extracted with ``process_video``, the audio is
+    transcribed with ``whisper-1``, and the frames plus the transcript are
+    sent to ``gpt-4o`` in a single chat completion.
+
+    Args:
+        file_path: The uploaded video, either a filesystem path or an
+            object exposing its path as ``.name``.
+        summary_detail: Detail level label passed to ``get_summary_prompt``.
+
+    Returns:
+        The message content of the first completion choice.
+
+    Raises:
+        gr.Error: If no file was uploaded.
+    """
+    if file_path is None:
+        raise gr.Error("請先上傳影片檔案。")
+    # NOTE(review): some Gradio versions pass gr.File uploads as a file
+    # wrapper whose .name is the path rather than a plain string; confirm
+    # against the pinned Gradio version.
+    if isinstance(file_path, (str, os.PathLike)):
+        video_path = file_path
+    else:
+        video_path = file_path.name
+    api_key = os.getenv("OPENAI_API_KEY")
+    client = OpenAI(api_key=api_key)
+    # Sample one frame every 0.5 seconds. The interval must stay a float:
+    # int(0.5) is 0, which gives the frame extractor a zero step.
+    base64_frames, audio_path = process_video(video_path, seconds_per_frame=0.5)
+    # Transcribe the audio with Whisper, closing the file handle afterwards.
+    try:
+        with open(audio_path, "rb") as audio_file:
+            transcription = client.audio.transcriptions.create(
+                model="whisper-1", file=audio_file
+            )
+    finally:
+        # Best-effort removal of the temporary MP3; a failure to delete it
+        # must not mask the transcription result or error.
+        try:
+            os.remove(audio_path)
+        except OSError:
+            pass
+    # Pick the instruction matching the detail level chosen by the user.
+    summary_prompt = get_summary_prompt(summary_detail)
+    frame_parts = [
+        {
+            "type": "image_url",
+            "image_url": {
+                "url": f"data:image/jpg;base64,{frame}",
+                "detail": "low",
+            },
+        }
+        for frame in base64_frames
+    ]
+    # Generate the summary with GPT-4o; every content part is a typed object.
+    response = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[
+            {
+                "role": "system",
+                "content": f"""您是一名優秀的摘要專家，請根據提供的影片和其轉錄內容生成Markdown格式的摘要。{summary_prompt}""",
+            },
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "以下是從影片中提取的幀畫面"},
+                    *frame_parts,
+                    {
+                        "type": "text",
+                        "text": f"這是影片的轉錄內容: {transcription.text}",
+                    },
+                ],
+            },
+        ],
+        temperature=0,
+    )
+    return response.choices[0].message.content
+# Gradio UI: a video upload plus a detail-level selector as inputs, with the
+# string returned by summarize_video rendered as Markdown.
+demo = gr.Interface(
+    fn=summarize_video,
+    inputs=[
+        gr.File(label="上傳影片 (mp4)"),
+        gr.Radio(["簡短", "中等", "詳細"], label="摘要詳細程度", value="中等"),
+    ],
+    outputs="markdown",
+    title="影片摘要生成器",
+    description="上傳影片，將生成影片的摘要。",
+    # Hide the Flag button. An empty flagging_dir only changes where flagged
+    # samples would be stored; it does not disable flagging.
+    # NOTE(review): newer Gradio releases rename this option to
+    # flagging_mode; confirm against the pinned Gradio version.
+    allow_flagging="never",
+)
+if __name__ == "__main__":
+    demo.launch()