"""Gradio app that summarizes an uploaded video with Whisper and GPT-4o."""

import warnings

# Suppress SyntaxWarning. The filter is installed before the remaining imports
# so that it is already active while those modules are loaded.
warnings.filterwarnings("ignore", category=SyntaxWarning)

import base64
import os

import gradio as gr
from moviepy.editor import VideoFileClip
import cv2
from openai import OpenAI


def extract_frames(video_path, seconds_per_frame):
    """Sample frames from a video and return them as base64-encoded JPEGs.

    Args:
        video_path: Path of the video file to read.
        seconds_per_frame: Time distance, in seconds, between sampled frames.

    Returns:
        A list of base64 strings (UTF-8 decoded), one per sampled frame. The
        list is empty when no frame could be read.
    """
    base64_frames = []
    video = cv2.VideoCapture(video_path)
    try:
        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = video.get(cv2.CAP_PROP_FPS)
        # Always advance by at least one frame: if the reported FPS is 0 or
        # fps * seconds_per_frame is below 1, a step of 0 would loop forever.
        frames_to_skip = max(1, int(fps * seconds_per_frame))

        curr_frame = 0
        while curr_frame < total_frames - 1:
            video.set(cv2.CAP_PROP_POS_FRAMES, curr_frame)
            success, frame = video.read()
            if not success:
                break
            encoded, buffer = cv2.imencode(".jpg", frame)
            if encoded:
                base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
            curr_frame += frames_to_skip
    finally:
        # Release the capture even when reading or encoding raises.
        video.release()
    return base64_frames


def extract_audio(video_path):
    """Write the audio track of a video to an MP3 file next to it.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The path of the written MP3 file: ``video_path`` with its extension
        replaced by ``.mp3``.

    Raises:
        ValueError: If the video has no audio track.
    """
    base_video_path, _ = os.path.splitext(video_path)
    audio_path = f"{base_video_path}.mp3"

    clip = VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError(f"Video has no audio track: {video_path}")
        clip.audio.write_audiofile(audio_path, bitrate="32k")
    finally:
        # Close the clip (and its audio reader) even if writing failed.
        if clip.audio is not None:
            clip.audio.close()
        clip.close()
    return audio_path


def process_video(video_path, seconds_per_frame):
    """Extract both the sampled frames and the audio track of a video.

    Args:
        video_path: Path of the video file to read.
        seconds_per_frame: Time distance, in seconds, between sampled frames.

    Returns:
        A ``(base64_frames, audio_path)`` tuple as produced by
        ``extract_frames`` and ``extract_audio``.
    """
    base64_frames = extract_frames(video_path, seconds_per_frame)
    audio_path = extract_audio(video_path)
    return base64_frames, audio_path


def get_summary_prompt(summary_detail):
    """Return the prompt fragment for the requested summary detail level.

    Args:
        summary_detail: One of the detail labels offered by the UI. Any value
            other than the short or medium label selects the detailed prompt.

    Returns:
        The instruction sentence appended to the system prompt.
    """
    prompts = {
        "簡短": "生成一個簡短的摘要,不超過50字。",
        "中等": "生成一個中等長度的摘要,概述影片的主要內容。",
    }
    # Fallback: detailed summary.
    detailed_prompt = "生成一個詳細的摘要,包括影片的主要內容和關鍵細節。"
    return prompts.get(summary_detail, detailed_prompt)


def summarize_video(file_path, summary_detail):
    """Summarize a video from its sampled frames and its audio transcript.

    Args:
        file_path: The uploaded video, either as a path string or as an
            object exposing the path through a ``name`` attribute.
        summary_detail: Detail level label passed to ``get_summary_prompt``.

    Returns:
        The summary text returned by the chat completion.

    Raises:
        ValueError: If no file was provided, or the video has no audio track.
    """
    if file_path is None:
        raise ValueError("No video file was provided.")
    # Depending on the Gradio version the File component passes either a path
    # string or a temp-file wrapper; normalise both to a plain path.
    video_path = getattr(file_path, "name", file_path)

    api_key = os.getenv("OPENAI_API_KEY")
    client = OpenAI(api_key=api_key)

    # Extract frames and audio (one frame every 0.5 seconds).
    base64_frames, audio_path = process_video(video_path, seconds_per_frame=0.5)

    # Transcribe the audio with Whisper; the file is closed once uploaded.
    with open(audio_path, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )

    # Choose the prompt according to the detail level the user selected.
    summary_prompt = get_summary_prompt(summary_detail)

    frame_parts = [
        {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/jpg;base64,{frame}",
                "detail": "low",
            },
        }
        for frame in base64_frames
    ]

    # Generate the summary with GPT-4o.
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": f"您是一名優秀的摘要專家,請根據提供的影片和其轉錄內容生成Markdown格式的摘要。{summary_prompt}",
            },
            {
                "role": "user",
                "content": [
                    "以下是從影片中提取的幀畫面",
                    *frame_parts,
                    {
                        "type": "text",
                        "text": f"這是影片的轉錄內容: {transcription.text}",
                    },
                ],
            },
        ],
        temperature=0,
    )

    return response.choices[0].message.content


demo = gr.Interface(
    fn=summarize_video,
    inputs=[
        gr.File(label="上傳影片 (mp4)"),
        gr.Radio(["簡短", "中等", "詳細"], label="摘要詳細程度", value="中等"),
    ],
    outputs="markdown",
    title="影片摘要生成器",
    description="上傳影片,將生成影片的摘要。",
    # Intended to remove the Flag feature.
    # NOTE(review): flagging_dir names the directory for flagged data; confirm
    # that an empty value really disables flagging on the Gradio version in use.
    flagging_dir="",
)

if __name__ == "__main__":
    demo.launch()