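# Spaces: Sleeping
# NOTE(review): the line above appears to be page-header residue from where
# this file was copied, not program code -- confirm and remove if so.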
from flask import Flask, request, render_template, redirect, url_for | |
import os | |
from moviepy.editor import VideoFileClip | |
import whisper | |
import hashlib | |
app = Flask(__name__)

# Cap the size of an uploaded request body at 500 MB.
app.config.update(MAX_CONTENT_LENGTH=500 * 1024 * 1024)

# Working directories: raw uploads, extracted audio, and cached transcripts.
UPLOAD_FOLDER = 'uploads'
AUDIO_FOLDER = 'audio_cache'
TRANSCRIPT_FOLDER = 'transcript_cache'
for _folder in (UPLOAD_FOLDER, AUDIO_FOLDER, TRANSCRIPT_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Point the cache location used by Whisper at /app/.cache; this must be set
# before the model is loaded below.
os.environ["XDG_CACHE_HOME"] = "/app/.cache"

# Load the Whisper "base" model once at import time; transcribe_audio reuses it.
model = whisper.load_model("base")
def index():
    """Render the landing page from the ``index.html`` template."""
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view of the file, yet upload_video redirects to url_for('index').
    # Confirm that the route is registered.
    landing_page = render_template('index.html')
    return landing_page
def _hash_file(path, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of the file at *path*, read in chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def upload_video():
    """Save an uploaded video, transcribe it, and render the transcript.

    The upload is read from the ``video`` field of ``request.files``. Audio
    and transcript files are cached under names derived from the MD5 digest
    of the saved video, so re-uploading the same content reuses earlier work.

    Returns:
        A redirect to ``index`` when no usable file was submitted; the
        rendered ``result.html`` template on success; or an
        ``("Error: ...", 500)`` response when processing fails.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view of the file. Confirm that the route is registered.
    from html import escape  # local import keeps this block self-contained

    if 'video' not in request.files:
        return redirect(url_for('index'))
    video_file = request.files['video']
    if not video_file.filename:  # covers both '' and None
        return redirect(url_for('index'))

    # The filename is client-controlled: strip directory components (either
    # separator style) so the upload cannot be written outside UPLOAD_FOLDER.
    safe_name = os.path.basename(video_file.filename.replace('\\', '/'))
    if safe_name in ('', '.', '..'):
        return redirect(url_for('index'))

    # Save the video file
    video_path = os.path.join(UPLOAD_FOLDER, safe_name)
    video_file.save(video_path)
    try:
        # Hash the saved file rather than video_file.read(): save() has
        # already consumed the upload stream, so reading it again yields
        # nothing and every upload would share the same cache key.
        video_hash = _hash_file(video_path)

        audio_path = os.path.join(AUDIO_FOLDER, f"{video_hash}.wav")
        transcript_path = os.path.join(TRANSCRIPT_FOLDER, f"{video_hash}.txt")

        if not os.path.exists(audio_path):
            # Extract audio from the video if not cached
            audio_path = extract_audio(video_path, audio_path)

        if not os.path.exists(transcript_path):
            # Transcribe the audio if not cached, then cache the transcript
            transcript = transcribe_audio(audio_path)
            with open(transcript_path, 'w', encoding='utf-8') as f:
                f.write(transcript)
        else:
            # Load cached transcript
            with open(transcript_path, 'r', encoding='utf-8') as f:
                transcript = f.read()
    except Exception as e:
        # The message can contain client-supplied text (e.g. the filename),
        # and the response is served as HTML, so escape it. Report the
        # failure with a 500 status instead of a 200.
        return f"Error: {escape(str(e))}", 500
    return render_template('result.html', transcript=transcript)
def extract_audio(video_path, audio_path):
    """Write the audio track of the video at *video_path* to *audio_path*.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        *audio_path*, once the audio file has been written.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The original exception is chained
            as the cause.
    """
    try:
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                # Without this check the failure surfaces as an opaque
                # "'NoneType' object has no attribute ..." message.
                raise ValueError("video has no audio track")
            video.audio.write_audiofile(audio_path)
    except Exception as e:
        # Best-effort cleanup: a partially written file left at audio_path
        # would pass a later os.path.exists check and be treated as cached.
        try:
            if os.path.exists(audio_path):
                os.remove(audio_path)
        except OSError:
            pass
        raise RuntimeError(f"Error extracting audio: {e}") from e
    return audio_path
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the module-level model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the result returned by ``model.transcribe``.

    Raises:
        FileNotFoundError: If *audio_path* does not exist.
        RuntimeError: If transcription fails. The original exception is
            chained as the cause.
    """
    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"Audio file not found at {audio_path}")
    try:
        result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:
        # Chain explicitly so the traceback marks the underlying failure as
        # the direct cause of this error.
        raise RuntimeError(f"Error during transcription: {e}") from e
if __name__ == '__main__':
    # When executed as a script, serve on all interfaces at port 7860 with
    # the debugger turned off.
    app.run(host='0.0.0.0', port=7860, debug=False)