Spaces:
Sleeping
Sleeping
File size: 2,959 Bytes
6588ad0 8625430 af70ba1 bee9e06 8625430 41557b5 f462b2c bee9e06 9475b53 af70ba1 29bd054 6588ad0 29bd054 6588ad0 29bd054 bee9e06 29bd054 bee9e06 29bd054 6588ad0 29bd054 6588ad0 29bd054 bee9e06 9d915ef f462b2c 9d915ef 29bd054 41557b5 ac31d19 9c20e94 af70ba1 29bd054 9d915ef 29bd054 bee9e06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
from flask import Flask, request, render_template, redirect, url_for
import os
from moviepy.editor import VideoFileClip
import whisper
import hashlib
# Flask application object that the route decorators below attach to.
app = Flask(__name__)

# Cap the size of an uploaded request body at 500 MB.
app.config['MAX_CONTENT_LENGTH'] = 500 * 1024 * 1024

# Working directories: raw uploads, extracted audio, and finished transcripts.
# The paths are relative, so they resolve against the process's current
# working directory.
UPLOAD_FOLDER = 'uploads'
AUDIO_FOLDER = 'audio_cache'
TRANSCRIPT_FOLDER = 'transcript_cache'
for _folder in (UPLOAD_FOLDER, AUDIO_FOLDER, TRANSCRIPT_FOLDER):
    os.makedirs(_folder, exist_ok=True)

# Point XDG_CACHE_HOME at /app/.cache before the model is loaded
# (presumably so Whisper stores its downloaded weights there -- confirm
# against the deployment image).
os.environ["XDG_CACHE_HOME"] = "/app/.cache"

# Load the "base" Whisper model once at import time; transcribe_audio()
# reuses this single instance for every request.
model = whisper.load_model("base")
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    landing_page = render_template('index.html')
    return landing_page
def _hash_upload(stream, chunk_size=1024 * 1024):
    """Return the MD5 hex digest of everything readable from *stream*.

    The stream is consumed in ``chunk_size``-byte pieces so that a large
    upload is never held in memory in one piece. On return the stream is
    positioned at end-of-data; callers that need the content again must
    rewind it themselves.
    """
    digest = hashlib.md5()
    for chunk in iter(lambda: stream.read(chunk_size), b''):
        digest.update(chunk)
    return digest.hexdigest()


@app.route('/upload', methods=['POST'])
def upload_video():
    """Handle a posted video and render its transcript.

    The upload is expected in the ``video`` form field. Its content hash is
    the cache key for both the extracted audio and the transcript, so
    re-uploading the same bytes reuses earlier results.

    Returns:
        A redirect to ``index`` when no file was supplied, the rendered
        ``result.html`` template on success, or an ``"Error: ..."`` body
        with HTTP status 500 when processing fails.
    """
    video_file = request.files.get('video')
    if video_file is None or video_file.filename == '':
        return redirect(url_for('index'))

    try:
        # Hash BEFORE saving: save() drains the stream, so hashing afterwards
        # would always digest zero bytes and give every upload the same key.
        video_hash = _hash_upload(video_file.stream)

        # Never build a path from the client-supplied filename; keep only a
        # plain alphanumeric extension and name the file by its content hash.
        extension = os.path.splitext(video_file.filename)[1]
        if not extension[1:].isalnum():
            extension = ''
        video_path = os.path.join(UPLOAD_FOLDER, f"{video_hash}{extension}")

        # Rewind so save() copies the whole upload, not the exhausted tail.
        video_file.stream.seek(0)
        video_file.save(video_path)

        audio_path = os.path.join(AUDIO_FOLDER, f"{video_hash}.wav")
        transcript_path = os.path.join(TRANSCRIPT_FOLDER, f"{video_hash}.txt")

        if os.path.exists(transcript_path):
            # Cache hit: no audio extraction or transcription needed.
            with open(transcript_path, 'r', encoding='utf-8') as f:
                transcript = f.read()
        else:
            if not os.path.exists(audio_path):
                extract_audio(video_path, audio_path)
            transcript = transcribe_audio(audio_path)
            # Explicit UTF-8 so non-ASCII transcripts do not depend on the
            # process locale.
            with open(transcript_path, 'w', encoding='utf-8') as f:
                f.write(transcript)
    except Exception as e:
        # Top-level request boundary: record the traceback and report the
        # failure with an error status instead of a 200 response.
        app.logger.exception("Failed to process uploaded video")
        return f"Error: {e}", 500

    return render_template('result.html', transcript=transcript)
def extract_audio(video_path, audio_path):
    """Extract the audio track of a video into ``audio_path``.

    The audio is first written to a sibling ``*.partial<ext>`` file and only
    moved onto ``audio_path`` once the write has finished, so a failed or
    interrupted extraction never leaves a truncated file where callers look
    for cached audio.

    Args:
        video_path: Path of the video file to read.
        audio_path: Destination path for the extracted audio.

    Returns:
        ``audio_path``, unchanged.

    Raises:
        RuntimeError: If the video cannot be opened, has no audio track, or
            the audio cannot be written. The original exception is chained
            as ``__cause__``.
    """
    # Keep the real extension at the end of the temporary name; the writer
    # is expected to pick its codec from it (see MoviePy write_audiofile).
    root, ext = os.path.splitext(audio_path)
    partial_path = f"{root}.partial{ext}"
    try:
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                raise ValueError("video has no audio track")
            video.audio.write_audiofile(partial_path)
        os.replace(partial_path, audio_path)
    except Exception as e:
        # Drop any half-written output so it cannot be mistaken for a
        # completed extraction later.
        if os.path.exists(partial_path):
            os.remove(partial_path)
        raise RuntimeError(f"Error extracting audio: {e}") from e
    return audio_path
def transcribe_audio(audio_path):
    """Transcribe an audio file with the module-level Whisper model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the model's transcription result.

    Raises:
        FileNotFoundError: If nothing exists at ``audio_path``.
        RuntimeError: If the model call or result lookup fails.
    """
    audio_is_present = os.path.exists(audio_path)
    if not audio_is_present:
        raise FileNotFoundError(f"Audio file not found at {audio_path}")

    try:
        transcription = model.transcribe(audio_path)
        text = transcription["text"]
    except Exception as err:
        raise RuntimeError(f"Error during transcription: {err}")
    return text
if __name__ == '__main__':
    # Run Flask's built-in server when executed as a script: bound to
    # 0.0.0.0 on port 7860, with debug mode off.
    app.run(host='0.0.0.0', port=7860, debug=False)
|