Elalimy committed
Commit bee9e06 · verified
1 Parent(s): 106ba50

Update app.py

Files changed (1)
  1. app.py +34 -8
app.py CHANGED

@@ -2,12 +2,21 @@ from flask import Flask, request, render_template, redirect, url_for
 import os
 from moviepy.editor import VideoFileClip
 import whisper
+import hashlib
 
 app = Flask(__name__)
 
 # Configure the maximum content length for uploads (500 MB)
 app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500 # 500 MB limit
 
+# Create directories for uploads and cache
+UPLOAD_FOLDER = 'uploads'
+AUDIO_FOLDER = 'audio_cache'
+TRANSCRIPT_FOLDER = 'transcript_cache'
+os.makedirs(UPLOAD_FOLDER, exist_ok=True)
+os.makedirs(AUDIO_FOLDER, exist_ok=True)
+os.makedirs(TRANSCRIPT_FOLDER, exist_ok=True)
+
 # Load the Whisper model
 model = whisper.load_model("base")
 
@@ -25,21 +34,38 @@ def upload_video():
         return redirect(url_for('index'))
 
     # Save the video file
-    video_path = os.path.join('uploads', video_file.filename)
+    video_path = os.path.join(UPLOAD_FOLDER, video_file.filename)
     video_file.save(video_path)
 
     try:
-        # Extract audio from the video
-        audio_path = extract_audio(video_path)
-        # Transcribe the audio
-        transcript = transcribe_audio(audio_path)
+        # Generate a unique hash for the video file to use as a cache key
+        video_hash = hashlib.md5(video_file.read()).hexdigest()
+
+        # Check if the audio and transcript are already cached
+        audio_path = os.path.join(AUDIO_FOLDER, f"{video_hash}.wav")
+        transcript_path = os.path.join(TRANSCRIPT_FOLDER, f"{video_hash}.txt")
+
+        if not os.path.exists(audio_path):
+            # Extract audio from the video if not cached
+            audio_path = extract_audio(video_path, audio_path)
+
+        if not os.path.exists(transcript_path):
+            # Transcribe the audio if not cached
+            transcript = transcribe_audio(audio_path)
+            # Cache the transcript
+            with open(transcript_path, 'w') as f:
+                f.write(transcript)
+        else:
+            # Load cached transcript
+            with open(transcript_path, 'r') as f:
+                transcript = f.read()
+
     except Exception as e:
         return f"Error: {e}"
 
     return render_template('result.html', transcript=transcript)
 
-def extract_audio(video_path):
-    audio_path = os.path.splitext(video_path)[0] + ".wav"
+def extract_audio(video_path, audio_path):
     try:
         # Use a temporary file to reduce the load on memory
         with VideoFileClip(video_path) as video:
@@ -59,4 +85,4 @@ def transcribe_audio(audio_path):
         raise RuntimeError(f"Error during transcription: {e}")
 
 if __name__ == '__main__':
-    app.run(debug=False, host='0.0.0.0', port=7860)
+    app.run(debug=False, host='0.0.0.0', port=7860)
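
A note on the cache key: `video_file.read()` runs after `video_file.save(video_path)`, and Werkzeug's `FileStorage.save()` consumes the underlying stream, so what those bytes are at that point depends on whether the stream has been rewound (for example with `video_file.stream.seek(0)`). Below is a minimal sketch of computing an equivalent MD5 key from the file already written to disk, reading it in chunks to keep memory use flat; `compute_cache_key` is an illustrative helper name, not part of this commit.

import hashlib

def compute_cache_key(path, chunk_size=8 * 1024 * 1024):
    """Return the MD5 hex digest of the file at `path`, read in chunks of `chunk_size` bytes."""
    digest = hashlib.md5()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Possible use inside upload_video(), after video_file.save(video_path):
# video_hash = compute_cache_key(video_path)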
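
The second hunk shows only the first lines of the updated `extract_audio(video_path, audio_path)`; the rest of its body lies outside the diff context. A minimal sketch of how a helper with this signature could look using moviepy's `write_audiofile`, offered as an illustration of the new two-argument interface rather than the file's actual implementation:

from moviepy.editor import VideoFileClip

def extract_audio(video_path, audio_path):
    """Write the video's audio track to audio_path and return that path."""
    try:
        with VideoFileClip(video_path) as video:
            if video.audio is None:
                raise ValueError("The uploaded video has no audio track.")
            # Write the audio track out as a WAV file for Whisper to consume
            video.audio.write_audiofile(audio_path)
        return audio_path
    except Exception as e:
        raise RuntimeError(f"Error during audio extraction: {e}")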
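
The final hunk touches `transcribe_audio` only at its error path. For completeness, a minimal sketch of what a helper with that error handling typically looks like around Whisper's `transcribe()` API; the body is an assumption based on the surrounding code, not taken from this commit:

def transcribe_audio(audio_path):
    """Run the loaded Whisper model on audio_path and return the transcript text."""
    try:
        # `model` is the module-level whisper.load_model("base") instance
        result = model.transcribe(audio_path)
        return result["text"]
    except Exception as e:
        raise RuntimeError(f"Error during transcription: {e}")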