Spaces:

Elalimy
/

video-text

Sleeping

App Files Files Community

Elalimy committed on Jul 2, 2024

Commit

6588ad0

verified ·

1 Parent(s): b08d195

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -10

app.py CHANGED Viewed

@@ -1,15 +1,19 @@
-from flask import Flask, request, jsonify, redirect, url_for
 import os
 from moviepy.editor import VideoFileClip
-import whisper
 app = Flask(__name__)
 # Configure the maximum content length for uploads (500 MB)
 app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500  # 500 MB limit
-# Load the Whisper model
-model = whisper.load_model("base")
 @app.route('/')
 def index():
@@ -18,11 +22,11 @@ def index():
 @app.route('/upload', methods=['POST'])
 def upload_video():
     if 'video' not in request.files:
-        return jsonify({"error": "No video file provided"}), 400
     video_file = request.files['video']
     if video_file.filename == '':
-        return jsonify({"error": "No video file selected"}), 400
     # Save the video file
     video_path = os.path.join('uploads', video_file.filename)
@@ -34,9 +38,9 @@ def upload_video():
         # Transcribe the audio
         transcript = transcribe_audio(audio_path)
     except Exception as e:
-        return jsonify({"error": str(e)}), 500
-    return jsonify({"transcript": transcript})
 def extract_audio(video_path):
     audio_path = os.path.splitext(video_path)[0] + ".wav"
@@ -53,10 +57,27 @@ def transcribe_audio(audio_path):
         raise FileNotFoundError(f"Audio file not found at {audio_path}")
     try:
-        result = model.transcribe(audio_path)
-        return result["text"]
     except Exception as e:
         raise RuntimeError(f"Error during transcription: {e}")
 if __name__ == '__main__':
     app.run(debug=False, host='0.0.0.0', port=7860)

+from flask import Flask, request, render_template, redirect, url_for
 import os
 from moviepy.editor import VideoFileClip
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
+import torch
+import torchaudio
 app = Flask(__name__)
 # Configure the maximum content length for uploads (500 MB)
 app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500  # 500 MB limit
+# Load the wav2vec2 model and tokenizer
+model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
+tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
+model = Wav2Vec2ForCTC.from_pretrained(model_name)
 @app.route('/')
 def index():
 @app.route('/upload', methods=['POST'])
 def upload_video():
     if 'video' not in request.files:
+        return redirect(url_for('index'))
     video_file = request.files['video']
     if video_file.filename == '':
+        return redirect(url_for('index'))
     # Save the video file
     video_path = os.path.join('uploads', video_file.filename)
         # Transcribe the audio
         transcript = transcribe_audio(audio_path)
     except Exception as e:
+        return f"Error: {e}"
+    return render_template('result.html', transcript=transcript)
 def extract_audio(video_path):
     audio_path = os.path.splitext(video_path)[0] + ".wav"
         raise FileNotFoundError(f"Audio file not found at {audio_path}")
     try:
+        # Load the audio file
+        waveform, sample_rate = torchaudio.load(audio_path)
+        # Resample if necessary
+        if sample_rate != 16000:
+            resampler = torchaudio.transforms.Resample(sample_rate, 16000)
+            waveform = resampler(waveform)
+        # Tokenize the audio
+        input_values = tokenizer(waveform.squeeze().numpy(), return_tensors="pt", padding="longest").input_values
+        # Perform the transcription
+        with torch.no_grad():
+            logits = model(input_values).logits
+        predicted_ids = torch.argmax(logits, dim=-1)
+        # Decode the transcription
+        transcription = tokenizer.batch_decode(predicted_ids)[0]
+        return transcription
     except Exception as e:
         raise RuntimeError(f"Error during transcription: {e}")
 if __name__ == '__main__':
+    os.makedirs('uploads', exist_ok=True)
     app.run(debug=False, host='0.0.0.0', port=7860)