Spaces:

Elalimy
/

video-text

Sleeping

Elalimy committed on Jul 2, 2024

Commit

af70ba1

verified ·

1 Parent(s): 5902974

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,19 +1,15 @@
 from flask import Flask, request, render_template, redirect, url_for
 import os
 from moviepy.editor import VideoFileClip
-from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-import torch
-import torchaudio
 app = Flask(__name__)
 # Configure the maximum content length for uploads (500 MB)
 app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500  # 500 MB limit
-# Load the model and processor
-model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
-processor = Wav2Vec2Processor.from_pretrained(model_name)
-model = Wav2Vec2ForCTC.from_pretrained(model_name)
 @app.route('/')
 def index():
@@ -57,19 +53,10 @@ def transcribe_audio(audio_path):
         raise FileNotFoundError(f"Audio file not found at {audio_path}")
     try:
-        # Load and preprocess the audio
-        speech, rate = torchaudio.load(audio_path)
-        input_values = processor(speech.squeeze().numpy(), return_tensors="pt", sampling_rate=rate).input_values
-        # Perform the transcription
-        with torch.no_grad():
-            logits = model(input_values).logits
-        predicted_ids = torch.argmax(logits, dim=-1)
-        transcription = processor.batch_decode(predicted_ids)
-        return transcription[0]
     except Exception as e:
         raise RuntimeError(f"Error during transcription: {e}")
 if __name__ == '__main__':
-    app.run(debug=False, host='0.0.0.0', port=7860)

 from flask import Flask, request, render_template, redirect, url_for
 import os
 from moviepy.editor import VideoFileClip
+import whisper
 app = Flask(__name__)
 # Configure the maximum content length for uploads (500 MB)
 app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500  # 500 MB limit
+# Load the Whisper model
+model = whisper.load_model("base")
 @app.route('/')
 def index():
         raise FileNotFoundError(f"Audio file not found at {audio_path}")
     try:
+        result = model.transcribe(audio_path)
+        return result["text"]
     except Exception as e:
         raise RuntimeError(f"Error during transcription: {e}")
 if __name__ == '__main__':
+    app.run(debug=False, host='0.0.0.0', port=7860)