Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,15 +1,19 @@
|
|
1 |
-
from flask import Flask, request,
|
2 |
import os
|
3 |
from moviepy.editor import VideoFileClip
|
4 |
-
import
|
|
|
|
|
5 |
|
6 |
app = Flask(__name__)
|
7 |
|
8 |
# Configure the maximum content length for uploads (500 MB)
|
9 |
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500 # 500 MB limit
|
10 |
|
11 |
-
# Load the
|
12 |
-
|
|
|
|
|
13 |
|
14 |
@app.route('/')
|
15 |
def index():
|
@@ -18,11 +22,11 @@ def index():
|
|
18 |
@app.route('/upload', methods=['POST'])
|
19 |
def upload_video():
|
20 |
if 'video' not in request.files:
|
21 |
-
return
|
22 |
|
23 |
video_file = request.files['video']
|
24 |
if video_file.filename == '':
|
25 |
-
return
|
26 |
|
27 |
# Save the video file
|
28 |
video_path = os.path.join('uploads', video_file.filename)
|
@@ -34,9 +38,9 @@ def upload_video():
|
|
34 |
# Transcribe the audio
|
35 |
transcript = transcribe_audio(audio_path)
|
36 |
except Exception as e:
|
37 |
-
return
|
38 |
|
39 |
-
return
|
40 |
|
41 |
def extract_audio(video_path):
|
42 |
audio_path = os.path.splitext(video_path)[0] + ".wav"
|
@@ -53,10 +57,27 @@ def transcribe_audio(audio_path):
|
|
53 |
raise FileNotFoundError(f"Audio file not found at {audio_path}")
|
54 |
|
55 |
try:
|
56 |
-
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
except Exception as e:
|
59 |
raise RuntimeError(f"Error during transcription: {e}")
|
60 |
|
61 |
if __name__ == '__main__':
|
|
|
62 |
app.run(debug=False, host='0.0.0.0', port=7860)
|
|
|
1 |
+
from flask import Flask, request, render_template, redirect, url_for
|
2 |
import os
|
3 |
from moviepy.editor import VideoFileClip
|
4 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
|
5 |
+
import torch
|
6 |
+
import torchaudio
|
7 |
|
8 |
app = Flask(__name__)
|
9 |
|
10 |
# Configure the maximum content length for uploads (500 MB)
|
11 |
app.config['MAX_CONTENT_LENGTH'] = 1024 * 1024 * 500 # 500 MB limit
|
12 |
|
13 |
+
# Load the wav2vec2 model and tokenizer
|
14 |
+
model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-english"
|
15 |
+
tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
|
16 |
+
model = Wav2Vec2ForCTC.from_pretrained(model_name)
|
17 |
|
18 |
@app.route('/')
|
19 |
def index():
|
|
|
22 |
@app.route('/upload', methods=['POST'])
|
23 |
def upload_video():
|
24 |
if 'video' not in request.files:
|
25 |
+
return redirect(url_for('index'))
|
26 |
|
27 |
video_file = request.files['video']
|
28 |
if video_file.filename == '':
|
29 |
+
return redirect(url_for('index'))
|
30 |
|
31 |
# Save the video file
|
32 |
video_path = os.path.join('uploads', video_file.filename)
|
|
|
38 |
# Transcribe the audio
|
39 |
transcript = transcribe_audio(audio_path)
|
40 |
except Exception as e:
|
41 |
+
return f"Error: {e}"
|
42 |
|
43 |
+
return render_template('result.html', transcript=transcript)
|
44 |
|
45 |
def extract_audio(video_path):
|
46 |
audio_path = os.path.splitext(video_path)[0] + ".wav"
|
|
|
57 |
raise FileNotFoundError(f"Audio file not found at {audio_path}")
|
58 |
|
59 |
try:
|
60 |
+
# Load the audio file
|
61 |
+
waveform, sample_rate = torchaudio.load(audio_path)
|
62 |
+
# Resample if necessary
|
63 |
+
if sample_rate != 16000:
|
64 |
+
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
65 |
+
waveform = resampler(waveform)
|
66 |
+
|
67 |
+
# Tokenize the audio
|
68 |
+
input_values = tokenizer(waveform.squeeze().numpy(), return_tensors="pt", padding="longest").input_values
|
69 |
+
|
70 |
+
# Perform the transcription
|
71 |
+
with torch.no_grad():
|
72 |
+
logits = model(input_values).logits
|
73 |
+
predicted_ids = torch.argmax(logits, dim=-1)
|
74 |
+
|
75 |
+
# Decode the transcription
|
76 |
+
transcription = tokenizer.batch_decode(predicted_ids)[0]
|
77 |
+
return transcription
|
78 |
except Exception as e:
|
79 |
raise RuntimeError(f"Error during transcription: {e}")
|
80 |
|
81 |
if __name__ == '__main__':
|
82 |
+
os.makedirs('uploads', exist_ok=True)
|
83 |
app.run(debug=False, host='0.0.0.0', port=7860)
|