Elalimy committed on
Commit
9c20e94
·
verified ·
1 Parent(s): c8d9ae6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -10
app.py CHANGED
@@ -2,6 +2,8 @@ from flask import Flask, request, render_template, redirect, url_for
2
  import os
3
  import requests
4
  from moviepy.editor import VideoFileClip
 
 
5
 
6
  app = Flask(__name__)
7
 
@@ -33,8 +35,8 @@ def upload_video():
33
  try:
34
  # Extract audio from the video
35
  audio_path = extract_audio(video_path)
36
- # Transcribe the audio
37
- transcript = transcribe_audio(audio_path)
38
  except Exception as e:
39
  return f"Error: {e}"
40
 
@@ -50,20 +52,38 @@ def extract_audio(video_path):
50
  raise RuntimeError(f"Error extracting audio: {e}")
51
  return audio_path
52
 
53
- def transcribe_audio(audio_path):
54
- if not os.path.exists(audio_path):
55
- raise FileNotFoundError(f"Audio file not found at {audio_path}")
56
-
57
  try:
58
- with open(audio_path, "rb") as audio_file:
59
- headers = {"Authorization": f"Bearer {API_TOKEN}"}
60
- response = requests.post(API_URL, headers=headers, files={"file": audio_file})
61
 
62
  if response.status_code != 200:
63
  raise RuntimeError(f"Error during transcription: {response.text}")
64
 
65
  result = response.json()
66
- return result.get("text", "No transcription available")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  except Exception as e:
68
  raise RuntimeError(f"Error during transcription: {e}")
69
 
 
2
  import os
3
  import requests
4
  from moviepy.editor import VideoFileClip
5
+ from pydub import AudioSegment
6
+ from pydub.utils import make_chunks
7
 
8
  app = Flask(__name__)
9
 
 
35
  try:
36
  # Extract audio from the video
37
  audio_path = extract_audio(video_path)
38
+ # Split and transcribe the audio
39
+ transcript = split_and_transcribe_audio(audio_path)
40
  except Exception as e:
41
  return f"Error: {e}"
42
 
 
52
  raise RuntimeError(f"Error extracting audio: {e}")
53
  return audio_path
54
 
55
+ def transcribe_audio_chunk(audio_chunk):
 
 
 
56
  try:
57
+ headers = {"Authorization": f"Bearer {API_TOKEN}"}
58
+ response = requests.post(API_URL, headers=headers, files={"file": audio_chunk})
 
59
 
60
  if response.status_code != 200:
61
  raise RuntimeError(f"Error during transcription: {response.text}")
62
 
63
  result = response.json()
64
+ return result.get("text", "")
65
+ except Exception as e:
66
+ raise RuntimeError(f"Error during transcription: {e}")
67
+
68
+ def split_and_transcribe_audio(audio_path):
69
+ if not os.path.exists(audio_path):
70
+ raise FileNotFoundError(f"Audio file not found at {audio_path}")
71
+
72
+ try:
73
+ audio = AudioSegment.from_wav(audio_path)
74
+ chunk_length_ms = 60000 # Split audio into 1-minute chunks
75
+ chunks = make_chunks(audio, chunk_length_ms)
76
+
77
+ transcript = ""
78
+ for i, chunk in enumerate(chunks):
79
+ chunk_path = f"{audio_path[:-4]}_chunk{i}.wav"
80
+ chunk.export(chunk_path, format="wav")
81
+
82
+ with open(chunk_path, "rb") as audio_chunk:
83
+ transcript += transcribe_audio_chunk(audio_chunk)
84
+ transcript += " " # Add space between chunks' transcriptions
85
+
86
+ return transcript.strip()
87
  except Exception as e:
88
  raise RuntimeError(f"Error during transcription: {e}")
89