Spaces:
Running
Running
ashhadahsan
commited on
Commit
·
4751966
1
Parent(s):
eeb50b0
Update app.py
Browse files
app.py
CHANGED
@@ -9,7 +9,6 @@ from utils import (
|
|
9 |
read,
|
10 |
get_key,
|
11 |
)
|
12 |
-
import subprocess
|
13 |
import whisperx as whisper
|
14 |
import json
|
15 |
import pandas as pd
|
@@ -164,33 +163,22 @@ with output:
|
|
164 |
if audio_uploaded is not None:
|
165 |
if audio_uploaded.name.endswith(".wav"):
|
166 |
temp = AudioSegment.from_wav(audio_uploaded)
|
167 |
-
|
168 |
-
temp.export(input)
|
169 |
if audio_uploaded.name.endswith(".mp3"):
|
170 |
-
input=f"{name}.mp3"
|
171 |
|
172 |
-
|
173 |
-
with open(input, "wb") as f:
|
174 |
|
175 |
-
f.write(audio_uploaded.getbuffer())
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
# subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
|
180 |
-
# f'{name}.wav'])
|
181 |
-
# try:
|
182 |
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
|
188 |
-
|
189 |
-
|
190 |
if language == "":
|
191 |
model = whisper.load_model(model_name)
|
192 |
with st.spinner("Detecting language..."):
|
193 |
-
detection = detect_language(
|
194 |
language = detection.get("detected_language")
|
195 |
del model
|
196 |
if len(language) > 2:
|
@@ -204,7 +192,7 @@ with output:
|
|
204 |
with st.container():
|
205 |
with st.spinner(f"Running with {model_name} model"):
|
206 |
result = model.transcribe(
|
207 |
-
|
208 |
language=language,
|
209 |
patience=patience,
|
210 |
initial_prompt=initial_prompt,
|
@@ -228,15 +216,15 @@ with output:
|
|
228 |
result["segments"],
|
229 |
model_a,
|
230 |
metadata,
|
231 |
-
|
232 |
device=device,
|
233 |
)
|
234 |
write(
|
235 |
-
|
236 |
dtype=transcription,
|
237 |
result_aligned=result_aligned,
|
238 |
)
|
239 |
-
trans_text = read(
|
240 |
trans.text_area(
|
241 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
242 |
)
|
@@ -319,16 +307,16 @@ with output:
|
|
319 |
cont,
|
320 |
model_a,
|
321 |
metadata,
|
322 |
-
|
323 |
device=device,
|
324 |
)
|
325 |
words_segments = result_aligned["word_segments"]
|
326 |
write(
|
327 |
-
|
328 |
dtype=transcription,
|
329 |
result_aligned=result_aligned,
|
330 |
)
|
331 |
-
trans_text = read(
|
332 |
char_segments = []
|
333 |
word_segments = []
|
334 |
|
@@ -387,4 +375,4 @@ with output:
|
|
387 |
"detected language", language_dict.get(language), disabled=True
|
388 |
)
|
389 |
os.remove(f"{name}.wav")
|
390 |
-
os.remove(f"{json_filname}.json")
|
|
|
9 |
read,
|
10 |
get_key,
|
11 |
)
|
|
|
12 |
import whisperx as whisper
|
13 |
import json
|
14 |
import pandas as pd
|
|
|
163 |
if audio_uploaded is not None:
|
164 |
if audio_uploaded.name.endswith(".wav"):
|
165 |
temp = AudioSegment.from_wav(audio_uploaded)
|
166 |
+
temp.export(f"{name}.wav")
|
|
|
167 |
if audio_uploaded.name.endswith(".mp3"):
|
|
|
168 |
|
169 |
+
try:
|
|
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
+
temp = AudioSegment.from_file(audio_uploaded, format="mp3")
|
173 |
+
temp.export(f"{name}.wav")
|
174 |
+
except:
|
|
|
175 |
|
176 |
+
temp = AudioSegment.from_file(audio_uploaded, format="mp4")
|
177 |
+
temp.export(f"{name}.wav")
|
178 |
if language == "":
|
179 |
model = whisper.load_model(model_name)
|
180 |
with st.spinner("Detecting language..."):
|
181 |
+
detection = detect_language(f"{name}.wav", model)
|
182 |
language = detection.get("detected_language")
|
183 |
del model
|
184 |
if len(language) > 2:
|
|
|
192 |
with st.container():
|
193 |
with st.spinner(f"Running with {model_name} model"):
|
194 |
result = model.transcribe(
|
195 |
+
f"{name}.wav",
|
196 |
language=language,
|
197 |
patience=patience,
|
198 |
initial_prompt=initial_prompt,
|
|
|
216 |
result["segments"],
|
217 |
model_a,
|
218 |
metadata,
|
219 |
+
f"{name}.wav",
|
220 |
device=device,
|
221 |
)
|
222 |
write(
|
223 |
+
f"{name}.wav",
|
224 |
dtype=transcription,
|
225 |
result_aligned=result_aligned,
|
226 |
)
|
227 |
+
trans_text = read(f"{name}.wav", transcription)
|
228 |
trans.text_area(
|
229 |
"transcription", trans_text, height=None, max_chars=None, key=None
|
230 |
)
|
|
|
307 |
cont,
|
308 |
model_a,
|
309 |
metadata,
|
310 |
+
f"{name}.wav",
|
311 |
device=device,
|
312 |
)
|
313 |
words_segments = result_aligned["word_segments"]
|
314 |
write(
|
315 |
+
f"{name}.wav",
|
316 |
dtype=transcription,
|
317 |
result_aligned=result_aligned,
|
318 |
)
|
319 |
+
trans_text = read(f"{name}.wav", transcription)
|
320 |
char_segments = []
|
321 |
word_segments = []
|
322 |
|
|
|
375 |
"detected language", language_dict.get(language), disabled=True
|
376 |
)
|
377 |
os.remove(f"{name}.wav")
|
378 |
+
os.remove(f"{json_filname}.json")
|