Commit eeb50b0 (parent a526070) by ashhadahsan: update to handle m3p
app.py CHANGED
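In short, the commit drops the ffmpeg subprocess call and instead persists the upload to a local file (`{name}.wav` via pydub for WAV, or the raw upload buffer for MP3), then threads that path, bound to `input`, through language detection, transcription, alignment, and the read/write helpers.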
```diff
@@ -164,12 +164,20 @@ with output:
     if audio_uploaded is not None:
         if audio_uploaded.name.endswith(".wav"):
             temp = AudioSegment.from_wav(audio_uploaded)
-
+            input=f"{name}.wav"
+            temp.export(input)
         if audio_uploaded.name.endswith(".mp3"):
+            input=f"{name}.mp3"
+
+
+            with open(input, "wb") as f:
+
+                f.write(audio_uploaded.getbuffer())
+


-            subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
-                             f'{name}.wav'])
+            # subprocess.call(['ffmpeg', '-i', audio_uploaded.name,
+            #                  f'{name}.wav'])
             # try:

             # temp = AudioSegment.from_file(audio_uploaded, format="mp3")
@@ -182,7 +190,7 @@ with output:
     if language == "":
         model = whisper.load_model(model_name)
         with st.spinner("Detecting language..."):
-            detection = detect_language(
+            detection = detect_language(input, model)
         language = detection.get("detected_language")
         del model
         if len(language) > 2:
@@ -196,7 +204,7 @@ with output:
     with st.container():
         with st.spinner(f"Running with {model_name} model"):
             result = model.transcribe(
-
+                input,
                 language=language,
                 patience=patience,
                 initial_prompt=initial_prompt,
@@ -220,15 +228,15 @@ with output:
                 result["segments"],
                 model_a,
                 metadata,
-
+                input,
                 device=device,
             )
             write(
-
+                input,
                 dtype=transcription,
                 result_aligned=result_aligned,
             )
-            trans_text = read(
+            trans_text = read(input, transcription)
             trans.text_area(
                 "transcription", trans_text, height=None, max_chars=None, key=None
             )
@@ -311,16 +319,16 @@ with output:
                 cont,
                 model_a,
                 metadata,
-
+                input,
                 device=device,
             )
             words_segments = result_aligned["word_segments"]
             write(
-
+                input,
                 dtype=transcription,
                 result_aligned=result_aligned,
             )
-            trans_text = read(
+            trans_text = read(input, transcription)
             char_segments = []
             word_segments = []
 
```
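Taken in isolation, the pattern the commit lands on is: a Streamlit `UploadedFile` lives in memory, while Whisper's `transcribe` wants a pathname it can hand to ffmpeg, so the upload is persisted once and that path is reused everywhere. Below is a minimal sketch of that pattern, assuming `streamlit`, `openai-whisper`, and `pydub` are installed; the `"upload"` stem, the `base` model, and the single transcribe call are illustrative stand-ins, not the Space's actual `name`, model choice, or pipeline:

```python
import streamlit as st
import whisper
from pydub import AudioSegment

# A Streamlit UploadedFile is an in-memory buffer; whisper.transcribe()
# wants a real path (it shells out to ffmpeg), so persist the upload once
# and reuse the path everywhere downstream.
audio_uploaded = st.file_uploader("Upload audio", type=["wav", "mp3"])

if audio_uploaded is not None:
    name = "upload"  # illustrative stem; the Space derives its own `name`
    if audio_uploaded.name.endswith(".wav"):
        # WAV round-trips through pydub, mirroring the Space's .wav branch
        # (format given explicitly; pydub's export() defaults to mp3).
        input = f"{name}.wav"
        AudioSegment.from_wav(audio_uploaded).export(input, format="wav")
    elif audio_uploaded.name.endswith(".mp3"):
        # MP3 bytes are dumped as-is: no pydub decode, no ffmpeg subprocess.
        input = f"{name}.mp3"
        with open(input, "wb") as f:
            f.write(audio_uploaded.getbuffer())

    model = whisper.load_model("base")  # illustrative model choice
    result = model.transcribe(input)    # the same path feeds every stage
    st.write(result["text"])
```

As in the diff, `input` shadows the Python builtin of that name; it runs fine, but a name such as `audio_path` would sidestep the collision. Writing `getbuffer()` straight to disk also avoids decoding the mp3 at upload time, which is presumably why the ffmpeg subprocess call could be commented out.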