thak123 committed on
Commit
8621c12
·
verified ·
1 Parent(s): 70a53fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -63,22 +63,22 @@ def transcribe_speech(filepath):
63
  # Check length and process
64
  if input_features.shape[-1] > 3000:
65
  print("Splitting audio required")
66
- from pydub import AudioSegment
67
-
68
- def split_audio(file_path, chunk_length_ms=30000): # 30 sec chunks
69
- audio = AudioSegment.from_file(file_path)
70
- chunks = [audio[i:i+chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
71
- return chunks
72
-
73
- # Split and transcribe
74
- audio_chunks = split_audio(audio_path)
75
-
76
- for i, chunk in enumerate(audio_chunks):
77
- print(i)
78
- chunk.export(f"chunk_{i}.wav", format="wav")
79
- result = pipe(f"chunk_{i}.wav")
80
- output += result['text'] + " "
81
- print(f"Chunk {i}: {result['text']}")
82
  else:
83
  predicted_ids = model.generate(input_features)
84
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
63
  # Check length and process
64
  if input_features.shape[-1] > 3000:
65
  print("Splitting audio required")
66
+ # from pydub import AudioSegment
67
+
68
+ # def split_audio(file_path, chunk_length_ms=30000): # 30 sec chunks
69
+ # audio = AudioSegment.from_file(file_path)
70
+ # chunks = [audio[i:i+chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
71
+ # return chunks
72
+
73
+ # # Split and transcribe
74
+ # audio_chunks = split_audio(audio_path)
75
+
76
+ # for i, chunk in enumerate(audio_chunks):
77
+ # print(i)
78
+ # chunk.export(f"chunk_{i}.wav", format="wav")
79
+ # result = pipe(f"chunk_{i}.wav")
80
+ # output += result['text'] + " "
81
+ # print(f"Chunk {i}: {result['text']}")
82
  else:
83
  predicted_ids = model.generate(input_features)
84
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)