Spaces:
Runtime error
Runtime error
Commit
·
a3e23e4
1
Parent(s):
ed868f3
Update app.py
Browse files
app.py
CHANGED
@@ -47,9 +47,9 @@ def RemoveAllFiles():
|
|
47 |
if (os.path.isfile(file)):
|
48 |
os.remove(file)
|
49 |
|
50 |
-
def
|
51 |
SPEAKER_DICT = {}
|
52 |
-
SPEAKERS = []
|
53 |
|
54 |
def GetSpeaker(sp):
|
55 |
speaker = sp
|
@@ -61,10 +61,6 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
61 |
else:
|
62 |
speaker = SPEAKER_DICT[sp]
|
63 |
return speaker
|
64 |
-
|
65 |
-
def GenerateSpeakerDict(sp):
|
66 |
-
global SPEAKERS
|
67 |
-
SPEAKERS = [speaker.strip() for speaker in sp.split(',')]
|
68 |
|
69 |
def millisec(timeStr):
|
70 |
spl = timeStr.split(":")
|
@@ -113,7 +109,7 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
113 |
return f"dz_{audio}.wav", dzList, segments
|
114 |
|
115 |
def transcribe(dz_audio):
|
116 |
-
model = whisper.load_model("
|
117 |
result = model.transcribe(dz_audio)
|
118 |
# for _ in result['segments']:
|
119 |
# print(_['start'], _['end'], _['text'])
|
@@ -140,7 +136,6 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
140 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
141 |
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
142 |
|
143 |
-
GenerateSpeakerDict(SpeakerNames)
|
144 |
spacermilli, spacer = preprocess(audio)
|
145 |
dz_audio, dzList, segments = diarization(audio)
|
146 |
conversation, t_text = transcribe(dz_audio)
|
@@ -179,7 +174,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
179 |
# conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
180 |
# conversation[-1][1] += segment["text"][1:]
|
181 |
# return output
|
182 |
-
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation])), conversation
|
183 |
|
184 |
def get_duration(path):
|
185 |
with contextlib.closing(wave.open(path,'r')) as f:
|
@@ -237,7 +232,7 @@ def AudioTranscribe(NumberOfSpeakers=None, SpeakerNames="", audio="", retries=5)
|
|
237 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
238 |
if not (os.path.isfile("temp_audio.wav")):
|
239 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
240 |
-
return
|
241 |
else:
|
242 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
243 |
|
@@ -253,10 +248,10 @@ def VideoTranscribe(NumberOfSpeakers=None, SpeakerNames="", video="", retries=5)
|
|
253 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
254 |
if not (os.path.isfile("temp_audio.wav")):
|
255 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
256 |
-
return
|
257 |
else:
|
258 |
raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
|
259 |
-
return
|
260 |
|
261 |
def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5):
|
262 |
if retries:
|
@@ -280,7 +275,7 @@ def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries =
|
|
280 |
stream = ffmpeg.input('temp_audio.m4a')
|
281 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
282 |
RemoveFile("temp_audio.m4a")
|
283 |
-
return
|
284 |
else:
|
285 |
raise gr.Error(f"Unable to get video from {URL}")
|
286 |
|
|
|
47 |
if (os.path.isfile(file)):
|
48 |
os.remove(file)
|
49 |
|
50 |
+
def Transcribe_V1(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
51 |
SPEAKER_DICT = {}
|
52 |
+
SPEAKERS = [speaker.strip() for speaker in SpeakerNames.split(',')]
|
53 |
|
54 |
def GetSpeaker(sp):
|
55 |
speaker = sp
|
|
|
61 |
else:
|
62 |
speaker = SPEAKER_DICT[sp]
|
63 |
return speaker
|
|
|
|
|
|
|
|
|
64 |
|
65 |
def millisec(timeStr):
|
66 |
spl = timeStr.split(":")
|
|
|
109 |
return f"dz_{audio}.wav", dzList, segments
|
110 |
|
111 |
def transcribe(dz_audio):
|
112 |
+
model = whisper.load_model("large")
|
113 |
result = model.transcribe(dz_audio)
|
114 |
# for _ in result['segments']:
|
115 |
# print(_['start'], _['end'], _['text'])
|
|
|
136 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
137 |
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
138 |
|
|
|
139 |
spacermilli, spacer = preprocess(audio)
|
140 |
dz_audio, dzList, segments = diarization(audio)
|
141 |
conversation, t_text = transcribe(dz_audio)
|
|
|
174 |
# conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
175 |
# conversation[-1][1] += segment["text"][1:]
|
176 |
# return output
|
177 |
+
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation])), ({ "data": [{"speaker": speaker, "text": text} for speaker, text in conversation]})
|
178 |
|
179 |
def get_duration(path):
|
180 |
with contextlib.closing(wave.open(path,'r')) as f:
|
|
|
232 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
233 |
if not (os.path.isfile("temp_audio.wav")):
|
234 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
235 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
236 |
else:
|
237 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
238 |
|
|
|
248 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
249 |
if not (os.path.isfile("temp_audio.wav")):
|
250 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
251 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
252 |
else:
|
253 |
raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
|
254 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
255 |
|
256 |
def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5):
|
257 |
if retries:
|
|
|
275 |
stream = ffmpeg.input('temp_audio.m4a')
|
276 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
277 |
RemoveFile("temp_audio.m4a")
|
278 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
279 |
else:
|
280 |
raise gr.Error(f"Unable to get video from {URL}")
|
281 |
|