Alex Volkov committed on
Commit
5efed34
·
1 Parent(s): 7db5fdc

Fix language support

Browse files
Files changed (1) hide show
  1. download.py +5 -2
download.py CHANGED
@@ -130,7 +130,8 @@ def caption_generator(tweet_url, language="Autodetect", model_size=model_size):
130
  try:
131
  print(f"Starting whisper transcribe with {meta['id']}.mp3")
132
  transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
133
- translate_whisper_result = transcribe(audio, translate_action=True, language=language, override_model_size=model_size)
 
134
  srt = get_srt(transcribe_whisper_result["segments"])
135
  en_srt = get_srt(translate_whisper_result["segments"])
136
 
@@ -233,7 +234,9 @@ def transcribe(audio, translate_action=True, language='Autodetect', override_mod
233
  output = model.transcribe(audio, verbose=True, **props)
234
 
235
  output['segments'] = output['segments']
236
- print(f'Finished transcribe from {output["language"]}', output["text"])
 
 
237
  return output
238
 
239
 
 
130
  try:
131
  print(f"Starting whisper transcribe with {meta['id']}.mp3")
132
  transcribe_whisper_result = transcribe(audio, translate_action=False, language=language, override_model_size=model_size)
133
+ detected_language = LANGUAGES[transcribe_whisper_result["language"]]
134
+ translate_whisper_result = transcribe(audio, translate_action=True, language=detected_language, override_model_size=model_size)
135
  srt = get_srt(transcribe_whisper_result["segments"])
136
  en_srt = get_srt(translate_whisper_result["segments"])
137
 
 
234
  output = model.transcribe(audio, verbose=True, **props)
235
 
236
  output['segments'] = output['segments']
237
+ output['requested_language'] = language.lower()
238
+
239
+ print(f'Finished transcribe from {LANGUAGES[output["language"]].capitalize()}', output["text"])
240
  return output
241
 
242