from transformers import pipeline
from transformers.utils import logging
import torch
import pandas as pd
import time
import gradio as gr

logging.set_verbosity_error()

# Speech-to-text: Distil-Whisper transcribes English audio.
asr = pipeline(task="automatic-speech-recognition",
               model="distil-whisper/distil-small.en")

# Translation: NLLB-200 translates the English transcript into the target language.
translator = pipeline(task="translation",
                      model="facebook/nllb-200-3.3B",
                      # model="facebook/nllb-200-distilled-600M",  # smaller, faster alternative
                      max_length=5120,
                      torch_dtype=torch.bfloat16)

# CSV mapping human-readable language names to FLORES-200 codes
# (columns: Language, FLORES_200_code).
flores_200_df = pd.read_csv("Flores200_language_codes.csv", encoding="cp1252")
flores_200 = dict(zip(flores_200_df["Language"], flores_200_df["FLORES_200_code"]))
flores_200_languages = list(flores_200.keys())


def transcribe_audio(filepath, tgt_language):
    # Validate inputs up front: the dropdown and recorder return None when empty.
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    if tgt_language is None:
        gr.Warning("No target language selected, please retry.")
        return ""

    # The dropdown is configured with type="index", so tgt_language is the row index.
    target_language = flores_200_df.loc[int(tgt_language), "Language"]
    target_code = flores_200_df.loc[int(tgt_language), "FLORES_200_code"]
    print(f"Selected Target Language: {target_language}")
    time.sleep(5)  # brief pause before running the pipelines

    english_transcript = asr(
        filepath,
        # max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )["text"]
    print(english_transcript)

    # Translate sentence by sentence to stay within the translator's length limit.
    translations = []
    for sentence in english_transcript.split("."):
        if not sentence.strip():  # skip empty segments produced by the split
            continue
        translation = translator(sentence,
                                 src_lang="eng_Latn",
                                 tgt_lang=target_code)[0]["translation_text"]
        translations.append(translation + ".")
    output = " ".join(translations)
    print(output)
    return output


app_title = "Transcribe English Audio into any Language - test and demo app by Srinivas V."

mic_transcribe = gr.Interface(
    title=app_title,
    description="Speak into your system microphone, select your target language, and submit (if an error appears, retry).",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources=["microphone"], type="filepath"),
            gr.Dropdown(flores_200_languages, type="index",
                        label="Select Target Language")],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")

file_transcribe = gr.Interface(
    title=app_title,
    description="Upload an audio file, select your target language, and submit (if an error appears, retry).",
    fn=transcribe_audio,
    inputs=[gr.Audio(sources=["upload"], type="filepath"),
            gr.Dropdown(flores_200_languages, type="index",
                        label="Select Target Language")],
    outputs=gr.Textbox(label="Transcription in Selected Target Language", lines=3),
    allow_flagging="never")

demo = gr.Blocks()
with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Speak Through Microphone", "Upload Audio File"],
    )

demo.launch(debug=True)