"""Gradio demo that transcribes audio with a fine-tuned Whisper pipeline."""

import os
from typing import Optional

import gradio as gr
from transformers import pipeline

# The access token is read from the environment; a missing variable raises
# KeyError at start-up rather than failing later on the first request.
# NOTE(review): the original call also passed ``src='model'``. That is not a
# documented ``pipeline()`` argument (it resembles the ``src`` argument of
# ``gr.Interface.load``) and is believed to be rejected with a TypeError by
# recent transformers releases, so it is omitted here -- confirm against the
# pinned transformers version.
pipe = pipeline(
    model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim',
    use_auth_token=os.environ['HUGGINGFACE_TOKEN'],
)


def transcribe(audio_mic: Optional[str], audio_file: Optional[str]) -> str:
    """Return the transcription of the supplied audio.

    Both inputs come from ``gr.Audio(type='filepath')`` components, so each
    one is either a path to an audio file or ``None`` when nothing was
    provided. An uploaded file takes precedence over a microphone recording.

    Args:
        audio_mic: Path of the microphone recording, or ``None``.
        audio_file: Path of the uploaded audio file, or ``None``.

    Returns:
        The ``'text'`` field of the pipeline output for the chosen input, or
        a fixed notice when neither input was provided.
    """
    if audio_file is not None:
        return pipe(audio_file)['text']
    if audio_mic is not None:
        return pipe(audio_mic)['text']
    return 'There was no audio to transcribe...'


# Input order must match the parameter order of ``transcribe``:
# microphone first, uploaded file second.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source='microphone', type='filepath'),
        gr.Audio(source='upload', type='filepath'),
    ],
    outputs='text',
    title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
    description='Whisper Large v2 model fine-tuned on the ATCO2-ASR and ATCOSIM datasets.',
)

iface.launch()