"""Gradio demo: live microphone transcription through whisper_online streaming ASR."""

import gradio as gr
import numpy as np
from whisper_online import *

# Sample rate (Hz) that audio is converted to before it is handed to the
# streaming processor; Whisper models operate on 16 kHz mono audio.
WHISPER_SAMPLE_RATE = 16000

asr = FasterWhisperASR("es", "large-v3")  # loads and wraps Whisper model
# set options:
# asr.set_translate_task()  # it will translate from lan into English
asr.use_vad()  # set using VAD

online = OnlineASRProcessor(asr)  # create processing object with default buffer trimming option
online.init()


def _prepare_chunk(sr, y):
    """Convert a raw microphone chunk to mono, peak-normalized float32 at 16 kHz.

    Args:
        sr: Sample rate of ``y`` in Hz, as reported by the audio component.
        y: Array-like of samples, shaped ``(samples,)`` or ``(samples, channels)``.

    Returns:
        A 1-D ``float32`` array; empty if the chunk contained no samples.
    """
    samples = np.asarray(y).astype(np.float32)
    if samples.ndim > 1:
        # Down-mix multi-channel input by averaging the channels.
        samples = samples.mean(axis=1)
    if samples.size == 0:
        return samples

    peak = np.max(np.abs(samples))
    if peak > 0:
        # Skip normalization for all-zero (silent) chunks: dividing by a zero
        # peak would fill the buffer with NaNs.
        samples /= peak

    if sr != WHISPER_SAMPLE_RATE:
        # Linear-interpolation resampling; keeps the demo free of extra
        # dependencies at the cost of no anti-aliasing filter.
        n_out = int(round(samples.size * WHISPER_SAMPLE_RATE / sr))
        if n_out == 0:
            return np.zeros(0, dtype=np.float32)
        src_times = np.arange(samples.size) / sr
        dst_times = np.arange(n_out) / WHISPER_SAMPLE_RATE
        samples = np.interp(dst_times, src_times, samples).astype(np.float32)

    return samples


def transcribe(transcription, new_chunk):
    """Feed one streamed audio chunk to the recognizer and extend the transcript.

    Args:
        transcription: Transcript accumulated so far (the Gradio state value);
            ``None`` on the first call.
        new_chunk: ``(sample_rate, samples)`` tuple from the streaming audio
            component, or ``None`` when no audio was delivered.

    Returns:
        ``(state, text)``: the updated transcript twice, once for the state
        output and once for the visible text output.
    """
    transcription = transcription or ""
    if new_chunk is None:
        return transcription, transcription

    sr, y = new_chunk
    samples = _prepare_chunk(sr, y)
    if samples.size == 0:
        return transcription, transcription

    online.insert_audio_chunk(samples)
    result = online.process_iter()
    # process_iter is expected to return (begin, end, text) per the
    # whisper_streaming docs; a bare string is tolerated as well.
    text = result[2] if isinstance(result, tuple) else result
    transcription += text or ""
    return transcription, transcription


demo = gr.Interface(
    transcribe,
    ["state", gr.Audio(sources=["microphone"], streaming=True)],
    ["state", "text"],
    live=True,
)

demo.launch()