from transformers import pipeline
import gradio as gr

# Load the fine-tuned Whisper ATC model as an automatic-speech-recognition pipeline.
whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')

def transcribe(audio):
    # Gradio hands the uploaded/recorded audio to us as a file path (type='filepath'),
    # which the Whisper pipeline accepts directly.
    if audio is not None:
        return whisper(audio)['text']
    else:
        return 'There was no audio to transcribe...'

# Tab 1: transcribe an uploaded audio file.
file_iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source='upload', type='filepath', interactive=True),
    outputs=gr.Textbox(label='Transcription'),
    title='Whisper ATC - Large v3',
    description='Transcribe ATC speech',
)

# Tab 2: transcribe audio recorded from the microphone.
mic_iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(source='microphone', type='filepath'),
    outputs=gr.Textbox(label='Transcription'),
    title='Whisper ATC - Large v3',
    description='Transcribe ATC speech',
)

# Combine both interfaces into tabs and bind to all network interfaces
# so the app is reachable from outside the container.
demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
demo.launch(server_name='0.0.0.0')