Spaces:
Running
Running
File size: 1,967 Bytes
bffc737 137a8df bffc737 c32cd1a 137a8df c61758c ae58876 b6e8b6c 137a8df b6e8b6c 516ddf0 dcdf98f bffc737 10994f7 e475ec1 b6e8b6c dcdf98f b6e8b6c dcdf98f bc83b2a dcdf98f 10994f7 e475ec1 b6e8b6c dcdf98f b6e8b6c dcdf98f bc83b2a dcdf98f 516ddf0 dcdf98f 516ddf0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import os
# Validation splits of the two datasets (presumably ATC speech corpora, going
# by their names). Only atco2 is used below; atcosim is loaded but its
# examples are currently disabled.
atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
atcosim = load_dataset('jlvdoorn/atcosim', split='validation')

# How many dataset clips are turned into example rows.
num_examples = 3

# One row per clip: an audio dict (sampling rate + raw samples), a False flag
# and the 'large-v3' label. The layout appears to mirror the three inputs of
# the interfaces defined further down (audio, checkbox, dropdown).
examples_atco2 = [
    [{'sampling_rate': clip['sampling_rate'], 'raw': clip['array']}, False, 'large-v3']
    for clip in (atco2[idx]['audio'] for idx in range(num_examples))
]
#examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
examples = examples_atco2  #+ examples_atcosim

# Pipeline built from the fine-tuned Whisper checkpoint; transcribe() calls it
# and reads the 'text' field of the result.
whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
def transcribe(audio, transcribe_only=False, model_version='large-v3'):
    """Transcribe one audio clip with the module-level ``whisper`` pipeline.

    Both interfaces in this file feed the callback three input values (the
    audio, the 'Transcribe only' checkbox and the model-version dropdown) and
    declare two output textboxes, so the callback accepts three arguments and
    returns two strings.

    Args:
        audio: Value forwarded unchanged to ``whisper``; ``None`` when no
            audio was supplied.
        transcribe_only: State of the 'Transcribe only' checkbox. Accepted
            for interface compatibility; not used, since no extraction step
            exists in this file.
        model_version: Selected Whisper version. Accepted for interface
            compatibility; not used, since only one pipeline is loaded.

    Returns:
        A ``(transcription, extraction)`` tuple of strings. ``extraction``
        (the 'Callsigns, commands and values' box) is always empty because
        nothing in this file computes it.
    """
    if audio is None:
        # Nothing recorded/uploaded: report it in the transcription box.
        return 'There was no audio to transcribe...', ''
    return whisper(audio)['text'], ''
# "File" tab: transcribe an uploaded recording.
file_iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # type='filepath' hands the callback a path to the uploaded file, the
        # same form the microphone tab uses. Without it the component falls
        # back to its default type, which transcribe() would pass straight
        # into the pipeline.
        gr.Audio(source='upload', type='filepath', interactive=True),
        # `value` (not `default`) sets the initial state, as on the Dropdown.
        gr.Checkbox(label='Transcribe only', value=False),
        gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
    ],
    outputs=[
        gr.Textbox(label='Transcription'),
        gr.Textbox(label='Callsigns, commands and values'),
    ],
    title='Whisper ATC - Large v3',
    description='Transcribe ATC speech',
    # examples = examples,
)
# "Microphone" tab: transcribe a clip recorded in the browser.
mic_iface = gr.Interface(
    fn=transcribe,
    inputs=[
        # The recording reaches the callback as a file path.
        gr.Audio(source='microphone', type='filepath'),
        # `value` (not `default`) sets the initial state, as on the Dropdown.
        gr.Checkbox(label='Transcribe only', value=False),
        gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
    ],
    outputs=[
        gr.Textbox(label='Transcription'),
        gr.Textbox(label='Callsigns, commands and values'),
    ],
    title='Whisper ATC - Large v3',
    description='Transcribe ATC speech',
)
# Combine both interfaces into one app with a "File" and a "Microphone" tab.
demo = gr.TabbedInterface(
    interface_list=[file_iface, mic_iface],
    tab_names=["File", "Microphone"],
)

# Bind to all network interfaces rather than only localhost.
demo.launch(server_name='0.0.0.0')
|