#%%
from huggingface_hub import login
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import os
atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
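# The validation splits above provide short ATC audio clips; each example row built
# below supplies values for all three interface inputs (audio, 'Transcribe only', model version).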
num_examples = 3
examples_atco2 = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
#examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
examples = examples_atco2 #+ examples_atcosim
# examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
# examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]
whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
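# Usage note (sketch): besides a path to an audio file, the transformers ASR pipeline also
# accepts a dict of the form {'sampling_rate': int, 'raw': np.ndarray}, e.g.
# whisper({'sampling_rate': atco2[0]['audio']['sampling_rate'], 'raw': atco2[0]['audio']['array']})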
#%%
def transcribe(audio, transcribe_only, model_version):
    if audio is None:
        return 'There was no audio to transcribe...', ''
    # The upload tab passes a (sample_rate, int16 array) tuple, the microphone tab a
    # filepath; normalise tuples to the dict format the transformers pipeline accepts.
    if isinstance(audio, tuple):
        sampling_rate, array = audio
        audio = {'sampling_rate': sampling_rate, 'raw': array.astype('float32') / 32768.0}
    # Callsign/command extraction is not implemented here, so the second output stays empty.
    return whisper(audio)['text'], ''
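# Optional local sanity check (assumes the validation splits loaded above are available):
# print(transcribe({'sampling_rate': atco2[0]['audio']['sampling_rate'],
#                   'raw': atco2[0]['audio']['array']}, False, 'large-v3'))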
#%%
file_iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source='upload', interactive=True),
        gr.Checkbox(label='Transcribe only', value=False),
        gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
    ],
    outputs=[gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
    title='Whisper ATC - Large v3',
    description='Transcribe and extract',
    examples=examples,
)
mic_iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source='microphone', type='filepath'),
        gr.Checkbox(label='Transcribe only', value=False),
        gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
    ],
    outputs=[gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
    title='Whisper ATC - Large v3',
    description='Transcribe and extract',
)
#%%
demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
demo.launch()
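# On Hugging Face Spaces, launch() needs no arguments; for local testing, demo.launch(share=True)
# would additionally create a temporary public link.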