Spaces:
Running
Running
File size: 1,558 Bytes
bffc737 137a8df bffc737 100b19d bffc737 c32cd1a 137a8df c61758c 942a9ec b6e8b6c 137a8df ff9e4fc 32c7384 ff9e4fc 32c7384 ff9e4fc 100b19d b6e8b6c 516ddf0 6056819 dcdf98f bffc737 10994f7 e475ec1 6056819 b6e8b6c 6056819 dcdf98f bc83b2a 34a193d dcdf98f 10994f7 e475ec1 6056819 b6e8b6c 6056819 dcdf98f bc83b2a dcdf98f 516ddf0 dcdf98f 516ddf0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import numpy as np
import os
# Validation splits of the two ATC corpora. Only ATCO2 is used further down in
# the visible script; ATCOSIM is loaded but not referenced by any live code.
atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
atcosim = load_dataset('jlvdoorn/atcosim', split='validation')

# Raw sample bytes of the first few ATCO2 clips, one single-item row per clip.
# Not wired into the interface at the moment (a single example is used below).
num_examples = 3
examples_atco2 = [[atco2[i]['audio']['array'].tobytes()] for i in range(num_examples)]

# The one example that is actually shown: the first ATCO2 clip, converted to
# float32 and peak-normalised, then serialised to bytes.
first_clip = atco2[0]['audio']  # look the clip up once instead of once per field
sr = first_clip['sampling_rate']
y = first_clip['array'].astype(np.float32)  # astype copies, dataset data is untouched
peak = np.max(np.abs(y))
if peak > 0:  # an all-zero clip would otherwise be divided by zero and become NaN
    y /= peak
y = y.tobytes()
# NOTE(review): Gradio Audio examples are normally file paths or URLs; confirm
# that raw sample bytes are accepted by the installed Gradio version.
examples = [[y]]

# Speech-recognition pipeline, built once at import time and reused by
# transcribe() for every request.
whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
def transcribe(audio):
    """Transcribe one audio clip with the module-level ``whisper`` pipeline.

    Args:
        audio: The value delivered by a Gradio ``Audio`` input. Supported:
            ``None`` (nothing uploaded or recorded), a ``(sample_rate,
            samples)`` tuple (what ``Audio`` produces when no ``type`` is
            given, as in the upload tab), or anything the pipeline accepts
            directly, such as a file path (``type='filepath'``).

    Returns:
        The recognised text, or a fixed notice when there is no audio.
    """
    no_audio_notice = 'There was no audio to transcribe...'
    if audio is None:
        return no_audio_notice

    if isinstance(audio, tuple):
        # The pipeline does not take a bare tuple; it wants a dict holding the
        # sampling rate and a 1-D float waveform.
        sample_rate, samples = audio
        samples = np.asarray(samples, dtype=np.float32)
        if samples.ndim > 1:
            # Down-mix to mono. Assumes a (samples, channels) layout, which is
            # how Gradio is understood to deliver multi-channel audio.
            # TODO confirm against the installed Gradio version.
            samples = samples.mean(axis=1)
        if samples.size == 0:
            return no_audio_notice
        peak = np.max(np.abs(samples))
        if peak > 0:  # leave pure silence as zeros instead of producing NaNs
            samples = samples / peak
        audio = {'sampling_rate': sample_rate, 'raw': samples}

    return whisper(audio)['text']
def _build_interface(audio_input, example_rows=None):
    """Wrap ``transcribe`` in a Gradio interface fed by ``audio_input``.

    Both tabs share the same output box, title and description; only the
    audio input component and the optional example rows differ.
    """
    return gr.Interface(
        fn=transcribe,
        inputs=audio_input,
        outputs=gr.Textbox(label='Transcription'),
        title='Whisper ATC - Large v3',
        description='Transcribe ATC speech',
        examples=example_rows,
    )


# Tab 1: transcribe an uploaded audio file; shows the prepared example rows.
file_iface = _build_interface(
    gr.Audio(source='upload', interactive=True),
    example_rows=examples,
)

# Tab 2: transcribe a microphone recording, handed over as a file path.
mic_iface = _build_interface(gr.Audio(source='microphone', type='filepath'))

# Combine both interfaces into one tabbed app and serve it on all interfaces.
demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
demo.launch(server_name='0.0.0.0')
|