#%%
from huggingface_hub import login
from transformers import pipeline
from datasets import load_dataset
import gradio as gr
import os

#login(token=os.environ['hf_token'])

atco2 = load_dataset('jlvdoorn/atco2-asr', split='validation')
atcosim = load_dataset('jlvdoorn/atcosim', split='validation')
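# Each row of these validation splits carries an 'audio' dict with 'array' and
# 'sampling_rate' keys, which is the format used to build the examples below.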

num_examples = 3
examples_atco2   = [ [{'sampling_rate': atco2[i]['audio']['sampling_rate'], 'raw': atco2[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
#examples_atcosim = [ [{'sampling_rate': atcosim[i]['audio']['sampling_rate'], 'raw': atcosim[i]['audio']['array']}, False, 'large-v3'] for i in range(num_examples)]
examples = examples_atco2 #+ examples_atcosim
# examples = [atco2[0]['audio']['array'], atcosim[0]['audio']['array'], atco2[1]['audio']['array'], atcosim[1]['audio']['array'], atco2[2]['audio']['array'], atcosim[2]['audio']['array']]
# examples_labels = ['Example ' + str(i+1) for i in range(len(examples))]

# ASR pipeline using a Whisper large-v3 model fine-tuned on the ATCO2-ASR and ATCOSIM corpora.
whisper = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
#%%
def transcribe(audio, model_version):
    # model_version is accepted for interface compatibility; the pipeline loaded above is fixed.
    if audio is not None:
        return whisper(audio)['text']
    else:
        return 'There was no audio to transcribe...'
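
# transcribeAndExtract is referenced by the interfaces below but not defined in this file.
# The sketch below assumes the expected signature: (audio, transcribe_only, model_version) in,
# (transcription, extracted info) out. It reuses transcribe(); the callsign/command/value
# extraction itself is assumed to live elsewhere, so a placeholder string is returned for it.
def transcribeAndExtract(audio, transcribe_only, model_version):
    transcription = transcribe(audio, model_version)
    if transcribe_only:
        return transcription, ''
    return transcription, 'Extraction is not implemented in this file.'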

#%%
file_iface = gr.Interface(
    fn = transcribeAndExtract,
    inputs = [gr.Audio(source='upload', interactive=True),
              gr.Checkbox(label='Transcribe only', value=False),
              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
            ],
    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
    title = 'Whisper ATC - Large v3',
    description = 'Transcribe and extract',
    examples = examples,
)

mic_iface = gr.Interface(
    fn = transcribeAndExtract,
    inputs = [gr.Audio(source='microphone', type='filepath'),
              gr.Checkbox(label='Transcribe only', value=False),
              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
            ],
    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
    title = 'Whisper ATC - Large v3',
    description = 'Transcribe and extract',
)
#%%
demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
demo.launch()