app.py
CHANGED
@@ -18,7 +18,7 @@ bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
 whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
 whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
 #%%
-def transcribe(audio_file, audio_mic, model_version):
+def transcribe(audio, model_version):
     if model_version == 'large-v2':
         whisper = whisper_v2
         ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
@@ -27,10 +27,8 @@ def transcribe(audio_file, audio_mic, model_version):
         whisper = whisper_v3
         ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
     dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
-    if audio_mic is not None:
-        return whisper(audio_mic)['text']
-    elif audio_file is not None:
-        return whisper(audio_file)['text']
+    if audio is not None:
+        return whisper(audio)['text']
     else:
         return 'There was no audio to transcribe...'
 
@@ -54,8 +52,8 @@ def extractCallSignCommand(transcription):
         return 'There was no transcription to extract a callsign or command from...'
 
 #%%
-def transcribeAndExtract(audio_file, audio_mic, transcribe_only, model_version):
-    transcription = transcribe(audio_file, audio_mic, model_version)
+def transcribeAndExtract(audio, transcribe_only, model_version):
+    transcription = transcribe(audio, model_version)
     if not transcribe_only:
         callSignCommandValues = extractCallSignCommand(transcription)
     else:
@@ -79,5 +77,29 @@ iface = gr.Interface(
 )
 
 #%%
-
-
+file_iface = gr.Interface(
+    fn = transcribeAndExtract,
+    inputs = [gr.Audio(source='upload', type='filepath', interactive=True),
+              gr.Checkbox(label='Transcribe only', value=False),
+              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
+             ],
+
+    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
+    title = 'Whisper ATC - Large v3',
+    description = 'Transcribe and extract',
+)
+
+mic_iface = gr.Interface(
+    fn = transcribeAndExtract,
+    inputs = [gr.Audio(source='microphone', type='filepath'),
+              gr.Checkbox(label='Transcribe only', value=False),
+              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
+             ],
+
+    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
+    title = 'Whisper ATC - Large v3',
+    description = 'Transcribe and extract',
+)
+#%%
+demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
+demo.launch()
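The commit collapses the old two-input dispatch (audio_file / audio_mic) into a single audio argument, then exposes the upload and microphone sources as separate tabs via gr.TabbedInterface. For reference, the two fine-tuned checkpoints wired into this app can also be exercised directly with the transformers pipeline API, without the Gradio layer. A minimal sketch of the same ASR-then-NER flow as transcribeAndExtract; the model IDs are taken from app.py above, while sample_atc.wav is a hypothetical placeholder path:

```python
from transformers import pipeline

# Same checkpoints as in app.py; the task (ASR / token classification)
# is inferred from each model's hub config.
whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')

# 'sample_atc.wav' is a placeholder; point it at any local ATC recording.
transcription = whisper_v3('sample_atc.wav')['text']   # speech -> text
entities = bert_atco_ner(transcription)                # tag callsigns, commands, values
print(transcription)
print(entities)
```

This mirrors what the app does per request: whisper(audio)['text'] produces the transcription, and the BERT NER pipeline is then run over that text to pull out callsigns, commands and values.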