jlvdoorn committed
Commit dcdf98f
1 Parent(s): fac5461
Files changed (1):
  1. app.py +31 -9
app.py CHANGED
@@ -18,7 +18,7 @@ bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
 whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
 whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
 #%%
-def transcribe(audio_file, audio_mic, model_version):
+def transcribe(audio, model_version):
     if model_version == 'large-v2':
         whisper = whisper_v2
         ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
@@ -27,10 +27,8 @@ def transcribe(audio_file, audio_mic, model_version):
         whisper = whisper_v3
         ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
     dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
-    if audio_mic is not None:
-        return whisper(audio_mic)['text']
-    elif audio_file is not None:
-        return whisper(audio_file)['text']
+    if audio is not None:
+        return whisper(audio)['text']
     else:
         return 'There was no audio to transcribe...'
 
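For reference, collapsing audio_file / audio_mic into one audio argument works because both Gradio audio components below use type='filepath', so the handler always receives a path string (or None). A minimal sketch of the resulting pipeline call, assuming the standard transformers ASR pipeline interface; example.wav is a hypothetical local file, not part of this commit:

# Both tabs hand transcribe() a plain filepath, so one parameter covers both.
# 'example.wav' is a hypothetical recording used only for illustration.
result = whisper_v3('example.wav')
print(result['text'])  # ASR pipelines return the transcription under 'text'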
@@ -54,8 +52,8 @@ def extractCallSignCommand(transcription):
         return 'There was no transcription to extract a callsign or command from...'
 
 #%%
-def transcribeAndExtract(audio_file, audio_mic, transcribe_only, model_version):
-    transcription = transcribe(audio_file, audio_mic, model_version)
+def transcribeAndExtract(audio, transcribe_only, model_version):
+    transcription = transcribe(audio, model_version)
     if not transcribe_only:
         callSignCommandValues = extractCallSignCommand(transcription)
     else:
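extractCallSignCommand() (unchanged by this commit) wraps the bert_atco_ner pipeline loaded at the top of the file. A minimal sketch of the raw output it post-processes, assuming the standard transformers token-classification interface; the utterance is invented for illustration:

# Hypothetical ATC phrase; the entity labels come from the fine-tuned
# Jzuluaga/bert-base-ner-atc-en-atco2-1h model (callsigns, commands, values).
entities = bert_atco_ner('swissair four two seven descend flight level eight zero')
for ent in entities:
    print(ent['word'], ent['entity'], round(float(ent['score']), 2))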
@@ -79,5 +77,29 @@ iface = gr.Interface(
 )
 
 #%%
-#iface.launch(server_name='0.0.0.0', server_port=9000)
-iface.launch()
+file_iface = gr.Interface(
+    fn = transcribeAndExtract,
+    inputs = [gr.Audio(source='upload', type='filepath', interactive=True),
+              gr.Checkbox(label='Transcribe only', value=False),
+              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
+             ],
+
+    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
+    title = 'Whisper ATC - Large v3',
+    description = 'Transcribe and extract',
+)
+
+mic_iface = gr.Interface(
+    fn = transcribeAndExtract,
+    inputs = [gr.Audio(source='microphone', type='filepath'),
+              gr.Checkbox(label='Transcribe only', value=False),
+              gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version')
+             ],
+
+    outputs = [gr.Textbox(label='Transcription'), gr.Textbox(label='Callsigns, commands and values')],
+    title = 'Whisper ATC - Large v3',
+    description = 'Transcribe and extract',
+)
+#%%
+demo = gr.TabbedInterface([file_iface, mic_iface], ["File", "Microphone"])
+demo.launch()
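With the commit applied, both tabs feed the same handler, so a quick smoke test can bypass the UI entirely. A sketch assuming Gradio 3.x (gr.Interface with source= audio components), that transcribeAndExtract() returns the pair bound to the two output textboxes, and that sample.wav exists locally:

# 'sample.wav' is a hypothetical test file; the two return values mirror
# the 'Transcription' and 'Callsigns, commands and values' textboxes.
transcription, extraction = transcribeAndExtract('sample.wav', transcribe_only=False, model_version='large-v3')
print(transcription)
print(extraction)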