Jan van Doorn committed on
Commit
ab2b897
1 Parent(s): bffc737
Files changed (1)
  1. app.py +9 -2
app.py CHANGED
@@ -1,10 +1,13 @@
+#%%
 from transformers import pipeline
 import gradio as gr
 import os
 
-whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim', use_auth_token=os.environ['HUGGINGFACE_TOKEN'])
+#%%
+whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim', token=os.environ['HUGGINGFACE_TOKEN'])
 bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
 
+#%%
 def transcribe(audio_mic, audio_file):
     if audio_file is not None:
         return whisper(audio_file)['text']
@@ -13,6 +16,7 @@ def transcribe(audio_mic, audio_file):
     else:
         return 'There was no audio to transcribe...'
 
+#%%
 def extractCallSignCommand(transcription):
     if type(transcription) is str:
         result = bert_atco_ner(transcription)
@@ -31,6 +35,7 @@ def extractCallSignCommand(transcription):
     else:
         return 'There was no transcription to extract a callsign or command from...'
 
+#%%
 def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
     transcription = transcribe(audio_mic, audio_file)
     if not transcribe_only:
@@ -39,12 +44,14 @@ def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
         callSignCommandValues = ''
     return transcription, callSignCommandValues
 
+#%%
 iface = gr.Interface(
     fn=transcribeAndExtract,
     inputs=[gr.Audio(source='microphone', type='filepath'), gr.Audio(source='upload', type='filepath'), gr.Checkbox(label='Transcribe only', default=False)],
     outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
     title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
-    description='Whisper Large v2 model fine-tuned on the ATCO2-ASR and ATCOSIM datasets.'
+    description='This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.',
 )
 
+#%%
 iface.launch()
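
Besides the longer description string and the #%% editor cell markers, the functional change in this commit is the authentication argument passed to transformers.pipeline: the deprecated use_auth_token keyword is replaced by token. A minimal sketch of the updated call pattern, assuming a valid HUGGINGFACE_TOKEN environment variable is set and using the model ID from app.py; example.wav is a placeholder file name:

# Sketch only: 'token' supersedes the deprecated 'use_auth_token' argument
# in recent transformers releases; both pass the access token used to load
# the model from the Hugging Face Hub.
import os
from transformers import pipeline

whisper = pipeline(
    model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim',
    token=os.environ['HUGGINGFACE_TOKEN'],  # HF access token for the model repo
)

print(whisper('example.wav')['text'])  # placeholder audio path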