jlvdoorn committed on
Commit
f091ddf
1 Parent(s): c5c476a

Fixed to working version using local model

Browse files
Files changed (1) hide show
  1. app.py +32 -31
app.py CHANGED
@@ -3,51 +3,52 @@ from transformers import pipeline
3
  import gradio as gr
4
  import os
5
 
6
-
7
  #%%
8
- whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim', use_auth_token=os.environ['HUGGINGFACE_TOKEN'])
9
- # bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
10
 
11
  #%%
12
- def transcribe(audio_file):
13
- if audio_file is not None:
 
 
14
  return whisper(audio_file)['text']
15
  else:
16
  return 'There was no audio to transcribe...'
17
 
18
  #%%
19
- # def extractCallSignCommand(transcription):
20
- # if type(transcription) is str:
21
- # result = bert_atco_ner(transcription)
22
- # callsigns = []
23
- # commands = []
24
- # values = []
25
- # for item in result:
26
- # if 'callsign' in item['entity']:
27
- # callsigns.append(item['word'])
28
- # if 'command' in item['entity']:
29
- # commands.append(item['word'])
30
- # if 'value' in item['entity']:
31
- # values.append(item['word'])
32
 
33
- # return 'Callsigns: ' + ', '.join(callsigns) + '\nCommands: ' + ', '.join(commands) + '\nValues: ' + ', '.join(values)
34
- # else:
35
- # return 'There was no transcription to extract a callsign or command from...'
36
 
37
  #%%
38
- # def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
39
- # transcription = transcribe(audio_mic, audio_file)
40
- # if not transcribe_only:
41
- # callSignCommandValues = extractCallSignCommand(transcription)
42
- # else:
43
- # callSignCommandValues = ''
44
- # return transcription, callSignCommandValues
45
 
46
  #%%
47
  iface = gr.Interface(
48
- fn=transcribe,
49
- inputs=gr.Audio(source='upload', type='filepath'),
50
- outputs=gr.Text(label='Transcription'),
51
  title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
52
  description='This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.',
53
  )
 
3
  import gradio as gr
4
  import os
5
 
 
6
  #%%
7
# Speech pipeline built from the Whisper checkpoint fine-tuned on ATCO2/ATCOSIM;
# `transcribe` reads the 'text' entry of its result.
whisper = pipeline(
    model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim',
)

# Entity-tagging pipeline; `extractCallSignCommand` reads the 'entity' and
# 'word' keys of each item it returns.
bert_atco_ner = pipeline(
    model='Jzuluaga/bert-base-ner-atc-en-atco2-1h',
)
9
 
10
  #%%
11
def transcribe(audio_file, audio_mic):
    """Transcribe one audio input with the module-level ``whisper`` pipeline.

    The microphone recording takes precedence: ``audio_file`` is only used
    when ``audio_mic`` is ``None``.

    Args:
        audio_file: Uploaded audio, or ``None`` when nothing was uploaded.
        audio_mic: Microphone recording, or ``None`` when nothing was recorded.

    Returns:
        The ``'text'`` entry of the pipeline result, or a fixed placeholder
        message when both inputs are ``None``.
    """
    chosen = audio_file if audio_mic is None else audio_mic
    if chosen is None:
        return 'There was no audio to transcribe...'
    return whisper(chosen)['text']
18
 
19
  #%%
20
def extractCallSignCommand(transcription):
    """Summarise the callsigns, commands and values tagged in a transcription.

    The text is run through the module-level ``bert_atco_ner`` pipeline and
    every returned item is sorted into a category by its ``'entity'`` label.

    Args:
        transcription: Text to analyse. Anything that is not a ``str``
            (for example ``None``) is treated as "nothing to analyse".

    Returns:
        A three-line string listing the collected words per category, or a
        fixed message when ``transcription`` is not a string.
    """
    # isinstance (rather than an exact type comparison) also accepts str
    # subclasses, which are perfectly valid text.
    if not isinstance(transcription, str):
        return 'There was no transcription to extract a callsign or command from...'

    words_by_label = {'callsign': [], 'command': [], 'value': []}
    for item in bert_atco_ner(transcription):
        # A label is matched by substring, so one item may land in more than
        # one category if its entity tag mentions several of them.
        for label, words in words_by_label.items():
            if label in item['entity']:
                words.append(item['word'])

    return (
        'Callsigns: ' + ', '.join(words_by_label['callsign'])
        + '\nCommands: ' + ', '.join(words_by_label['command'])
        + '\nValues: ' + ', '.join(words_by_label['value'])
    )
37
 
38
  #%%
39
def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
    """Transcribe the supplied audio and optionally extract tagged entities.

    NOTE(review): the parameter names are swapped relative to what they
    receive. The interface lists the upload widget first and the microphone
    widget second, so ``audio_mic`` actually carries the upload and
    ``audio_file`` the recording. They are forwarded positionally to
    ``transcribe(audio_file, audio_mic)``, which undoes the swap. The names
    are kept as-is so existing callers are unaffected.

    Args:
        audio_mic: First audio input, or ``None``.
        audio_file: Second audio input, or ``None``.
        transcribe_only: When truthy, skip entity extraction.

    Returns:
        A ``(transcription, entities)`` tuple of strings; ``entities`` is
        empty when ``transcribe_only`` is set.
    """
    has_audio = audio_mic is not None or audio_file is not None
    transcription = transcribe(audio_mic, audio_file)

    if transcribe_only:
        return transcription, ''

    # Without audio, `transcription` is only a placeholder message. Hand the
    # extractor None so it reports that there is nothing to analyse instead
    # of tagging entities in the placeholder text.
    callSignCommandValues = extractCallSignCommand(
        transcription if has_audio else None
    )
    return transcription, callSignCommandValues
46
 
47
  #%%
48
# Web UI: two audio inputs plus a checkbox in, two text boxes out.
iface = gr.Interface(
    fn=transcribeAndExtract,
    # Order matters: the input values are handed to `fn` positionally.
    inputs=[
        gr.Audio(source='upload', type='filepath'),
        gr.Audio(source='microphone', type='filepath'),
        # The initial state is given through `value`; `default` is the legacy
        # spelling and is not a parameter of this component.
        gr.Checkbox(label='Transcribe only', value=False),
    ],
    # One text box per element of the tuple returned by `fn`.
    outputs=[
        gr.Text(label='Transcription'),
        gr.Text(label='Callsigns, commands and values'),
    ],
    title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
    description='This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.',
)