Jan van Doorn committed on
Commit
ab2b897
1 Parent(s): bffc737
Files changed (1)
  1. app.py +9 -2
app.py CHANGED
@@ -1,10 +1,13 @@
+#%%
 from transformers import pipeline
 import gradio as gr
 import os
 
-whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim', use_auth_token=os.environ['HUGGINGFACE_TOKEN'])
+#%%
+whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim', token=os.environ['HUGGINGFACE_TOKEN'])
 bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
 
+#%%
 def transcribe(audio_mic, audio_file):
     if audio_file is not None:
         return whisper(audio_file)['text']
@@ -13,6 +16,7 @@ def transcribe(audio_mic, audio_file):
     else:
         return 'There was no audio to transcribe...'
 
+#%%
 def extractCallSignCommand(transcription):
     if type(transcription) is str:
         result = bert_atco_ner(transcription)
@@ -31,6 +35,7 @@ def extractCallSignCommand(transcription):
     else:
         return 'There was no transcription to extract a callsign or command from...'
 
+#%%
 def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
     transcription = transcribe(audio_mic, audio_file)
     if not transcribe_only:
@@ -39,12 +44,14 @@ def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
         callSignCommandValues = ''
     return transcription, callSignCommandValues
 
+#%%
 iface = gr.Interface(
     fn=transcribeAndExtract,
     inputs=[gr.Audio(source='microphone', type='filepath'), gr.Audio(source='upload', type='filepath'), gr.Checkbox(label='Transcribe only', default=False)],
     outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
     title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
-    description='Whisper Large v2 model fine-tuned on the ATCO2-ASR and ATCOSIM datasets.'
+    description='This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.',
 )
 
+#%%
 iface.launch()
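
Besides the longer description string and the #%% editor cell markers, the functional change in this commit is the authentication argument passed to transformers.pipeline: the deprecated use_auth_token keyword is replaced by token. A minimal sketch of the updated call pattern, assuming a valid HUGGINGFACE_TOKEN environment variable is set and using the model ID from app.py; example.wav is a placeholder file name:

# Sketch only: 'token' supersedes the deprecated 'use_auth_token' argument
# in recent transformers releases; both pass the access token used to load
# the model from the Hugging Face Hub.
import os
from transformers import pipeline

whisper = pipeline(
    model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim',
    token=os.environ['HUGGINGFACE_TOKEN'],  # HF access token for the model repo
)

print(whisper('example.wav')['text'])  # placeholder audio path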