Fixed to working version using local model
app.py CHANGED
@@ -3,51 +3,52 @@ from transformers import pipeline
 import gradio as gr
 import os
 
-
 #%%
-whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim'
-
+whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
+bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
 
 #%%
-def transcribe(audio_file):
-    if audio_file is not None:
+def transcribe(audio_file, audio_mic):
+    if audio_mic is not None:
+        return whisper(audio_mic)['text']
+    elif audio_file is not None:
         return whisper(audio_file)['text']
     else:
         return 'There was no audio to transcribe...'
 
 #%%
-
-
-
-
-
-
-
-
-
-
-
-
-
+def extractCallSignCommand(transcription):
+    if type(transcription) is str:
+        result = bert_atco_ner(transcription)
+        callsigns = []
+        commands = []
+        values = []
+        for item in result:
+            if 'callsign' in item['entity']:
+                callsigns.append(item['word'])
+            if 'command' in item['entity']:
+                commands.append(item['word'])
+            if 'value' in item['entity']:
+                values.append(item['word'])
 
-
-
-
+        return 'Callsigns: ' + ', '.join(callsigns) + '\nCommands: ' + ', '.join(commands) + '\nValues: ' + ', '.join(values)
+    else:
+        return 'There was no transcription to extract a callsign or command from...'
 
 #%%
-
-
-
-
-
-
-
+def transcribeAndExtract(audio_mic, audio_file, transcribe_only):
+    transcription = transcribe(audio_mic, audio_file)
+    if not transcribe_only:
+        callSignCommandValues = extractCallSignCommand(transcription)
+    else:
+        callSignCommandValues = ''
+    return transcription, callSignCommandValues
 
 #%%
 iface = gr.Interface(
-    fn=transcribe,
-    inputs=gr.Audio(source='upload', type='filepath'),
-    outputs=gr.Text(label='Transcription'),
+    fn=transcribeAndExtract,
+    inputs=[gr.Audio(source='upload', type='filepath'), gr.Audio(source='microphone', type='filepath'), gr.Checkbox(label='Transcribe only', default=False)],
+    outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
     title='Whisper Large v2 - ATCO2-ASR-ATCOSIM',
     description='This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.',
 )
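
For context on the extraction step added above: a transformers token-classification pipeline returns a list of per-token dicts, and extractCallSignCommand() only touches the 'entity' and 'word' keys. Below is a minimal sketch of the shape it consumes, with made-up scores and offsets; the exact label names (B-callsign, I-callsign, B-command, B-value) are an assumption about Jzuluaga/bert-base-ner-atc-en-atco2-1h, although the substring checks above work for any label scheme containing the words 'callsign', 'command' and 'value'.

# Hypothetical output of bert_atco_ner('swiss four seven one descend flight level eight zero').
# 'entity' and 'word' are standard keys of a transformers token-classification
# pipeline; the label values are assumed for this particular model.
result = [
    {'entity': 'B-callsign', 'word': 'swiss',   'score': 0.99, 'index': 1, 'start': 0,  'end': 5},
    {'entity': 'I-callsign', 'word': 'four',    'score': 0.98, 'index': 2, 'start': 6,  'end': 10},
    {'entity': 'B-command',  'word': 'descend', 'score': 0.97, 'index': 5, 'start': 21, 'end': 28},
    {'entity': 'B-value',    'word': 'eight',   'score': 0.96, 'index': 8, 'start': 42, 'end': 47},
]
# extractCallSignCommand() would then report:
#   Callsigns: swiss, four
#   Commands: descend
#   Values: eight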
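One subtlety in the new wiring: gr.Interface passes its input values to fn positionally, in the order (upload, microphone, checkbox), so transcribeAndExtract's parameters named audio_mic and audio_file actually receive the uploaded file and the microphone recording respectively. The inner call transcribe(audio_mic, audio_file) swaps them back against transcribe's signature (audio_file, audio_mic), so each recording still reaches the right branch. A hypothetical direct call, bypassing the UI ('tower.wav' is a placeholder path, not a file from this repo):

# Uploaded file only, no microphone input, NER extraction enabled.
text, entities = transcribeAndExtract('tower.wav', None, False)
print(text)      # the ASR pipeline returns {'text': ...}; transcribe() unwraps it
print(entities)  # 'Callsigns: ...\nCommands: ...\nValues: ...'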