Spaces:

ayaanzaveri
/

whisper-lit

Runtime error

App Files Files Community

ayaanzaveri committed on Apr 2, 2023

Commit

b144145

1 Parent(s): e9c4b9d

Add application file

Browse files

Files changed (1) hide show

app.py +114 -0

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import os
+import whisper
+import streamlit as st
+from pydub import AudioSegment
+st.set_page_config(
+    page_title="Whisper based ASR",
+    page_icon="musical_note",
+    layout="wide",
+    initial_sidebar_state="auto",
+)
+audio_tags = {'comments': 'Converted using pydub!'}
+upload_path = "uploads/"
+download_path = "downloads/"
+transcript_path = "transcripts/"
+@st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
+def to_mp3(audio_file, output_audio_file, upload_path, download_path):
+    ## Converting Different Audio Formats To MP3 ##
+    if audio_file.name.split('.')[-1].lower()=="wav":
+        audio_data = AudioSegment.from_wav(os.path.join(upload_path,audio_file.name))
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="mp3":
+        audio_data = AudioSegment.from_mp3(os.path.join(upload_path,audio_file.name))
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="ogg":
+        audio_data = AudioSegment.from_ogg(os.path.join(upload_path,audio_file.name))
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="wma":
+        audio_data = AudioSegment.from_file(os.path.join(upload_path,audio_file.name),"wma")
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="aac":
+        audio_data = AudioSegment.from_file(os.path.join(upload_path,audio_file.name),"aac")
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="flac":
+        audio_data = AudioSegment.from_file(os.path.join(upload_path,audio_file.name),"flac")
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="flv":
+        audio_data = AudioSegment.from_flv(os.path.join(upload_path,audio_file.name))
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    elif audio_file.name.split('.')[-1].lower()=="mp4":
+        audio_data = AudioSegment.from_file(os.path.join(upload_path,audio_file.name),"mp4")
+        audio_data.export(os.path.join(download_path,output_audio_file), format="mp3", tags=audio_tags)
+    return output_audio_file
+@st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
+def process_audio(filename, model_type):
+    model = whisper.load_model(model_type)
+    result = model.transcribe(filename)
+    return result["text"]
+@st.cache(persist=True,allow_output_mutation=False,show_spinner=True,suppress_st_warning=True)
+def save_transcript(transcript_data, txt_file):
+    with open(os.path.join(transcript_path, txt_file),"w") as f:
+        f.write(transcript_data)
+st.title("🗣 Automatic Speech Recognition using whisper by OpenAI ✨")
+st.info('✨ Supports all popular audio formats - WAV, MP3, MP4, OGG, WMA, AAC, FLAC, FLV 😉')
+uploaded_file = st.file_uploader("Upload audio file", type=["wav","mp3","ogg","wma","aac","flac","mp4","flv"])
+audio_file = None
+if uploaded_file is not None:
+    audio_bytes = uploaded_file.read()
+    with open(os.path.join(upload_path,uploaded_file.name),"wb") as f:
+        f.write((uploaded_file).getbuffer())
+    with st.spinner(f"Processing Audio ... 💫"):
+        output_audio_file = uploaded_file.name.split('.')[0] + '.mp3'
+        output_audio_file = to_mp3(uploaded_file, output_audio_file, upload_path, download_path)
+        audio_file = open(os.path.join(download_path,output_audio_file), 'rb')
+        audio_bytes = audio_file.read()
+    print("Opening ",audio_file)
+    st.markdown("---")
+    col1, col2 = st.columns(2)
+    with col1:
+        st.markdown("Feel free to play your uploaded audio file 🎼")
+        st.audio(audio_bytes)
+    with col2:
+        whisper_model_type = st.radio("Please choose your model type", ('Tiny', 'Base', 'Small', 'Medium', 'Large'))
+    if st.button("Generate Transcript"):
+        with st.spinner(f"Generating Transcript... 💫"):
+            transcript = process_audio(str(os.path.abspath(os.path.join(download_path,output_audio_file))), whisper_model_type.lower())
+            output_txt_file = str(output_audio_file.split('.')[0]+".txt")
+            save_transcript(transcript, output_txt_file)
+            output_file = open(os.path.join(transcript_path,output_txt_file),"r")
+            output_file_data = output_file.read()
+        if st.download_button(
+                             label="Download Transcript 📝",
+                             data=output_file_data,
+                             file_name=output_txt_file,
+                             mime='text/plain'
+                         ):
+            st.balloons()
+            st.success('✅ Download Successful !!')
+else:
+    st.warning('⚠ Please upload your audio file 😯')
+st.markdown("<br><hr><center>Made with ❤️ by <a href='mailto:[email protected]?subject=ASR Whisper WebApp!&body=Please specify the issue you are facing with the app.'><strong>Prateek Ralhan</strong></a> with the help of [whisper](https://github.com/openai/whisper) built by [OpenAI](https://github.com/openai) ✨</center><hr>", unsafe_allow_html=True)