Spaces:

Amarsaish
/

speech-to-text

Running

File size: 7,753 Bytes

# # import os
# # import streamlit as st
# # from pydub import AudioSegment
# # from groq import Groq

# # # Set ffmpeg path
# # ffmpeg_path = r"ffmpeg.exe"
# # os.environ["PATH"] += os.pathsep + os.path.dirname(ffmpeg_path)
# # AudioSegment.converter = ffmpeg_path

# # # Groq API configuration
# # groq_api_key = 'YOUR_GROQ_API_KEY'  # Replace with your actual API key (never commit a real key)
# # client = Groq(api_key=groq_api_key)
# # model = 'whisper-large-v3'

# # # Function to ensure the file is in a suitable format
# # def ensure_suitable_format(file_path):
# #     allowed_formats = ["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"]
# #     file_extension = file_path.split('.')[-1].lower()
# #     if file_extension not in allowed_formats:
# #         new_file_path = f"{os.path.splitext(file_path)[0]}.wav"
# #         os.rename(file_path, new_file_path)
# #         return new_file_path
# #     return file_path

# # # Function to convert audio to WAV
# # def convert_audio_to_wav(input_path, output_path):
# #     audio = AudioSegment.from_file(input_path)
# #     audio.export(output_path, format="wav")
# #     return output_path

# # # Function to transcribe audio using Groq
# # def audio_to_text(filepath):
# #     with open(filepath, "rb") as file:
# #         translation = client.audio.translations.create(
# #             file=(filepath, file.read()),
# #             model=model,
# #         )
# #     return translation.text

# # # Streamlit App UI
# # st.title("Audio-to-Text Transcription")
# # st.write("Upload an audio file to get the transcribed text.")

# # # File upload
# # uploaded_file = st.file_uploader("Upload your audio file", type=["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"])

# # if uploaded_file:
# #     # Save the uploaded file locally
# #     file_path = os.path.join("uploaded_audio", uploaded_file.name)
# #     os.makedirs("uploaded_audio", exist_ok=True)
# #     with open(file_path, "wb") as f:
# #         f.write(uploaded_file.getbuffer())
    
# #     st.write(f"File uploaded: {uploaded_file.name}")

# #     # Ensure file format is suitable
# #     suitable_audio_path = ensure_suitable_format(file_path)

# #     # Convert audio to WAV
# #     wav_path = f"{os.path.splitext(suitable_audio_path)[0]}.wav"
# #     converted_audio = convert_audio_to_wav(suitable_audio_path, wav_path)

# #     # Transcribe audio
# #     st.write("Processing transcription...")
# #     try:
# #         transcription = audio_to_text(converted_audio)
# #         st.success("Transcription complete!")
# #         st.text_area("Transcribed Text", transcription, height=200)
# #     except Exception as e:
# #         st.error(f"Error during transcription: {e}")


# from pydub import AudioSegment
# from groq import Groq
# import os
# import streamlit as st

# # Groq API configuration
# groq_api_key = 'YOUR_GROQ_API_KEY'  # Replace with your actual API key (never commit a real key)
# client = Groq(api_key=groq_api_key)
# model = 'whisper-large-v3'

# # Function to ensure the file is in a suitable format
# def ensure_suitable_format(file_path):
#     allowed_formats = ["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"]
#     file_extension = file_path.split('.')[-1].lower()
#     if file_extension not in allowed_formats:
#         new_file_path = f"{os.path.splitext(file_path)[0]}.wav"
#         os.rename(file_path, new_file_path)
#         return new_file_path
#     return file_path

# # Function to convert audio to WAV
# def convert_audio_to_wav(input_path, output_path):
#     audio = AudioSegment.from_file(input_path)
#     audio.export(output_path, format="wav")
#     return output_path

# # Function to transcribe audio using Groq
# def audio_to_text(filepath):
#     with open(filepath, "rb") as file:
#         translation = client.audio.translations.create(
#             file=(filepath, file.read()),
#             model=model,
#         )
#     return translation.text

# # Streamlit App UI
# st.title("Audio-to-Text Transcription")
# st.write("Upload an audio file to get the transcribed text.")

# # File upload
# uploaded_file = st.file_uploader("Upload your audio file", type=["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"])

# if uploaded_file:
#     # Save the uploaded file locally
#     file_path = os.path.join("uploaded_audio", uploaded_file.name)
#     os.makedirs("uploaded_audio", exist_ok=True)
#     with open(file_path, "wb") as f:
#         f.write(uploaded_file.getbuffer())
    
#     st.write(f"File uploaded: {uploaded_file.name}")

#     # Ensure file format is suitable
#     suitable_audio_path = ensure_suitable_format(file_path)

#     # Convert audio to WAV
#     wav_path = f"{os.path.splitext(suitable_audio_path)[0]}.wav"
#     converted_audio = convert_audio_to_wav(suitable_audio_path, wav_path)

#     # Transcribe audio
#     st.write("Processing transcription...")
#     try:
#         transcription = audio_to_text(converted_audio)
#         st.success("Transcription complete!")
#         st.text_area("Transcribed Text", transcription, height=200)
#     except Exception as e:
#         st.error(f"Error during transcription: {e}")

import os
import streamlit as st
from pydub import AudioSegment
from groq import Groq

# Function to ensure the file is in a suitable format
def ensure_suitable_format(file_path):
    """Return a path whose extension is one of the accepted audio formats.

    The text after the last "." in ``file_path`` (lower-cased) is checked
    against the accepted list. When it matches, ``file_path`` is returned
    untouched. Otherwise the file on disk is renamed so that it ends in
    ``.wav`` and the new path is returned. Only the name changes; the file
    contents are not converted here.
    """
    accepted = ("flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm")

    # With no "." in the path, rpartition yields the whole string as the
    # suffix, which mirrors taking the last piece of a "."-split.
    suffix = file_path.rpartition('.')[2].lower()
    if suffix in accepted:
        return file_path

    stem, _ = os.path.splitext(file_path)
    renamed = f"{stem}.wav"
    os.rename(file_path, renamed)
    return renamed

# Function to convert audio to WAV
def convert_audio_to_wav(input_path, output_path):
    """Load ``input_path`` with pydub and export it to ``output_path`` as WAV.

    Returns ``output_path``.
    """
    AudioSegment.from_file(input_path).export(output_path, format="wav")
    return output_path

# Function to transcribe audio using Groq
def audio_to_text(filepath, groq_api_key):
    """Send an audio file to Groq and return the text of the response.

    A new Groq client is built from ``groq_api_key`` on every call. The file
    at ``filepath`` is read fully into memory and submitted, with the path
    itself as the file name, to the audio translations endpoint using the
    ``whisper-large-v3`` model. The ``text`` attribute of the response is
    returned.

    NOTE(review): this calls ``audio.translations`` rather than
    ``audio.transcriptions`` although the UI talks about transcription --
    confirm that this is the intended endpoint.
    """
    groq_client = Groq(api_key=groq_api_key)

    with open(filepath, "rb") as audio_file:
        payload = (filepath, audio_file.read())

    response = groq_client.audio.translations.create(
        file=payload,
        model='whisper-large-v3',
    )
    return response.text

# Streamlit App UI
#
# Flow: ask for a Groq API key and an audio file; once both are present, save
# the upload under ./uploaded_audio, convert it to WAV and send it to Groq.
st.title("Audio-to-Text Transcription")
st.write("Upload an audio file to get the transcribed text.")

# Input for API key (masked in the UI; never stored by this script)
groq_api_key = st.text_input("Enter your Groq API Key", type="password")

# File upload
uploaded_file = st.file_uploader("Upload your audio file", type=["flac", "mp3", "mp4", "mpeg", "mpga", "m4a", "ogg", "opus", "wav", "webm"])

if groq_api_key and uploaded_file:
    # Save the uploaded file locally.
    # The file name comes from the client, so keep only its final path
    # component: this prevents names such as "../../x" from writing outside
    # the upload directory.
    safe_name = os.path.basename(uploaded_file.name)
    file_path = os.path.join("uploaded_audio", safe_name)
    os.makedirs("uploaded_audio", exist_ok=True)
    with open(file_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    st.write(f"File uploaded: {uploaded_file.name}")

    # Ensure file format is suitable
    suitable_audio_path = ensure_suitable_format(file_path)

    # Target path for the WAV conversion (same stem, .wav extension)
    wav_path = f"{os.path.splitext(suitable_audio_path)[0]}.wav"

    # Convert and transcribe. Both steps run inside the try so that a decode
    # failure (corrupt file, missing ffmpeg) is reported in the UI the same
    # way as an API failure instead of surfacing as a raw traceback.
    st.write("Processing transcription...")
    try:
        converted_audio = convert_audio_to_wav(suitable_audio_path, wav_path)
        transcription = audio_to_text(converted_audio, groq_api_key)
    except Exception as e:
        # Top-level UI boundary: show the failure to the user.
        st.error(f"Error during transcription: {e}")
    else:
        st.success("Transcription complete!")
        st.text_area("Transcribed Text", transcription, height=200)
elif not groq_api_key:
    st.warning("Please enter your Groq API Key to proceed.")