# Page header residue (not part of the program): "Engr-Saeed's picture"
# Commit message: Update app.py
# Commit: e1a0aa9 (verified)
import os
import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
# Read the Groq credential from the environment; refuse to start without it.
api_key = os.environ.get("GROQ_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Module-level objects created once at import time: the Whisper "base"
# model and the Groq API client built from the key above.
whisper_model = whisper.load_model("base")
client = Groq(api_key=api_key)
def chatbot(audio_input):
    """Run one voice-to-voice turn: transcribe, query the LLM, synthesize speech.

    Args:
        audio_input: Path to the audio file to process, or ``None`` when no
            audio was supplied.

    Returns:
        A 3-tuple ``(audio_path, transcription, message)``.
        On success: ``("response.mp3", transcription, response_text)``.
        On failure: ``(None, transcription_or_None, "Error: <details>")``;
        the transcription is included only if that step had already
        succeeded before the failure occurred.

    Any exception raised while processing is caught and reported through the
    third tuple element instead of propagating to the caller.
    """
    # Bound before the try block so the error handler can always report it,
    # whichever step fails.
    transcription = None
    try:
        # Debug aid: show what kind of value was handed to the function.
        print(f"Audio input type: {type(audio_input)}")

        if audio_input is None:
            raise ValueError("Audio input is None. Please provide a valid audio file.")
        if not os.path.exists(audio_input):
            raise FileNotFoundError(f"Audio file {audio_input} not found.")

        # Step 1: load the audio and transcribe it with Whisper.
        audio = whisper.load_audio(audio_input)
        transcription_result = whisper_model.transcribe(audio)
        text = transcription_result.get("text") if transcription_result else None
        # Reject whitespace-only text too: there is nothing to send to the LLM.
        if not text or not text.strip():
            raise ValueError("Whisper transcription failed or returned empty text.")
        transcription = text

        # Step 2: send the transcription as a single user message via Groq.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": transcription,
                }
            ],
            model="llama3-8b-8192",
        )
        if not chat_completion or not chat_completion.choices:
            raise ValueError("Invalid response from Groq API")
        response_text = chat_completion.choices[0].message.content

        # Step 3: convert the reply to speech with gTTS.
        # The content may be None, so test for that before calling .strip();
        # otherwise the user would see an AttributeError message instead.
        if not response_text or not response_text.strip():
            raise ValueError("Response text is empty or invalid")
        tts = gTTS(response_text)
        tts.save("response.mp3")

        # Step 4: hand back the audio path, the transcription and the reply.
        return "response.mp3", transcription, response_text
    except Exception as e:
        # Boundary handler: surface every failure as text in the output
        # rather than letting it propagate.
        return None, transcription, f"Error: {e}"
# Gradio UI wiring: one audio input passed to chatbot as a file path, and
# three outputs (an audio file path followed by two text fields) in the same
# order as chatbot's return tuple.
interface = gr.Interface(
    title="Voice-to-Voice Chatbot",
    description="Speak to the chatbot and get a real-time response.",
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Audio(type="filepath"), "text", "text"],
    live=True,  # process input automatically, without requiring a button click
)

# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    interface.launch()