# Spaces: Running
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
# Read the Groq credential from the environment; the module refuses to load
# when the variable is unset or empty.
api_key = os.environ.get("GROQ_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Module-level handles reused by every request: the Whisper "base" model
# and the Groq API client.
whisper_model = whisper.load_model("base")
client = Groq(api_key=api_key)
def _synthesize_speech(text):
    """Render *text* to a uniquely named MP3 file and return its path.

    A fresh temporary file is used for every call so that overlapping
    requests never overwrite each other's audio. The file is intentionally
    left on disk on success, because the caller hands the path back to the
    UI; it is removed only if synthesis fails.
    """
    # Reserve a unique path, then close the handle so gTTS can write to it.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    try:
        gTTS(text).save(audio_path)
    except Exception:
        # Do not leave an empty/partial file behind when synthesis fails.
        os.remove(audio_path)
        raise
    return audio_path


def chatbot(audio_input):
    """Run one voice-chat turn: transcribe speech, query the LLM, speak the reply.

    Args:
        audio_input: Filesystem path of the recorded audio, or ``None`` when
            no audio was provided.

    Returns:
        A 3-tuple ``(audio_path, transcription, response_text)``.

        On success ``audio_path`` is the path of a newly created MP3 file
        containing the spoken reply, ``transcription`` is the text Whisper
        recognised and ``response_text`` is the model's reply.

        On any failure ``audio_path`` is ``None``, ``transcription`` is the
        recognised text if transcription had already succeeded (otherwise
        ``None``), and ``response_text`` is ``"Error: <message>"``.
    """
    # Bound before the try block so the error path can always report it.
    transcription = None
    try:
        # Debug: show what the UI handed us.
        print(f"Audio input type: {type(audio_input)}")
        if audio_input is None:
            raise ValueError("Audio input is None. Please provide a valid audio file.")
        if not os.path.exists(audio_input):
            raise FileNotFoundError(f"Audio file {audio_input} not found.")

        # Step 1: load and transcribe the audio with Whisper.
        audio = whisper.load_audio(audio_input)
        transcription_result = whisper_model.transcribe(audio)
        recognised = (transcription_result or {}).get("text")
        # Whitespace-only text counts as a failed transcription too; there
        # is nothing meaningful to send to the language model.
        if not recognised or not recognised.strip():
            raise ValueError("Whisper transcription failed or returned empty text.")
        transcription = recognised

        # Step 2: generate a reply with the LLaMA 3 8B model via the Groq API.
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": transcription,
                }
            ],
            model="llama3-8b-8192",
        )
        if not chat_completion or not chat_completion.choices:
            raise ValueError("Invalid response from Groq API")
        response_text = chat_completion.choices[0].message.content

        # The content may be None or blank; check before calling str methods
        # so the user sees the intended message instead of an AttributeError.
        if not response_text or not response_text.strip():
            raise ValueError("Response text is empty or invalid")

        # Step 3: convert the reply to speech in a per-request file.
        audio_path = _synthesize_speech(response_text)

        # Step 4: hand back the reply audio plus both pieces of text.
        return audio_path, transcription, response_text
    except Exception as e:
        # UI boundary: report the failure in the text output instead of
        # letting the exception propagate out of the handler.
        return None, transcription, f"Error: {str(e)}"
# Gradio UI wiring: one audio input delivered to `chatbot` as a file path,
# and three outputs matching the tuple `chatbot` returns (reply audio,
# transcription text, response text).
_mic_input = gr.Audio(type="filepath")
_reply_audio = gr.Audio(type="filepath")

interface = gr.Interface(
    fn=chatbot,
    inputs=_mic_input,
    outputs=[_reply_audio, "text", "text"],
    title="Voice-to-Voice Chatbot",
    description="Speak to the chatbot and get a real-time response.",
    # Live mode: input is processed automatically, with no button click.
    live=True,
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()