"""Voice-to-voice chatbot: transcribe speech, query an LLM, and speak the reply."""

import os
import tempfile

import numpy as np  # NOTE(review): appears unused here — kept in case other code relies on it
import gradio as gr
import whisper
import soundfile as sf  # NOTE(review): appears unused here — kept in case other code relies on it
from gtts import gTTS
from groq import Groq

# SECURITY: the API key was previously hard-coded in this file. A key committed
# to source control is compromised and must be revoked. Supply it via the
# GROQ_API_KEY environment variable instead.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper speech-to-text model once at startup (not per request).
whisper_model = whisper.load_model("base")


def process_audio(audio_file_path):
    """Transcribe an uploaded audio file, generate an LLM reply, and voice it.

    Args:
        audio_file_path: Filesystem path to the user's uploaded audio file
            (provided by the Gradio ``Audio`` component with ``type="filepath"``).

    Returns:
        A ``(response_text, response_audio_path)`` tuple. On failure the first
        element is an ``"Error: ..."`` message and the second is ``None``.
    """
    try:
        if not audio_file_path:
            raise ValueError("No audio file provided")
        print(f"Received audio file path: {audio_file_path}")

        # Whisper accepts a file path directly, so there is no need to copy
        # the upload into a second temporary file (the old copy was never
        # deleted, leaking one temp .wav per request).
        result = whisper_model.transcribe(audio_file_path)
        user_text = result["text"]
        print(f"Transcribed text: {user_text}")

        # Generate a reply using the Llama 3 8B model via the Groq API.
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_text,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
        print(f"Response text: {response_text}")

        # Convert the reply to speech. delete=False is deliberate: Gradio
        # streams this file back to the browser after we return, so it must
        # outlive this function. Close the handle before gTTS writes to the
        # path (writing through a second handle while the first is open is
        # fragile on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            response_audio_path = temp_audio_file.name
        tts = gTTS(text=response_text, lang="en")
        tts.save(response_audio_path)

        return response_text, response_audio_path
    except Exception as e:
        # UI boundary handler: surface the error in the textbox instead of
        # crashing the app.
        return f"Error: {str(e)}", None


# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool")
    gr.Markdown(
        "Upload an audio file to interact with the voice-to-voice chatbot. "
        "The chatbot will transcribe the audio, generate a response, and "
        "provide a spoken reply."
    )
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_button = gr.Button("Submit")
        with gr.Column():
            response_text = gr.Textbox(
                label="Response Text",
                placeholder="Generated response will appear here",
            )
            response_audio = gr.Audio(label="Response Audio", type="filepath")

    submit_button.click(
        process_audio,
        inputs=audio_input,
        outputs=[response_text, response_audio],
    )

# Launch the Gradio app.
demo.launch()