import os
import io

import gradio as gr
import whisper
from gtts import gTTS
from groq import Groq
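
# Assumed dependencies (not pinned in the source): gradio, openai-whisper, gTTS, and groq.
# The Groq client below also expects the GROQ_API_KEY environment variable to be set.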

# Initialize the Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper model
model = whisper.load_model("base")

def process_audio(file_path):
    try:
        # Load the audio file
        audio = whisper.load_audio(file_path)

        # Transcribe the audio using Whisper
        result = model.transcribe(audio)
        text = result["text"]

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # Access the response using dot notation
        response_message = chat_completion.choices[0].message.content.strip()

        # Convert the response text to speech
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)  # Save the audio to the BytesIO object
        response_audio_io.seek(0)

        # Save the audio to a file so Gradio can serve it by path
        response_audio_path = "response.mp3"
        with open(response_audio_path, "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        # Return the response text and the path to the saved audio file
        return response_message, response_audio_path
    except Exception as e:
        return f"An error occurred: {e}", None

# Custom CSS for the interface; passing it to gr.Blocks(css=...) is the supported
# way to style a Blocks app (more reliable than injecting a <style> tag via Markdown).
custom_css = """
.gradio-container {
    font-family: Arial, sans-serif;
    background-color: #f0f4c3;  /* Light green background */
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
    text-align: center;
}
.gradio-input, .gradio-output {
    border-radius: 6px;
    border: 1px solid #ddd;
    padding: 10px;
}
.gradio-button {
    background-color: #ff7043;
    color: white;
    border-radius: 6px;
    border: none;
    padding: 10px 20px;
    font-size: 16px;
    cursor: pointer;
}
.gradio-button:hover {
    background-color: #e64a19;
}
.gradio-title {
    font-size: 28px;
    font-weight: bold;
    margin-bottom: 20px;
    color: #37474f;
}
.gradio-description {
    font-size: 16px;
    margin-bottom: 20px;
    color: #616161;
}
"""

# Create the Gradio interface with customized UI
with gr.Blocks(css=custom_css) as demo:
gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️") | |
gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.") | |
    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_button = gr.Button("Submit")
        with gr.Column():
            response_text = gr.Textbox(label="Response Text", placeholder="The AI-generated response will appear here", lines=5)
            response_audio = gr.Audio(label="Response Audio", type="filepath")

    # Link the submit button to the process_audio function
    submit_button.click(fn=process_audio, inputs=audio_input, outputs=[response_text, response_audio])

# Launch the Gradio app
demo.launch()
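
# Note (not part of the original): demo.launch() is sufficient on Hugging Face Spaces;
# when running locally, demo.launch(share=True) also creates a temporary public link.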