import io
import tempfile

import gradio as gr
import groq
import requests
import soundfile as sf
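
# Note: this demo assumes the third-party packages `gradio`, `groq`, `soundfile`,
# and `requests` are installed (e.g. `pip install gradio groq soundfile requests`).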


def transcribe_audio(audio, api_key):
    """Transcribe recorded audio with Groq's hosted Distil-Whisper model."""
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # Gradio's "numpy" audio type yields a (sample_rate, samples) tuple;
    # re-encode it as an in-memory WAV file for the transcription API.
    sample_rate, audio_data = audio
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format='wav')
    buffer.seek(0)

    try:
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="text"
        )
        # With response_format="text" the SDK returns the transcript as a plain
        # string; fall back to the .text attribute if an object is returned.
        return completion if isinstance(completion, str) else completion.text
    except Exception as e:
        return f"Error in transcription: {str(e)}"


def generate_response(transcription, api_key):
    """Generate a chat reply to the transcribed text with a Groq-hosted Llama 3 model."""
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)

    try:
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription}
            ],
        )
        # The SDK returns message objects, so the reply text lives on .content
        # rather than behind dictionary-style access.
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"


def text_to_speech(text, tts_api_key):
    """Convert the reply text to speech with the VoiceRSS HTTP API.

    Returns the path of a temporary MP3 file on success, or None on failure,
    since Gradio's Audio output component expects a file path (or a numpy
    tuple) rather than raw audio bytes.
    """
    if not tts_api_key:
        return None

    url = "https://api.voicerss.org/"
    params = {
        'key': tts_api_key,
        'src': text,
        'hl': 'en-us',
        'r': '0',
        'c': 'mp3',
        'f': '48khz_16bit_stereo'
    }

    try:
        response = requests.get(url, params=params)
        if response.status_code == 200:
            # Write the MP3 bytes to a temporary file that gr.Audio can play back.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
                tmp.write(response.content)
                return tmp.name
        print(f"Error in TTS conversion: {response.status_code}")
        return None
    except Exception as e:
        print(f"Error in TTS conversion: {str(e)}")
        return None


def process_audio(audio, groq_api_key, tts_api_key):
    """Run the full pipeline: transcription -> chat response -> speech."""
    if not groq_api_key:
        return "Please enter your Groq API key.", "API key is required.", None

    transcription = transcribe_audio(audio, groq_api_key)
    response = generate_response(transcription, groq_api_key)
    audio_response = text_to_speech(response, tts_api_key)

    return transcription, response, audio_response
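
# Usage sketch (hypothetical values): the pipeline can also be driven without the UI,
# e.g. transcription, reply, audio_path = process_audio(
#          (16000, recorded_samples), "<groq-key>", "<voicerss-key>")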


with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant with TTS")

    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
    tts_api_key_input = gr.Textbox(type="password", label="Enter your VoiceRSS API Key")

    with gr.Row():
        audio_input = gr.Audio(label="Speak!", type="numpy")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        response_output = gr.Textbox(label="AI Assistant Response")

    # The TTS step returns a file path, so the output component uses type="filepath".
    audio_output = gr.Audio(label="AI Response (Audio)", type="filepath")

    submit_button = gr.Button("Process", variant="primary")

    submit_button.click(
        process_audio,
        inputs=[audio_input, api_key_input, tts_api_key_input],
        outputs=[transcription_output, response_output, audio_output]
    )
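

# Gradio can also expose a temporary public link by launching with
# demo.launch(share=True); the call below serves the app locally.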
demo.launch()