import io
import tempfile

import gradio as gr
import groq
import numpy as np
import requests
import soundfile as sf


# Function to transcribe audio using Groq
def transcribe_audio(audio, api_key):
    if audio is None:
        return ""

    client = groq.Client(api_key=api_key)

    # Gradio delivers "numpy" audio as a (sample_rate, data) tuple;
    # convert it to an in-memory WAV file for the transcription API.
    sample_rate, audio_data = audio
    buffer = io.BytesIO()
    sf.write(buffer, audio_data, sample_rate, format='wav')
    buffer.seek(0)

    try:
        # Use Distil-Whisper English powered by Groq for transcription
        completion = client.audio.transcriptions.create(
            model="distil-whisper-large-v3-en",
            file=("audio.wav", buffer),
            response_format="json"
        )
        return completion.text  # Extract transcription text from response
    except Exception as e:
        return f"Error in transcription: {str(e)}"


# Function to generate AI response using Groq
def generate_response(transcription, api_key):
    if not transcription:
        return "No transcription available. Please try speaking again."

    client = groq.Client(api_key=api_key)

    try:
        # Use Llama 3 70B powered by Groq for text generation
        completion = client.chat.completions.create(
            model="llama3-70b-8192",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": transcription}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"


# VoiceRSS TTS function
def text_to_speech(text, tts_api_key):
    url = "https://api.voicerss.org/"
    params = {
        'key': tts_api_key,
        'src': text,
        'hl': 'en-us',             # Language: English (US)
        'r': '0',                  # Speech rate
        'c': 'mp3',                # Audio codec (MP3)
        'f': '48khz_16bit_stereo'  # Sample rate and bit depth
    }

    try:
        response = requests.get(url, params=params)
        if response.status_code != 200:
            return None  # gr.Audio expects a filepath (or None), not an error string

        # Write the MP3 bytes to a temporary file so gr.Audio can play it
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp:
            tmp.write(response.content)
            return tmp.name
    except Exception:
        return None


# Process audio function to handle transcription, response generation, and TTS
def process_audio(audio, groq_api_key, tts_api_key):
    if not groq_api_key:
        return "Please enter your Groq API key.", "API key is required.", None

    transcription = transcribe_audio(audio, groq_api_key)
    response = generate_response(transcription, groq_api_key)

    # Convert the AI response to speech using VoiceRSS
    audio_response = text_to_speech(response, tts_api_key)

    return transcription, response, audio_response


# Gradio interface with TTS
with gr.Blocks(theme=gr.themes.Default()) as demo:
    gr.Markdown("# 🎙️ Groq x Gradio Voice-Powered AI Assistant with TTS")

    api_key_input = gr.Textbox(type="password", label="Enter your Groq API Key")
    tts_api_key_input = gr.Textbox(type="password", label="Enter your VoiceRSS API Key")

    with gr.Row():
        audio_input = gr.Audio(label="Speak!", type="numpy")

    with gr.Row():
        transcription_output = gr.Textbox(label="Transcription")
        response_output = gr.Textbox(label="AI Assistant Response")
        audio_output = gr.Audio(label="AI Response (Audio)", type="filepath")

    submit_button = gr.Button("Process", variant="primary")

    submit_button.click(
        process_audio,
        inputs=[audio_input, api_key_input, tts_api_key_input],
        outputs=[transcription_output, response_output, audio_output]
    )

demo.launch()
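
# Usage sketch (assumptions: the script is saved as `app.py`; package names are
# taken from the imports above; key sources are the Groq console at
# https://console.groq.com and VoiceRSS at https://www.voicerss.org):
#
#   pip install gradio groq soundfile numpy requests
#   python app.py
#
# Then open the local URL Gradio prints, paste both API keys, record a prompt,
# and press "Process" to see the transcription, the text reply, and the spoken reply.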