import whisper
from groq import Groq
from gtts import gTTS
import gradio as gr
import os
import tempfile


# Set up Groq client for LLM interaction.
# The API key is read from the GROQ_API_KEY environment variable and must
# never be committed to source control. A key was previously hard-coded in
# this file; treat that key as compromised and revoke it.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail fast, before the comparatively slow Whisper model load below.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before starting the application."
    )

client = Groq(api_key=_groq_api_key)

# Load Whisper model for transcription
whisper_model = whisper.load_model("base")

def process_speech_to_speech(audio):
    """Run one speech-to-speech turn: transcribe, ask the LLM, speak the reply.

    Args:
        audio: Path to the recorded audio file, or a falsy value (``None`` /
            empty string) when there is no recording to process.

    Returns:
        A ``(response_text, audio_output)`` tuple. ``response_text`` is the
        LLM reply and ``audio_output`` is the path of an MP3 file containing
        the spoken reply. When there is nothing to transcribe, nothing was
        said, or the LLM reply is empty, ``audio_output`` is ``None`` and
        ``response_text`` is the (possibly empty) text available so far.

    Raises:
        Whatever the Whisper, Groq or gTTS calls raise on failure; errors are
        not swallowed here.
    """
    # Nothing recorded (e.g. the input was cleared): there is no file to
    # hand to Whisper, so return empty outputs instead of crashing.
    if not audio:
        return "", None

    # Step 1: Transcribe the audio using Whisper
    transcript = whisper_model.transcribe(audio)["text"]
    if not transcript.strip():
        # Silence / unintelligible audio: skip the LLM round trip entirely.
        return "", None

    # Step 2: Send transcription to the LLM via Groq
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": transcript}],
        model="llama3-8b-8192"
    )
    response_text = chat_completion.choices[0].message.content
    if not response_text or not response_text.strip():
        # gTTS refuses empty text, so return the text without audio.
        return response_text or "", None

    # Step 3: Convert LLM response to speech using gTTS
    tts = gTTS(text=response_text, lang="en")
    # Reserve a unique path, then close the handle before gTTS writes to it;
    # writing through a second handle while the first is open is not portable.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        audio_output = fp.name  # Path to the generated speech file
    try:
        tts.save(audio_output)
    except Exception:
        # Do not leave an empty temp file behind when synthesis fails.
        os.remove(audio_output)
        raise

    return response_text, audio_output

# Gradio front end: one recorded-audio input, and two outputs showing the
# LLM reply as text and as synthesized speech.
_audio_input = gr.Audio(type="filepath", label="Record your audio")
_text_output = gr.Textbox(label="LLM Response")
_speech_output = gr.Audio(type="filepath", label="Spoken Response")

iface = gr.Interface(
    fn=process_speech_to_speech,
    inputs=_audio_input,
    outputs=[_text_output, _speech_output],
    live=True,  # re-run the pipeline whenever the input changes
)

# Start the web server for the interface.
iface.launch()