import os
import time

import gradio as gr
from groq import Groq
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever

# Groq API key, read from the environment (set "groq_api" before launching)
api_key = os.getenv("groq_api")

# Load the FAISS index from the saved file
faiss_index_path = "./faiss_index"  # Replace with your actual path
document_store = FAISSDocumentStore.load(faiss_index_path)

# Initialize the Retriever with a multilingual E5 instruct embedding model
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="intfloat/multilingual-e5-large-instruct",
    model_format="sentence_transformers",
)


def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query with the task instruction expected by E5 instruct models."""
    return f"Instruct: {task_description}\nQuery: {query}"


def respond(text):
    # Step 1: Set up the instruct-formatted query
    task = "Given a web search query, retrieve relevant passages that answer the query"
    query = get_detailed_instruct(task, text)

    # Step 2: Retrieve the top documents and measure retrieval time
    start_time = time.time()
    retrieval_results = retriever.retrieve(query=query, top_k=10)
    print(f"Document retrieval completed in {time.time() - start_time:.2f} seconds")

    # Step 3: Extract context from the retrieved documents
    context = "\n\n".join(doc.content for doc in retrieval_results)

    # Step 4: Initialize the Groq client for the LLM (ensure the API key is set securely)
    client = Groq(api_key=api_key)

    # Step 5: Send the query and retrieved context to the LLM for completion
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a retrieval-augmented generation (RAG) system designed to answer complex and general questions. "
                    "Do not mention that you are a RAG system or refer to the retrieved documents. "
                    "Use the retrieved documents to generate accurate and contextually relevant responses. "
                    "Find the answer to the query in the retrieved documents."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Here are the retrieved documents:\n\n{context}\n\n"
                    f"Using this information, answer the question: {query}"
                ),
            },
        ],
        model="llama-3.1-70b-versatile",  # Specify the model you want to use
        temperature=0,
    )

    # Step 6: Return the LLM's response
    return chat_completion.choices[0].message.content


title = "Powered by Kazuk"
description = "Kazuk Team"

css = """
body {
    background-color: #690088 !important;
}
"""

demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    css=css,
)

if __name__ == "__main__":
    demo.launch(share=True)
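
# --- Index-building sketch (illustrative, not part of the original app) -------
# The app above assumes a FAISS index has already been saved at ./faiss_index.
# The helper below is a minimal sketch of how such an index could be built with
# Haystack 1.x. The function name, the `docs` argument, the SQLite URL, and the
# output path are assumptions for illustration only; it is never called here.
def build_faiss_index(docs, index_path="./faiss_index"):
    store = FAISSDocumentStore(
        sql_url="sqlite:///faiss_document_store.db",  # backing SQL store (assumed path)
        embedding_dim=1024,  # multilingual-e5-large-instruct outputs 1024-dim vectors
        faiss_index_factory_str="Flat",
    )
    # Write raw texts as documents, embed them with the same retriever model, and save
    store.write_documents([{"content": text} for text in docs])
    store.update_embeddings(
        EmbeddingRetriever(
            document_store=store,
            embedding_model="intfloat/multilingual-e5-large-instruct",
            model_format="sentence_transformers",
        )
    )
    store.save(index_path)
    return store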