import os
import time

import gradio as gr
from groq import Groq
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever

# Groq API key, read from the environment (set "groq_api" before launching)
api_key = os.getenv("groq_api")

# Load the FAISS index from the saved file
faiss_index_path = "./faiss_index"  # Replace with your actual path
document_store = FAISSDocumentStore.load(faiss_index_path)

# Initialize the Retriever with a multilingual E5 instruct embedding model
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="intfloat/multilingual-e5-large-instruct",
    model_format="sentence_transformers",
)


def get_detailed_instruct(task_description: str, query: str) -> str:
    """Format a query with the task instruction expected by E5 instruct models."""
    return f"Instruct: {task_description}\nQuery: {query}"


def respond(text):
    # Step 1: Set up the instruct-formatted query
    task = "Given a web search query, retrieve relevant passages that answer the query"
    query = get_detailed_instruct(task, text)

    # Step 2: Retrieve the top documents and measure retrieval time
    start_time = time.time()
    retrieval_results = retriever.retrieve(query=query, top_k=10)
    print(f"Document retrieval completed in {time.time() - start_time:.2f} seconds")

    # Step 3: Extract context from the retrieved documents
    context = "\n\n".join(doc.content for doc in retrieval_results)

    # Step 4: Initialize the Groq client for the LLM (ensure the API key is set securely)
    client = Groq(api_key=api_key)

    # Step 5: Send the query and retrieved context to the LLM for completion
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a retrieval-augmented generation (RAG) system designed to answer complex and general questions. "
                    "Do not mention that you are a RAG system or refer to the retrieved documents. "
                    "Use the retrieved documents to generate accurate and contextually relevant responses. "
                    "Find the answer to the query in the retrieved documents."
                ),
            },
            {
                "role": "user",
                "content": (
                    f"Here are the retrieved documents:\n\n{context}\n\n"
                    f"Using this information, answer the question: {query}"
                ),
            },
        ],
        model="llama-3.1-70b-versatile",  # Specify the model you want to use
        temperature=0,
    )

    # Step 6: Return the LLM's response
    return chat_completion.choices[0].message.content


title = "Powered by Kazuk"
description = "Kazuk Team"

css = """
body {
    background-color: #690088 !important;
}
"""

demo = gr.Interface(
    fn=respond,
    inputs="text",
    outputs="text",
    title=title,
    description=description,
    css=css,
)

if __name__ == "__main__":
    demo.launch(share=True)
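
# --- Index-building sketch (illustrative, not part of the original app) -------
# The app above assumes a FAISS index has already been saved at ./faiss_index.
# The helper below is a minimal sketch of how such an index could be built with
# Haystack 1.x. The function name, the `docs` argument, the SQLite URL, and the
# output path are assumptions for illustration only; it is never called here.
def build_faiss_index(docs, index_path="./faiss_index"):
    store = FAISSDocumentStore(
        sql_url="sqlite:///faiss_document_store.db",  # backing SQL store (assumed path)
        embedding_dim=1024,  # multilingual-e5-large-instruct outputs 1024-dim vectors
        faiss_index_factory_str="Flat",
    )
    # Write raw texts as documents, embed them with the same retriever model, and save
    store.write_documents([{"content": text} for text in docs])
    store.update_embeddings(
        EmbeddingRetriever(
            document_store=store,
            embedding_model="intfloat/multilingual-e5-large-instruct",
            model_format="sentence_transformers",
        )
    )
    store.save(index_path)
    return store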