import os import gradio as gr from llama_index.core import SimpleDirectoryReader, VectorStoreIndex from langchain_community.embeddings import HuggingFaceEmbeddings from llama_index.llms.llama_cpp import LlamaCPP from llama_index.llms.llama_cpp.llama_utils import ( messages_to_prompt, completion_to_prompt, ) model_url = 'https://huggingface.co/bartowski/Llama-3.2-3B-Instruct-GGUF/resolve/main/Llama-3.2-3B-Instruct-Q4_K_M.gguf' llm = LlamaCPP( # You can pass in the URL to a GGML model to download it automatically model_url=model_url, temperature=0.1, max_new_tokens=256, context_window=2048, # kwargs to pass to __call__() generate_kwargs={}, # kwargs to pass to __init__() # set to at least 1 to use GPU model_kwargs={"n_gpu_layers": 1}, # transform inputs into Llama2 format messages_to_prompt=messages_to_prompt, completion_to_prompt=completion_to_prompt, verbose=True, ) # Initialize embeddings and LLM embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5") def initialize_index(): """Initialize the vector store index from PDF files in the data directory""" # Load documents from the data directory loader = SimpleDirectoryReader( input_dir="data", required_exts=[".pdf"] ) documents = loader.load_data() # Create index index = VectorStoreIndex.from_documents( documents, embed_model=embeddings, ) # Return query engine with Llama return index.as_query_engine(llm=llm) # Initialize the query engine at startup query_engine = initialize_index() def process_query( message: str, history: list[tuple[str, str]], ) -> str: """Process a query using the RAG system""" try: # Get response from the query engine response = query_engine.query( message, #streaming=True ) return str(response) except Exception as e: return f"Error processing query: {str(e)}" # Create the Gradio interface demo = gr.ChatInterface( process_query, title="PDF Question Answering with RAG + Llama", description="Ask questions about the content of the loaded PDF documents using Llama model", #undo_btn="Delete Previous", #clear_btn="Clear", ) if __name__ == "__main__": demo.launch(debug=True)