import os

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq

load_dotenv()

# set up LLM (API key is read from the environment / .env file)
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
Settings.llm = llm

# set up callback manager for debug tracing
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
Settings.callback_manager = callback_manager

# embedding model used to convert documents into vectors for indexing
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model

# create splitter that chunks documents before embedding
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
Settings.transformations = [splitter]

if os.path.exists("./vectordb"):
    # reuse the persisted index instead of re-embedding the corpus
    storage_context = StorageContext.from_defaults(persist_dir="./vectordb")
    index = load_index_from_storage(storage_context)
else:
    # attach the file name as metadata so sources can be cited in answers
    filename_fn = lambda filename: {"file_name": filename}
    required_exts = [".pdf", ".docx"]
    reader = SimpleDirectoryReader(
        input_dir="./data",
        required_exts=required_exts,
        recursive=True,
        file_metadata=filename_fn,
    )
    documents = reader.load_data()
    # prepend the metadata to the text so it is embedded with the content
    for doc in documents:
        doc.text = str(doc.metadata) + " " + doc.text
    print(f"Creating index with {len(documents)} documents")
    index = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model, transformations=[splitter]
    )
    index.storage_context.persist(persist_dir="./vectordb")

# set up query engine
query_engine = index.as_query_engine(
    similarity_top_k=5,
    # node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
    verbose=True,
)


def retrieve(question):
    # wrap the user's question with instructions to cite sources and add
    # supporting detail, so the user can refine their follow-up questions
    qns_w_source = (
        "Answer the following question: " + question
        + " Then provide the page and file name of the source document."
        + " Include any additional relevant details from the sources so the"
        + " user can refine their follow-up questions."
    )
    response = query_engine.query(qns_w_source)
    # sources = response.get_formatted_sources(length=5000)
    return str(response)  # + "\n" + str(sources)


demo = gr.Interface(fn=retrieve, inputs="textbox", outputs="textbox")

if __name__ == "__main__":
    demo.launch(share=True)
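
# Usage notes (a sketch of the expected setup; the script's file name and
# data layout are assumptions, not fixed by the code above):
#   1. Put GROQ_API_KEY=<your key> in a .env file next to the script.
#   2. Place .pdf / .docx files under ./data (subfolders are scanned too,
#      since the reader is recursive).
#   3. Run the script, e.g. `python app.py` if it is saved as app.py.
# The first run embeds the corpus and persists the index to ./vectordb;
# later runs load it from disk, so delete ./vectordb to re-index after
# adding documents. With share=True, Gradio prints a temporary public URL
# in addition to the local one.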