import os

import gradio as gr
from dotenv import load_dotenv
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.callbacks import CallbackManager, LlamaDebugHandler
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.postprocessor import SimilarityPostprocessor  # for the optional cutoff below
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
# load API keys (e.g. GROQ_API_KEY) from a local .env file
load_dotenv()

# set up LLM
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
Settings.llm = llm
# set up callback manager
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])
Settings.callback_manager = callback_manager
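# with print_trace_on_end=True, the debug handler prints a trace of each
# query's internal events (retrieval, synthesis, LLM calls) to stdout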
# set up local embedding model (downloaded from Hugging Face on first run)
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
Settings.embed_model = embed_model
# split documents into 1024-token chunks with 20-token overlap
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
Settings.transformations = [splitter]
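# Settings makes the LLM, embedding model, and splitter the global defaults
# for all index construction and querying below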
# reuse the persisted index if it exists; otherwise build it from ./data
if os.path.exists("./vectordb"):
    storage_context = StorageContext.from_defaults(persist_dir="./vectordb")
    index = load_index_from_storage(storage_context)
else:
    # attach the file name as metadata to every loaded document
    filename_fn = lambda filename: {"file_name": filename}
    required_exts = [".pdf", ".docx"]
    reader = SimpleDirectoryReader(
        input_dir="./data",
        required_exts=required_exts,
        recursive=True,
        file_metadata=filename_fn,
    )
    documents = reader.load_data()
    # prepend the metadata to the text so it is embedded alongside the content
    for doc in documents:
        doc.text = str(doc.metadata) + " " + doc.text
    print(f"Creating index with {len(documents)} documents")
    index = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model, transformations=[splitter]
    )
    index.storage_context.persist(persist_dir="./vectordb")
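# note: delete the ./vectordb directory to force a rebuild after adding
# or changing files in ./data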
# set up query engine over the top-5 most similar chunks
query_engine = index.as_query_engine(
    similarity_top_k=5,
    # optionally drop retrieved nodes below a similarity threshold:
    # node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
    verbose=True,
)
def retrieve(question):
    # ask for source attribution and extra context alongside the answer
    qns_w_source = (
        f"Answer the following question: {question} "
        "Then provide the page and file name of the source document, along with "
        "any additional relevant details that help the user refine follow-up questions."
    )
    response = query_engine.query(qns_w_source)
    # sources = response.get_formatted_sources(length=5000)
    return str(response)  # + "\n" + str(sources)
demo = gr.Interface(fn=retrieve, inputs="textbox", outputs="textbox")

if __name__ == "__main__":
    demo.launch(share=True)