import streamlit as st
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, ServiceContext
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import messages_to_prompt, completion_to_prompt
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

from Functions.write_stream import user_data

# Load every document in the knowledge base folder
directory = "Knowledge Base/"
documents = SimpleDirectoryReader(directory).load_data()

llm = LlamaCPP(
    # You can pass in the URL to a GGUF model to download it automatically
    model_url="https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.75,
    max_new_tokens=256,
    # keep the context window a bit below the model's maximum to leave room for the prompt formatting
    context_window=3900,
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Embedding model used to vectorise both the documents and the queries
embed_model = HuggingFaceEmbeddings(model_name="thenlper/gte-large")

service_context = ServiceContext.from_defaults(
    chunk_size=256,
    llm=llm,
    embed_model=embed_model,
)

# Build the vector index over the knowledge base and expose it as a query engine
index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
query_engine = index.as_query_engine()

# =============== USER INTERFACE (UI) ===============
st.title("Wiki Bot")

# Initialise the chat history on the first run
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay the conversation so far
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

prompt = st.chat_input("Enter your question:")

if prompt:
    # Show and store the user's message
    with st.chat_message("user"):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Answer the question from the indexed knowledge base
    reply = query_engine.query(prompt)
    # user_data (custom helper from Functions/write_stream) wraps the answer for streaming display
    response = user_data(function_name=reply)

    with st.chat_message("assistant"):
        st.write_stream(response)
        print("working!!")  # console log to confirm the response was rendered

    st.session_state.messages.append({"role": "assistant", "content": str(reply)})
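
# =============== Functions/write_stream.py (hypothetical sketch) ===============
# The real module is not shown in this listing. Based on how it is used above
# (st.write_stream(user_data(function_name=reply))), user_data() presumably turns
# the query engine's response into a generator of text chunks so the answer
# appears to stream into the chat window. A minimal version could look like this:
import time


def user_data(function_name):
    """Yield the response text word by word so st.write_stream() can render it incrementally."""
    for word in str(function_name).split():
        yield word + " "
        time.sleep(0.02)  # small pause so the streaming effect is visible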
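
# =============== OPTIONAL: CACHING THE HEAVY SETUP ===============
# Streamlit reruns the whole script on every chat message, so the documents are
# re-read and the vector index is rebuilt each time. A common pattern is to move
# the expensive setup into a function decorated with st.cache_resource so it only
# runs once per session. This is a sketch, not wired into the app above;
# load_query_engine is a name chosen for illustration, and in a full refactor the
# LlamaCPP and embedding setup would move inside it as well.

@st.cache_resource
def load_query_engine(source_dir: str = "Knowledge Base/"):
    """Build the vector index once and reuse it across Streamlit reruns."""
    docs = SimpleDirectoryReader(source_dir).load_data()
    idx = VectorStoreIndex.from_documents(
        docs, service_context=service_context, show_progress=True
    )
    return idx.as_query_engine()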