chgsvc2 committed on
Commit
ec97e51
1 Parent(s): a5f357a

Added necessary files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.faiss filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from edubot import EduBotCreator
2
+ from config import *
3
+ import streamlit as st
4
+ from streamlit_chat import message
5
+
6
@st.cache_resource(show_spinner=True)
def create_edubot():
    """Build the EduBot QA pipeline once and cache it across Streamlit reruns."""
    creator = EduBotCreator()
    return creator.create_edubot()

# Built at import time so every request reuses the single cached bot.
edubot = create_edubot()
12
+
13
def infer_edubot(prompt):
    """Run the QA chain on *prompt* (the dict the chain expects, e.g.
    {'query': ...}) and return only the 'result' text field."""
    return edubot(prompt)['result']
17
+
18
def display_conversation(history):
    """Render the chat transcript: each user turn followed by the bot reply."""
    turns = zip(history["user"], history["assistant"])
    for idx, (user_msg, bot_msg) in enumerate(turns):
        message(user_msg, is_user=True, key=f"{idx}_user")
        message(bot_msg, key=str(idx))
22
+
23
def main():
    """Streamlit entry point: render the UI and drive the question/answer loop."""
    st.title("Krish Sir AI Bot 📚🤖")
    st.subheader("A bot created using Langchain 🦜 to run on cpu making your learning process easier")

    user_input = st.text_input("Enter your query")

    # Seed the conversation so the first render already shows a greeting pair.
    if "assistant" not in st.session_state:
        st.session_state["assistant"] = ["I am ready to help you"]
    if "user" not in st.session_state:
        st.session_state["user"] = ["Hey there!"]

    if st.button("Answer"):
        # Guard: do not run LLM inference on an empty/whitespace-only query.
        if user_input.strip():
            answer = infer_edubot({'query': user_input})
            st.session_state["user"].append(user_input)
            st.session_state["assistant"].append(answer)
        else:
            st.warning("Please enter a query before asking for an answer.")

    if st.session_state["assistant"]:
        display_conversation(st.session_state)

if __name__ == "__main__":
    main()
46
+
47
+
config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ DATA_DIR_PATH = "data/"
2
+ VECTOR_DB_PATH = "faiss/education"
3
+ CHUNK_SIZE = 500
4
+ CHUNK_OVERLAP = 200
5
+ EMBEDDER = "thenlper/gte-large"
6
+ DEVICE = "cpu"
7
+ PROMPT_TEMPLATE = '''
8
+ With the information provided try to answer the question.
9
+ If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
10
+ So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers
11
+
12
+ Context: {context}
13
+ Question: {question}
14
+ Do provide only helpful answers
15
+
16
+ Helpful answer:
17
+ '''
18
+ INP_VARS = ['context', 'question']
19
+ CHAIN_TYPE = "stuff"
20
+ SEARCH_KWARGS = {'k': 2}
21
+ MODEL_CKPT = "res/llama-2-7b-chat.ggmlv3.q4_1.bin"
22
+
23
+ MODEL_TYPE = "llama"
24
+ MAX_NEW_TOKENS = 512
25
+ TEMPERATURE = 0.9
edubot.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain import PromptTemplate
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.vectorstores import FAISS
4
+ from langchain.llms import CTransformers
5
+ from langchain.chains import RetrievalQA
6
+ from config import *
7
+
8
class EduBotCreator:
    """Wires a prompt template, a persisted FAISS retriever and a
    CTransformers llama LLM into a RetrievalQA chain.

    All settings come from config.py (imported with ``from config import *``
    at module level).
    """

    def __init__(self):
        # Snapshot the config constants so each instance is self-contained.
        self.prompt_temp = PROMPT_TEMPLATE
        self.input_variables = INP_VARS
        self.chain_type = CHAIN_TYPE
        self.search_kwargs = SEARCH_KWARGS
        self.embedder = EMBEDDER
        # Consistency fix: honour config DEVICE like every other constant
        # (load_vectordb previously hard-coded 'cpu').
        self.device = DEVICE
        self.vector_db_path = VECTOR_DB_PATH
        self.model_ckpt = MODEL_CKPT
        self.model_type = MODEL_TYPE
        self.max_new_tokens = MAX_NEW_TOKENS
        self.temperature = TEMPERATURE

    def create_custom_prompt(self):
        """Return the PromptTemplate fed to the QA chain."""
        return PromptTemplate(template=self.prompt_temp,
                              input_variables=self.input_variables)

    def load_llm(self):
        """Load the GGML llama checkpoint via CTransformers (CPU inference)."""
        return CTransformers(
            model=self.model_ckpt,
            model_type=self.model_type,
            max_new_tokens=self.max_new_tokens,
            temperature=self.temperature,
        )

    def load_vectordb(self):
        """Open the persisted FAISS index using the configured embedder."""
        hfembeddings = HuggingFaceEmbeddings(
            model_name=self.embedder,
            model_kwargs={'device': self.device},
        )
        # NOTE(review): recent langchain versions require
        # allow_dangerous_deserialization=True for load_local — confirm the
        # pinned langchain version before upgrading.
        return FAISS.load_local(self.vector_db_path, hfembeddings)

    def create_bot(self, custom_prompt, vectordb, llm):
        """Assemble the RetrievalQA chain from its prepared parts."""
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type=self.chain_type,
            retriever=vectordb.as_retriever(search_kwargs=self.search_kwargs),
            return_source_documents=True,
            chain_type_kwargs={"prompt": custom_prompt},
        )

    def create_edubot(self):
        """Build all components, cache them on the instance, return the chain."""
        self.custom_prompt = self.create_custom_prompt()
        self.vector_db = self.load_vectordb()
        self.llm = self.load_llm()
        self.bot = self.create_bot(self.custom_prompt, self.vector_db, self.llm)
        return self.bot
faiss1/education1/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddced03d12a951b7c75cf54b6b0ea2fde395ee5629b8c2e5d54a62366c3e05c0
3
+ size 1251885
faiss1/education1/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9963acc74fb1fbc70d5955ef11f1cf5aef8b21e28b3643b3f8c6a3b6bc5d85c9
3
+ size 452274
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ pypdf
2
+ langchain
3
+ torch
4
+ accelerate
5
+ bitsandbytes
6
+ transformers
7
+ sentence_transformers
8
+ faiss_cpu
9
+ streamlit
res/llama-2-7b-chat.ggmlv3.q2_K.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45833e0b59c8fe80676c664f556031fc411da8856e0716ac7b8ed201b7221c08
3
+ size 2866807424