nightfury committed
Commit 3cd0964 · verified
1 Parent(s): 8cc2932

Update appChatbot.py

Files changed (1)
  1. appChatbot.py +63 -2
appChatbot.py CHANGED
@@ -6,6 +6,60 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+DB_DIR = os.path.join(ABS_PATH, "db")
+
+def replace_newlines_and_spaces(text):
+    # Replace all newline characters with spaces
+    text = text.replace("\n", " ")
+    # Replace multiple spaces with a single space
+    text = re.sub(r'\s+', ' ', text)
+    return text
+
+
+def get_documents():
+    return PyPDFLoader("AI-smart-water-management-systems.pdf").load()
+
+
+def init_chromadb():
+    # Delete existing index directory and recreate the directory
+    if os.path.exists(DB_DIR):
+        import shutil
+        shutil.rmtree(DB_DIR, ignore_errors=True)
+    os.mkdir(DB_DIR)
+
+    documents = []
+    for num, doc in enumerate(get_documents()):
+        doc.page_content = replace_newlines_and_spaces(doc.page_content)
+        documents.append(doc)
+
+    # Split the documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_documents(documents)
+    # Select which embeddings we want to use
+    #embeddings = OpenAIEmbeddings()
+    #query_chromadb()
+
+    # Create the vectorestore to use as the index
+    vectorstore = Chroma.from_documents(texts, embeddings, persist_directory=DB_DIR)
+    vectorstore.persist()
+    print(vectorstore)
+    vectorstore = None
+
+def query_chromadb(ASK):
+    if not os.path.exists(DB_DIR):
+        raise Exception(f"{DB_DIR} does not exist, nothing can be queried")
+
+    # Select which embeddings we want to use
+    embeddings = OpenAIEmbeddings()
+    # Load Vector store from local disk
+    vectorstore = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
+
+    result = vectorstore.similarity_search_with_score(query=ASK, k=4)
+    jsonable_result = jsonable_encoder(result)
+    print(json.dumps(jsonable_result, indent=2))
+    return json.dumps(jsonable_result, indent=2)
+
 
 def respond(
     message,
@@ -37,7 +91,8 @@ def respond(
         token = message.choices[0].delta.content
 
         response += token
-        yield response
+        #yield response
+        yield query_chromadb(message)
 
 
 """
@@ -60,5 +115,11 @@ demo = gr.ChatInterface(
 )
 
 
-if __name__ == "__main__":
+
+def main():
+    init_chromadb()
     demo.launch()
+
+if __name__ == "__main__":
+    main()
+    #demo.launch()
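
Note: this hunk does not touch the file's import section, so the modules that the new helpers rely on are not visible in the diff. A plausible import block, assuming the classic LangChain paths (newer releases move these into langchain_community / langchain_openai), would look roughly like the sketch below; the modules actually imported in the committed file may differ.

# Sketch only - these imports are assumed, not part of the commit shown above.
import json
import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

from fastapi.encoders import jsonable_encoder        # used by query_chromadb()
from langchain.document_loaders import PyPDFLoader   # used by get_documents()
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings    # used by query_chromadb()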