nightfury committed
Commit 3cd0964 · verified
1 Parent(s): 8cc2932

Update appChatbot.py

Files changed (1)
  1. appChatbot.py +63 -2
appChatbot.py CHANGED
@@ -6,6 +6,60 @@ For more information on `huggingface_hub` Inference API support, please check th
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
+ABS_PATH = os.path.dirname(os.path.abspath(__file__))
+DB_DIR = os.path.join(ABS_PATH, "db")
+
+def replace_newlines_and_spaces(text):
+    # Replace all newline characters with spaces
+    text = text.replace("\n", " ")
+    # Replace multiple spaces with a single space
+    text = re.sub(r'\s+', ' ', text)
+    return text
+
+
+def get_documents():
+    return PyPDFLoader("AI-smart-water-management-systems.pdf").load()
+
+
+def init_chromadb():
+    # Delete existing index directory and recreate the directory
+    if os.path.exists(DB_DIR):
+        import shutil
+        shutil.rmtree(DB_DIR, ignore_errors=True)
+    os.mkdir(DB_DIR)
+
+    documents = []
+    for num, doc in enumerate(get_documents()):
+        doc.page_content = replace_newlines_and_spaces(doc.page_content)
+        documents.append(doc)
+
+    # Split the documents into chunks
+    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+    texts = text_splitter.split_documents(documents)
+    # Select which embeddings we want to use
+    #embeddings = OpenAIEmbeddings()
+    #query_chromadb()
+
+    # Create the vectorestore to use as the index
+    vectorstore = Chroma.from_documents(texts, embeddings, persist_directory=DB_DIR)
+    vectorstore.persist()
+    print(vectorstore)
+    vectorstore = None
+
+def query_chromadb(ASK):
+    if not os.path.exists(DB_DIR):
+        raise Exception(f"{DB_DIR} does not exist, nothing can be queried")
+
+    # Select which embeddings we want to use
+    embeddings = OpenAIEmbeddings()
+    # Load Vector store from local disk
+    vectorstore = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
+
+    result = vectorstore.similarity_search_with_score(query=ASK, k=4)
+    jsonable_result = jsonable_encoder(result)
+    print(json.dumps(jsonable_result, indent=2))
+    return json.dumps(jsonable_result, indent=2)
+
 
 def respond(
     message,
@@ -37,7 +91,8 @@ def respond(
         token = message.choices[0].delta.content
 
         response += token
-        yield response
+        #yield response
+        yield query_chromadb(message)
 
 
 """
@@ -60,5 +115,11 @@ demo = gr.ChatInterface(
 )
 
 
-if __name__ == "__main__":
+
+def main():
+    init_chromadb()
     demo.launch()
+
+if __name__ == "__main__":
+    main()
+    #demo.launch()
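
Note: this hunk does not touch the file's import section, so the modules that the new helpers rely on are not visible in the diff. A plausible import block, assuming the classic LangChain paths (newer releases move these into langchain_community / langchain_openai), would look roughly like the sketch below; the modules actually imported in the committed file may differ.

# Sketch only - these imports are assumed, not part of the commit shown above.
import json
import os
import re

import gradio as gr
from huggingface_hub import InferenceClient

from fastapi.encoders import jsonable_encoder        # used by query_chromadb()
from langchain.document_loaders import PyPDFLoader   # used by get_documents()
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings    # used by query_chromadb()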