Update appChatbot.py
Browse files- appChatbot.py +9 -7
appChatbot.py
CHANGED
@@ -6,8 +6,9 @@ import sys
|
|
6 |
import gradio as gr
|
7 |
from huggingface_hub import InferenceClient
|
8 |
|
|
|
9 |
#from chromadb.utils import embedding_functions
|
10 |
-
from langchain_community.embeddings import SentenceTransformerEmbeddings
|
11 |
|
12 |
from langchain.text_splitter import CharacterTextSplitter
|
13 |
from langchain.embeddings import OpenAIEmbeddings
|
@@ -20,6 +21,13 @@ For more information on `huggingface_hub` Inference API support, please check th
|
|
20 |
"""
|
21 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
|
24 |
DB_DIR = os.path.join(ABS_PATH, "db")
|
25 |
|
@@ -50,9 +58,6 @@ def init_chromadb():
|
|
50 |
# Split the documents into chunks
|
51 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
52 |
texts = text_splitter.split_documents(documents)
|
53 |
-
# Select which embeddings we want to use
|
54 |
-
#embeddings = OpenAIEmbeddings()
|
55 |
-
embeddings = SentenceTransformerEmbeddings(model_name="nomic-ai/nomic-embed-text-v1", model_kwargs={"trust_remote_code":True})
|
56 |
#query_chromadb()
|
57 |
|
58 |
# Create the vectorstore to use as the index
|
@@ -65,9 +70,6 @@ def query_chromadb(ASK):
|
|
65 |
if not os.path.exists(DB_DIR):
|
66 |
raise Exception(f"{DB_DIR} does not exist, nothing can be queried")
|
67 |
|
68 |
-
# Select which embeddings we want to use
|
69 |
-
#embeddings = OpenAIEmbeddings()
|
70 |
-
embeddings = SentenceTransformerEmbeddings(model_name="nomic-ai/nomic-embed-text-v1", model_kwargs={"trust_remote_code":True})
|
71 |
# Load Vector store from local disk
|
72 |
vectorstore = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
|
73 |
|
|
|
6 |
import gradio as gr
|
7 |
from huggingface_hub import InferenceClient
|
8 |
|
9 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
10 |
#from chromadb.utils import embedding_functions
|
11 |
+
#from langchain_community.embeddings import SentenceTransformerEmbeddings
|
12 |
|
13 |
from langchain.text_splitter import CharacterTextSplitter
|
14 |
from langchain.embeddings import OpenAIEmbeddings
|
|
|
21 |
"""
|
22 |
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
|
23 |
|
24 |
+
# Select which embeddings we want to use
|
25 |
+
#embeddings = OpenAIEmbeddings()
|
26 |
+
#embeddings = SentenceTransformerEmbeddings(model_name="nomic-ai/nomic-embed-text-v1", model_kwargs={"trust_remote_code":True})
|
27 |
+
|
28 |
+
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
|
29 |
+
|
30 |
+
|
31 |
ABS_PATH = os.path.dirname(os.path.abspath(__file__))
|
32 |
DB_DIR = os.path.join(ABS_PATH, "db")
|
33 |
|
|
|
58 |
# Split the documents into chunks
|
59 |
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
60 |
texts = text_splitter.split_documents(documents)
|
|
|
|
|
|
|
61 |
#query_chromadb()
|
62 |
|
63 |
# Create the vectorstore to use as the index
|
|
|
70 |
if not os.path.exists(DB_DIR):
|
71 |
raise Exception(f"{DB_DIR} does not exist, nothing can be queried")
|
72 |
|
|
|
|
|
|
|
73 |
# Load Vector store from local disk
|
74 |
vectorstore = Chroma(persist_directory=DB_DIR, embedding_function=embeddings)
|
75 |
|