Spaces:
Runtime error
Runtime error
Allow the embedding model to be changed via .env
Browse files
example.env
CHANGED
@@ -11,4 +11,6 @@ SERPAPI_API_KEY=
|
|
11 |
VECTOR_DATABASE_LOCATION=
|
12 |
|
13 |
# Name for the Conversation Memory Collection
|
14 |
-
CONVERSATION_COLLECTION_NAME="ConversationMemory"
|
|
|
|
|
|
11 |
VECTOR_DATABASE_LOCATION=
|
12 |
|
13 |
# Name for the Conversation Memory Collection
|
14 |
+
CONVERSATION_COLLECTION_NAME="ConversationMemory"
|
15 |
+
|
16 |
+
EMBEDDING_MODEL="sentence-transformers/all-MiniLM-L6-v2"
|
innovation_pathfinder_ai/structured_tools/structured_tools.py
CHANGED
@@ -52,7 +52,7 @@ def memory_search(query:str) -> str:
|
|
52 |
#store using envar
|
53 |
|
54 |
embedding_function = SentenceTransformerEmbeddings(
|
55 |
-
model_name="
|
56 |
)
|
57 |
|
58 |
vector_db = Chroma(
|
@@ -78,7 +78,7 @@ def knowledgeBase_search(query:str) -> str:
|
|
78 |
#store using envar
|
79 |
|
80 |
embedding_function = SentenceTransformerEmbeddings(
|
81 |
-
model_name="
|
82 |
)
|
83 |
|
84 |
vector_db = Chroma(
|
@@ -152,7 +152,7 @@ def embed_arvix_paper(paper_id:str) -> None:
|
|
152 |
#store using envar
|
153 |
|
154 |
embedding_function = SentenceTransformerEmbeddings(
|
155 |
-
model_name="
|
156 |
)
|
157 |
|
158 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
|
|
52 |
#store using envar
|
53 |
|
54 |
embedding_function = SentenceTransformerEmbeddings(
|
55 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
56 |
)
|
57 |
|
58 |
vector_db = Chroma(
|
|
|
78 |
#store using envar
|
79 |
|
80 |
embedding_function = SentenceTransformerEmbeddings(
|
81 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
82 |
)
|
83 |
|
84 |
vector_db = Chroma(
|
|
|
152 |
#store using envar
|
153 |
|
154 |
embedding_function = SentenceTransformerEmbeddings(
|
155 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
156 |
)
|
157 |
|
158 |
full_path = os.path.join(pdf_directory, pdf_file_name)
|
innovation_pathfinder_ai/vector_store/chroma_vector_store.py
CHANGED
@@ -99,9 +99,9 @@ def add_markdown_to_collection(
|
|
99 |
name=collection_name,
|
100 |
)
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
|
106 |
documents_page_content:list = [i.page_content for i in splits]
|
107 |
|
@@ -111,7 +111,7 @@ def add_markdown_to_collection(
|
|
111 |
collection.add(
|
112 |
ids=[generate_uuid()], # give each document a uuid
|
113 |
documents=documents_page_content[i], # contents of document
|
114 |
-
embeddings=
|
115 |
metadatas=data.metadata, # type: ignore
|
116 |
)
|
117 |
|
@@ -181,13 +181,9 @@ def add_pdf_to_vector_store(
|
|
181 |
name=collection_name,
|
182 |
)
|
183 |
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
)
|
188 |
-
|
189 |
-
# create the open-source embedding function
|
190 |
-
# embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
|
191 |
|
192 |
documents_page_content:list = [i.page_content for i in split_docs]
|
193 |
|
@@ -198,7 +194,7 @@ def add_pdf_to_vector_store(
|
|
198 |
collection.add(
|
199 |
ids=[generate_uuid()], # give each document a uuid
|
200 |
documents=documents_page_content[i], # contents of document
|
201 |
-
embeddings=
|
202 |
metadatas=data.metadata, # type: ignore
|
203 |
)
|
204 |
|
@@ -244,7 +240,7 @@ if __name__ == "__main__":
|
|
244 |
|
245 |
# create the open-source embedding function
|
246 |
embedding_function = SentenceTransformerEmbeddings(
|
247 |
-
model_name="
|
248 |
)
|
249 |
|
250 |
#method of integrating Chroma and Langchain
|
|
|
99 |
name=collection_name,
|
100 |
)
|
101 |
|
102 |
+
embedding_function = SentenceTransformerEmbeddings(
|
103 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
104 |
+
)
|
105 |
|
106 |
documents_page_content:list = [i.page_content for i in splits]
|
107 |
|
|
|
111 |
collection.add(
|
112 |
ids=[generate_uuid()], # give each document a uuid
|
113 |
documents=documents_page_content[i], # contents of document
|
114 |
+
embeddings=embedding_function(documents_page_content[i]),
|
115 |
metadatas=data.metadata, # type: ignore
|
116 |
)
|
117 |
|
|
|
181 |
name=collection_name,
|
182 |
)
|
183 |
|
184 |
+
embedding_function = SentenceTransformerEmbeddings(
|
185 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
186 |
+
)
|
|
|
|
|
|
|
|
|
187 |
|
188 |
documents_page_content:list = [i.page_content for i in split_docs]
|
189 |
|
|
|
194 |
collection.add(
|
195 |
ids=[generate_uuid()], # give each document a uuid
|
196 |
documents=documents_page_content[i], # contents of document
|
197 |
+
embeddings=embedding_function(documents_page_content[i]),
|
198 |
metadatas=data.metadata, # type: ignore
|
199 |
)
|
200 |
|
|
|
240 |
|
241 |
# create the open-source embedding function
|
242 |
embedding_function = SentenceTransformerEmbeddings(
|
243 |
+
model_name=os.getenv("EMBEDDING_MODEL"),
|
244 |
)
|
245 |
|
246 |
#method of integrating Chroma and Langchain
|