Spaces:
Runtime error
Runtime error
LOUIS SANNA
committed on
Commit
·
d6936f0
1
Parent(s):
780c913
feat(data): add analec
Browse files- README.md +1 -1
- anyqa/build_index.py +5 -1
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/data_level0.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/header.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/length.bin +0 -0
- chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/link_lists.bin +0 -0
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/data_level0.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/header.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/length.bin +0 -3
- chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/link_lists.bin +0 -0
- chroma_db/chroma.sqlite3 +2 -2
- constitution.pdf +0 -0
- data/Confucianism/Analects of Confucius.pdf +0 -0
- data/{daoism/tao-te-ching.pdf → Daoism/Tao_Te_Ching.pdf} +0 -0
- data/us-founding/constitution.pdf +0 -0
- data/us-founding/declaration-of-independance.pdf +0 -0
- declaration-of-independance.pdf +0 -0
README.md
CHANGED
@@ -18,5 +18,5 @@ We abstracted the code so it's easy to build another tool based on another domai
|
|
18 |
## Build vector index
|
19 |
|
20 |
```bash
|
21 |
-
python -m
|
22 |
```
|
|
|
18 |
## Build vector index
|
19 |
|
20 |
```bash
|
21 |
+
python -m anyqa.build_index
|
22 |
```
|
anyqa/build_index.py
CHANGED
@@ -10,14 +10,18 @@ from .vectorstore import PERSIST_DIRECTORY, get_vectorstore
|
|
10 |
|
11 |
|
12 |
def load_data():
|
|
|
13 |
docs = parse_data()
|
|
|
14 |
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
|
|
|
15 |
vectorstore = get_vectorstore(embedding_function)
|
16 |
|
17 |
assert isinstance(vectorstore, Chroma)
|
18 |
vectorstore.from_documents(
|
19 |
docs, embedding_function, persist_directory=PERSIST_DIRECTORY
|
20 |
)
|
|
|
21 |
return vectorstore
|
22 |
|
23 |
|
@@ -47,7 +51,7 @@ def parse_data():
|
|
47 |
|
48 |
|
49 |
def parse_name(source: str) -> str:
|
50 |
-
return source.split("/")[-1].split(".")[0]
|
51 |
|
52 |
|
53 |
def parse_domain(source: str) -> str:
|
|
|
10 |
|
11 |
|
12 |
def load_data():
|
13 |
+
print("Loading data...")
|
14 |
docs = parse_data()
|
15 |
+
print("Loaded documents")
|
16 |
embedding_function = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
|
17 |
+
print("Building index...")
|
18 |
vectorstore = get_vectorstore(embedding_function)
|
19 |
|
20 |
assert isinstance(vectorstore, Chroma)
|
21 |
vectorstore.from_documents(
|
22 |
docs, embedding_function, persist_directory=PERSIST_DIRECTORY
|
23 |
)
|
24 |
+
print("Index built")
|
25 |
return vectorstore
|
26 |
|
27 |
|
|
|
51 |
|
52 |
|
53 |
def parse_name(source: str) -> str:
|
54 |
+
return source.split("/")[-1].split(".")[0].replace("_", " ")
|
55 |
|
56 |
|
57 |
def parse_domain(source: str) -> str:
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/data_level0.bin
RENAMED
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/header.bin
RENAMED
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/length.bin
RENAMED
File without changes
|
chroma_db/{13934663-2db5-404d-be0f-51734d442e08 → 1730b83a-f75a-41e2-aba7-637881bb5ea8}/link_lists.bin
RENAMED
File without changes
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/data_level0.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:a13e72541800c513c73dccea69f79e39cf4baef4fa23f7e117c0d6b0f5f99670
|
3 |
-
size 3212000
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/header.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:0ec6df10978b056a10062ed99efeef2702fa4a1301fad702b53dd2517103c746
|
3 |
-
size 100
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/length.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:fc19b1997119425765295aeab72d76faa6927d4f83985d328c26f20468d6cc76
|
3 |
-
size 4000
|
|
|
|
|
|
|
|
chroma_db/5fa47764-2449-49fb-ae2f-0fd1886dfa2d/link_lists.bin
DELETED
File without changes
|
chroma_db/chroma.sqlite3
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d627997dd35604ac27e67f35911999f234285c39362fffecddd50621d9f01d77
|
3 |
+
size 4067328
|
constitution.pdf
DELETED
Binary file (414 kB)
|
|
data/Confucianism/Analects of Confucius.pdf
ADDED
Binary file (711 kB). View file
|
|
data/{daoism/tao-te-ching.pdf → Daoism/Tao_Te_Ching.pdf}
RENAMED
File without changes
|
data/us-founding/constitution.pdf
DELETED
Binary file (414 kB)
|
|
data/us-founding/declaration-of-independance.pdf
DELETED
Binary file (742 kB)
|
|
declaration-of-independance.pdf
DELETED
Binary file (742 kB)
|
|