Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit 93f267c • 1 Parent(s): 52369d8
update embedding model
Browse files
- load_data.py +7 -4
load_data.py CHANGED
@@ -24,10 +24,10 @@ load_dotenv()
|
|
24 |
|
25 |
|
26 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
27 |
-
EMBEDDING_MODEL_NAME = "
|
28 |
-
EMBEDDING_MODEL_REVISION = "
|
29 |
INFERENCE_MODEL_URL = (
|
30 |
-
"https://
|
31 |
)
|
32 |
DATASET_PARQUET_URL = (
|
33 |
"hf://datasets/librarian-bots/dataset_cards_with_metadata/data/train-*.parquet"
|
@@ -62,7 +62,10 @@ def get_embedding_function():
|
|
62 |
def get_collection(chroma_client, embedding_function):
|
63 |
logger.info(f"Getting or creating collection: {COLLECTION_NAME}")
|
64 |
return chroma_client.create_collection(
|
65 |
-
name=COLLECTION_NAME,
|
|
|
|
|
|
|
66 |
)
|
67 |
|
68 |
|
|
|
24 |
|
25 |
|
26 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
27 |
+
EMBEDDING_MODEL_NAME = "Alibaba-NLP/gte-large-en-v1.5"
|
28 |
+
EMBEDDING_MODEL_REVISION = "104333d6af6f97649377c2afbde10a7704870c7b"
|
29 |
INFERENCE_MODEL_URL = (
|
30 |
+
"https://spwy1g6626yhjhpr.us-east-1.aws.endpoints.huggingface.cloud"
|
31 |
)
|
32 |
DATASET_PARQUET_URL = (
|
33 |
"hf://datasets/librarian-bots/dataset_cards_with_metadata/data/train-*.parquet"
|
|
|
62 |
def get_collection(chroma_client, embedding_function):
|
63 |
logger.info(f"Getting or creating collection: {COLLECTION_NAME}")
|
64 |
return chroma_client.create_collection(
|
65 |
+
name=COLLECTION_NAME,
|
66 |
+
get_or_create=True,
|
67 |
+
embedding_function=embedding_function,
|
68 |
+
metadata={"hnsw:space": "cosine"},
|
69 |
)
|
70 |
|
71 |
|