davanstrien HF staff committed on
Commit
e8ef27e
1 Parent(s): bae8f11
Files changed (1) hide show
  1. load_card_data.py +2 -4
load_card_data.py CHANGED
@@ -26,7 +26,7 @@ HF_TOKEN = os.getenv("HF_TOKEN")
26
  EMBEDDING_MODEL_NAME = "Alibaba-NLP/gte-large-en-v1.5"
27
  EMBEDDING_MODEL_REVISION = "104333d6af6f97649377c2afbde10a7704870c7b"
28
  INFERENCE_MODEL_URL = (
29
- "https://spwy1g6626yhjhjhpr.us-east-1.aws.endpoints.huggingface.cloud"
30
  )
31
  DATASET_PARQUET_URL = (
32
  "hf://datasets/librarian-bots/dataset_cards_with_metadata/data/train-*.parquet"
@@ -168,9 +168,7 @@ def refresh_card_data(min_len: int = 250, min_likes: Optional[int] = None):
168
  collection = get_collection(chroma_client, embedding_function, COLLECTION_NAME)
169
  most_recent = get_last_modified_in_collection(collection)
170
 
171
- if data := load_cards(
172
- min_len=min_len, min_likes=min_likes, last_modified=most_recent
173
- ):
174
  _create_and_upsert_embeddings(data, collection)
175
  else:
176
  logger.info("No new data to refresh")
 
26
  EMBEDDING_MODEL_NAME = "Alibaba-NLP/gte-large-en-v1.5"
27
  EMBEDDING_MODEL_REVISION = "104333d6af6f97649377c2afbde10a7704870c7b"
28
  INFERENCE_MODEL_URL = (
29
+ "https://spwy1g6626yhjhpr.us-east-1.aws.endpoints.huggingface.cloud"
30
  )
31
  DATASET_PARQUET_URL = (
32
  "hf://datasets/librarian-bots/dataset_cards_with_metadata/data/train-*.parquet"
 
168
  collection = get_collection(chroma_client, embedding_function, COLLECTION_NAME)
169
  most_recent = get_last_modified_in_collection(collection)
170
 
171
+ if data := load_cards(min_len=min_len, min_likes=min_likes, last_modified=None):
 
 
172
  _create_and_upsert_embeddings(data, collection)
173
  else:
174
  logger.info("No new data to refresh")