Spaces:

GIZ
/

audit_assistant

Running on CPU Upgrade

ppsingh committed 7 days ago

Commit

3580bbe

verified ·

1 Parent(s): 51fc305

Update auditqa/process_chunks.py

Files changed (1) hide show

auditqa/process_chunks.py CHANGED Viewed

@@ -62,13 +62,14 @@ def load_chunks():
     embeddings = HuggingFaceEmbeddings(
         model_kwargs = {'device': device},
         show_progress= True,
-        encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE'))),},
         model_name=config.get('retriever','MODEL')
     )
     # placeholder for collection
     qdrant_collections = {}
     print("embeddings started")
-    batch_size = 1000  # Adjust this value based on your system's memory capacity
     #for i in range(0, len(chunks_list), batch_size):
     #    print("embedding",(i+batch_size)/1000)
     #    batch_docs = chunks_list[i:i+batch_size]

     embeddings = HuggingFaceEmbeddings(
         model_kwargs = {'device': device},
         show_progress= True,
+        encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE'))),
+                        'batch_size':100},
         model_name=config.get('retriever','MODEL')
     )
     # placeholder for collection
     qdrant_collections = {}
     print("embeddings started")
+    #batch_size = 1000  # Adjust this value based on your system's memory capacity
     #for i in range(0, len(chunks_list), batch_size):
     #    print("embedding",(i+batch_size)/1000)
     #    batch_docs = chunks_list[i:i+batch_size]