ppsingh committed on
Commit
3580bbe
·
verified ·
1 Parent(s): 51fc305

Update auditqa/process_chunks.py

Browse files
Files changed (1) hide show
  1. auditqa/process_chunks.py +3 -2
auditqa/process_chunks.py CHANGED
@@ -62,13 +62,14 @@ def load_chunks():
62
  embeddings = HuggingFaceEmbeddings(
63
  model_kwargs = {'device': device},
64
  show_progress= True,
65
- encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE'))),},
 
66
  model_name=config.get('retriever','MODEL')
67
  )
68
  # placeholder for collection
69
  qdrant_collections = {}
70
  print("embeddings started")
71
- batch_size = 1000 # Adjust this value based on your system's memory capacity
72
  #for i in range(0, len(chunks_list), batch_size):
73
  # print("embedding",(i+batch_size)/1000)
74
  # batch_docs = chunks_list[i:i+batch_size]
 
62
  embeddings = HuggingFaceEmbeddings(
63
  model_kwargs = {'device': device},
64
  show_progress= True,
65
+ encode_kwargs = {'normalize_embeddings': bool(int(config.get('retriever','NORMALIZE'))),
66
+ 'batch_size':100},
67
  model_name=config.get('retriever','MODEL')
68
  )
69
  # placeholder for collection
70
  qdrant_collections = {}
71
  print("embeddings started")
72
+ #batch_size = 1000 # Adjust this value based on your system's memory capacity
73
  #for i in range(0, len(chunks_list), batch_size):
74
  # print("embedding",(i+batch_size)/1000)
75
  # batch_docs = chunks_list[i:i+batch_size]