ppsingh committed on
Commit
f5dac9b
·
verified ·
1 Parent(s): b60ea35

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -1
app.py CHANGED
@@ -2,6 +2,9 @@ import streamlit as st
2
  import pandas as pd
3
  from langchain_text_splitters import TokenTextSplitter
4
  from langchain.docstore.document import Document
 
 
 
5
 
6
 
7
  st.set_page_config(page_title="SEARCH IATI",layout='wide')
@@ -49,8 +52,28 @@ def get_chunks():
49
  "status":giz_df.loc[i,'status'],
50
  "title_main":giz_df.loc[i,'title_main'],}))
51
  return placeholder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  chunks = get_chunks()
53
-
54
 
55
  button=st.button("search")
56
 
 
2
  import pandas as pd
3
  from langchain_text_splitters import TokenTextSplitter
4
  from langchain.docstore.document import Document
5
+ from torch import cuda
6
+ from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
7
+ device = 'cuda' if cuda.is_available() else 'cpu'
8
 
9
 
10
  st.set_page_config(page_title="SEARCH IATI",layout='wide')
 
52
  "status":giz_df.loc[i,'status'],
53
  "title_main":giz_df.loc[i,'title_main'],}))
54
  return placeholder
55
+
56
+ def embed_chunks(chunks):
57
+ embeddings = HuggingFaceEmbeddings(
58
+ model_kwargs = {'device': device},
59
+ encode_kwargs = {'normalize_embeddings': True},
60
+ model_name='BAAI/bge-m3'
61
+ )
62
+ # placeholder for collection
63
+ qdrant_collections = {}
64
+ qdrant_collections['all'] = Qdrant.from_documents(
65
+ chunks,
66
+ embeddings,
67
+ path="/data/local_qdrant",
68
+ collection_name='all',
69
+ )
70
+
71
+ print(qdrant_collections)
72
+ print("vector embeddings done")
73
+ return qdrant_collections
74
+
75
  chunks = get_chunks()
76
+ qdrant_col = embed_chunks(chunks)
77
 
78
  button=st.button("search")
79