# NOTE: page-header residue kept as a comment -- "Spaces: Build error".
import logging
import time
from pathlib import Path

import lancedb
import torch
from sentence_transformers import SentenceTransformer
# Configure logging once for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Time how long it takes to connect to LanceDB and open the table.
start_time = time.perf_counter()
proj_dir = Path(__file__).parents[1]
# The database directory lives one level above this file's folder.
_lancedb_uri = proj_dir / "lancedb"
db = lancedb.connect(_lancedb_uri)
tbl = db.open_table("arabic-wiki")
lancedb_loading_time = time.perf_counter() - start_time
logger.info(f"Time taken to load LanceDB: {lancedb_loading_time:.6f} seconds")
# Restart the timer: from here on it measures loading the embedding model.
start_time = time.perf_counter()
name = "sentence-transformers/paraphrase-multilingual-minilm-l12-v2"
# Prefer the GPU, but fall back to the CPU so that importing this module
# does not fail on hosts where CUDA is unavailable.
_device = "cuda" if torch.cuda.is_available() else "cpu"
st_model = SentenceTransformer(name, device=_device)
# used for both training and querying
def embed_func(query):
    """Encode ``query`` with the module-level ``st_model`` and return the result."""
    embedding = st_model.encode(query)
    return embedding
def vector_search(query_vector, top_k):
    """Search the module-level table ``tbl`` with ``query_vector``.

    The query is limited to ``top_k`` rows and the result is returned via
    ``to_list()``.
    """
    search = tbl.search(query_vector)
    limited = search.limit(top_k)
    return limited.to_list()
def retriever(query, top_k=3):
    """Embed ``query`` and return the vector-search results for it.

    ``top_k`` (default 3) is passed through to ``vector_search`` as the
    result limit.
    """
    return vector_search(embed_func(query), top_k)
# Report the time elapsed since ``start_time`` was last reset, i.e. since just
# before the embedding model was loaded (the function definitions in between
# are included in the measurement).
retriever_loading_time = time.perf_counter() - start_time
logger.info(
    f"Time taken to load EmbeddingRetriever: {retriever_loading_time:.6f} seconds"
)