|
import streamlit as st |
|
|
|
from transformers import pipeline |
|
from pinecone import Pinecone, ServerlessSpec |
|
from sentence_transformers import SentenceTransformer, util |
|
|
|
|
|
@st.cache_resource
def _load_bi_encoder():
    """Build the sentence-embedding model once and reuse it across reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the model as a resource avoids re-instantiating it each time.
    """
    model = SentenceTransformer('msmarco-distilbert-base-v4')
    model.max_seq_length = 256
    return model


# Bi-encoder used to embed the user's question before the vector search.
bi_encoder = _load_bi_encoder()
|
|
|
|
|
# Name of the Pinecone index and the namespace inside it that are queried.
INDEX_NAME = 'cl-search-idx'
NAMESPACE = 'webpages'

# `pc` was referenced without ever being created. With no explicit api_key,
# the client reads the key from the PINECONE_API_KEY environment variable.
pc = Pinecone()

# Handle to the existing index; all queries below go through it.
index = pc.Index(name=INDEX_NAME)
|
|
|
def query_from_pinecone(index, question_embedding, top_k=3):
    """Run a vector query against ``index`` and return its matches.

    Args:
        index: Object exposing a ``query`` method (here, a Pinecone index).
        question_embedding: Query vector passed through as ``vector``.
        top_k: Number of results requested from the index.

    Returns:
        The value stored under ``'matches'`` in the query response. The
        query is scoped to ``NAMESPACE`` and requests metadata with each
        match.
    """
    response = index.query(
        vector=question_embedding,
        top_k=top_k,
        namespace=NAMESPACE,
        include_metadata=True,
    )
    return response.get('matches')
|
|
|
|
|
# Free-text question from the user; empty until something is typed.
QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')

# Only embed and search once there is an actual question; on first render
# the text area is empty and there is nothing meaningful to look up.
if QUESTION and QUESTION.strip():
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

    # Pass the index handle explicitly: the helper's first parameter is the
    # index, the second is the embedding (as a plain list), then top_k.
    resp = query_from_pinecone(index, question_embedding.tolist(), 3)

    if resp:
        # Text stored in the metadata of the best-ranked match.
        docresult = resp[0]['metadata']['text']

        # NOTE(review): `out` was never defined in the original script.
        # The retrieved passage is shown here; if an answer-extraction step
        # was intended, build `out` from it instead. TODO confirm.
        out = {'question': QUESTION, 'text': docresult}
        st.json(out)
    else:
        st.warning('No matching documents were found for this question.')
|
|
|
|