File size: 1,163 Bytes
99a1d6d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import streamlit as st
from transformers import pipeline
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer, util
# Bi-encoder used to embed user questions. Per the query helper's note, it
# must be the same embedder that was used for the indexed documents.
bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
bi_encoder.max_seq_length = 256  # Truncate long documents to 256 tokens

# Name of the Pinecone index and the namespace queried by this app.
INDEX_NAME = 'cl-search-idx'
NAMESPACE = 'webpages'

# The client must be created before an index handle can be opened; the
# script previously referenced `pc` without ever defining it (NameError).
# With no explicit api_key the client reads PINECONE_API_KEY from the
# environment.
pc = Pinecone()

# Store the index as a variable
index = pc.Index(name=INDEX_NAME)
def query_from_pinecone(index, question_embedding, top_k=3, namespace=None):
    """Return the matches for a question embedding from a Pinecone index.

    Args:
        index: Object exposing a ``query(...)`` method (the Pinecone index
            handle in this script).
        question_embedding: Query vector. It must come from THE SAME
            embedder as the indexed documents.
        top_k: Number of matches to request.
        namespace: Namespace to search. Defaults to the module-level
            ``NAMESPACE`` when omitted.

    Returns:
        Whatever the response holds under ``'matches'`` (``None`` if the
        response has no such entry).
    """
    if namespace is None:
        namespace = NAMESPACE

    response = index.query(
        vector=question_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True,  # gets the metadata (dates, text, etc)
    )
    return response.get('matches')
# Free-text question entered by the user; empty until something is typed.
QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')

# Streamlit runs the script on first render with an empty text area, so
# skip the embedding and the index query until there is a question.
if QUESTION.strip():
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

    # The index handle is the first argument; it was previously omitted,
    # which shifted the vector into the `index` slot.
    resp = query_from_pinecone(index, question_embedding.tolist(), 3)

    if resp:
        # Only the best match is shown; resp[1] and resp[2] hold the
        # remaining matches if more context is wanted.
        docresult = resp[0]['metadata']['text']

        # `out` was previously undefined (NameError).
        # NOTE(review): the intended payload is not visible in this file;
        # showing the question with the top match's text - confirm.
        out = {'question': QUESTION, 'text': docresult}
        st.json(out)
    else:
        st.warning('No matching documents were found for this question.')
|