"""Streamlit front end for semantic search over a Pinecone index.

The user's question is embedded with the same bi-encoder that is used for
the indexed documents, the nearest vectors are fetched from Pinecone, and
the text of the best match is displayed.
"""
import os

import streamlit as st
from transformers import pipeline
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer, util

# Name of the Pinecone index and the namespace queried inside it.
INDEX_NAME = 'cl-search-idx'
NAMESPACE = 'webpages'


@st.cache_resource
def _load_bi_encoder():
    """Load the sentence encoder once per server process.

    Streamlit re-executes this script on every interaction; caching keeps
    the model from being re-instantiated on each rerun.
    """
    model = SentenceTransformer('msmarco-distilbert-base-v4')
    model.max_seq_length = 256  # Truncate long documents to 256 tokens
    return model


bi_encoder = _load_bi_encoder()

# The original script used `pc` without ever creating it. The API key is
# read from the PINECONE_API_KEY environment variable so that no secret is
# hard-coded in the source.
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
index = pc.Index(name=INDEX_NAME)


def query_from_pinecone(index, question_embedding, top_k=3):
    """Return the ``top_k`` nearest matches for an embedded question.

    Args:
        index: Pinecone index handle to query.
        question_embedding: Query vector as a plain list of floats. It must
            come from THE SAME embedder as the indexed documents.
        top_k: Number of matches to request.

    Returns:
        The ``matches`` entry of the query response, queried within
        ``NAMESPACE`` and with metadata included.
    """
    return index.query(
        vector=question_embedding,
        top_k=top_k,
        namespace=NAMESPACE,
        include_metadata=True,  # gets the metadata (dates, text, etc)
    ).get('matches')


QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')

# The text area is empty on first render; skip encoding and querying until
# the user has actually typed something.
if QUESTION and QUESTION.strip():
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
    # The index handle must be passed explicitly as the first argument.
    resp = query_from_pinecone(index, question_embedding.tolist(), 3)

    if resp:
        docresult = resp[0]['metadata']['text']
        # NOTE(review): the original displayed an undefined name `out`,
        # presumably the result of a later processing step that is not in
        # this file. Until that step exists, show the best retrieved passage.
        out = {'question': QUESTION, 'text': docresult}
        st.json(out)
    else:
        st.warning('No matching documents found.')