|
import streamlit as st |
|
|
|
|
|
from pinecone import Pinecone, ServerlessSpec |
|
from sentence_transformers import SentenceTransformer, util |
|
from openai import OpenAI |
|
from datetime import datetime |
|
import pandas as pd |
|
import os |
|
# --- Credentials -------------------------------------------------------------
# SECURITY NOTE(review): the literal keys in this section are committed in
# source. Values supplied through the environment now take precedence; the
# literals remain only as a fallback so existing deployments keep working.
# Rotate both keys and delete the literals once OPENAI_API_KEY and
# PINECONE_API_KEY are provisioned through the environment.
api_key = os.environ.get("OPENAI_API_KEY") or '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'

# The OpenAI client further down is constructed without arguments, so the key
# is exported here (presumably it is picked up from OPENAI_API_KEY - confirm
# against the OpenAI client documentation).
os.environ["OPENAI_API_KEY"] = api_key

# --- Embedding model ---------------------------------------------------------
# Bi-encoder used to embed the user's question before the vector lookup.
bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
bi_encoder.max_seq_length = 256

# --- Vector index ------------------------------------------------------------
INDEX_NAME = 'cl-search-idx'

pc_api_key = os.environ.get("PINECONE_API_KEY") or '3f916d01-2a69-457d-85eb-966c5d1849a8'
pc = Pinecone(api_key=pc_api_key)
index = pc.Index(name=INDEX_NAME)

# --- Query log ---------------------------------------------------------------
# Load the log of previous interactions; start with an empty frame only when
# the file does not exist yet or has no content. Any other failure (parse
# error, permissions, ...) is allowed to surface instead of silently
# replacing the existing history on the next save.
try:
    df_log = pd.read_csv('query.csv', index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    df_log = pd.DataFrame(columns=['query', 'url', 'result', 'ts'])
|
|
|
def query_from_pinecone(index, namespace, question_embedding, top_k=3):
    """Return the matches for an embedded question from a vector index.

    Args:
        index: Object exposing ``query(vector=..., top_k=..., namespace=...,
            include_metadata=...)`` whose result supports ``.get('matches')``.
        namespace: Namespace passed through to the query.
        question_embedding: Query vector passed through to the query.
        top_k: Maximum number of matches requested (default 3).

    Returns:
        The value stored under ``'matches'`` in the query response, or an
        empty list when that entry is missing or empty, so callers can
        safely test the result for truthiness or take its length.
    """
    response = index.query(
        vector=question_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True,
    )
    # Normalise a missing/None 'matches' entry to an empty list.
    return response.get('matches') or []
|
|
|
# System prompt for the answer-extraction model: it shows the <text>...</text>
# delimiter convention, two worked examples, and the fallback reply to use when
# the text does not contain the answer.
# NOTE(review): the sentence "Do repeat the question." near the end has
# probably lost a "not". The prompt is left unchanged here because editing it
# changes model behaviour - confirm the intent before fixing.
system_instructions_text='''
Your task is to extract the answer to a question from a body of text provided to you.
The body of text will be enclosed within the delimiter tags <text> and </text>

For example,
<text> General Preparation Tips for VARC Section:

You need to develop an incessant habit of speed reading.
Start with reading newspapers, editorials, fiction and nonfiction novels and simple passages.
The more you read, the faster you read. Learn the basic grammar concepts like parts of speech, articles,verbs, adjectives, tenses, auxiliary verbs, modifiers, modals etc.
Revise at least 50 new words every day
</text>

Question: What are some tips for preparing for VARC?
Here are some tips for preparing for the VARC section:
1. develop an incessant habit of speed reading
2. Start reading newspapers, editorials, fiction and nonfiction novels
3. Learn basic grammar concepts\n
4. Revise at least 50 new words a day

Question: How many new words are to be learnt in a day?
It is advised that 50 new words are learn every day

Your response should be based on the information contained in the provided text and should not included any other sources.
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
'''


def _log_query(question, match_id, answer):
    """Append one interaction to the query log and save it to query.csv."""
    timestamp = str(datetime.utcnow())
    # df_log is mutated in place (never rebound), so no `global` is needed.
    df_log.loc[len(df_log)] = [question, match_id, answer, timestamp]
    df_log.to_csv('query.csv')


def _answer_from_context(question, context):
    """Ask the chat model to answer *question* from *context* and return its reply text."""
    client = OpenAI()
    # The retrieved passage is wrapped in the delimiter tags that the system
    # prompt tells the model to expect.
    content = "\n<text>\n{}\n</text>\n".format(context)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": system_instructions_text},
            {"role": "user", "content": content},
            {"role": "user", "content": "Question:" + QUESTION if question is None else "Question:" + question},
        ],
    )
    return response.choices[0].message.content


QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')

# Skip empty and whitespace-only input so it does not trigger an embedding,
# a vector query, a model call, or a log entry.
if QUESTION and QUESTION.strip():
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

    ns = 'full'
    resp = query_from_pinecone(index, ns, question_embedding.tolist(), 3)

    # Truthiness check: also covers a None result, which len() would crash on.
    if resp:
        # Only the first returned match is used as context for the answer.
        out = resp[0]['metadata']['data']
        url = "Matching url " + resp[0]['id']

        ans = _answer_from_context(QUESTION, out)

        st.write(url)
        st.write(ans)
        _log_query(QUESTION, resp[0]['id'], ans)
    else:
        st.write("No matches for query")
        _log_query(QUESTION, 'No match', '-')
|
|