Changed to webpages namespace
Browse files
app.py
CHANGED
@@ -23,35 +23,7 @@ pc_api_key= '3f916d01-2a69-457d-85eb-966c5d1849a8' #AWS
|
|
23 |
pc = Pinecone(api_key=pc_api_key)
|
24 |
index = pc.Index(name=INDEX_NAME)
|
25 |
|
26 |
-
|
27 |
-
df_log=pd.read_csv('query.csv', index_col=0)
|
28 |
-
except:
|
29 |
-
df_log=pd.DataFrame(columns=['query','url', 'result', 'ts'])
|
30 |
-
|
31 |
-
def query_from_pinecone(index,namespace, question_embedding, top_k=3):
|
32 |
-
# get embedding from THE SAME embedder as the documents
|
33 |
-
|
34 |
-
return index.query(
|
35 |
-
vector=question_embedding,
|
36 |
-
top_k=top_k,
|
37 |
-
namespace=namespace,
|
38 |
-
include_metadata=True # gets the metadata (dates, text, etc)
|
39 |
-
).get('matches')
|
40 |
-
|
41 |
-
QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
|
42 |
-
|
43 |
-
if QUESTION:
|
44 |
-
question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
|
45 |
-
|
46 |
-
ns='pages'
|
47 |
-
resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
|
48 |
-
if len(resp)>0:
|
49 |
-
out= resp[0]['metadata']['data']
|
50 |
-
url= "Matching url "+resp[0]['id']
|
51 |
-
#+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
|
52 |
-
|
53 |
-
|
54 |
-
system_instructions_text='''
|
55 |
Your task is to extract the answer to a question from a body of text provided to you.
|
56 |
The body of text will be enclosed within the delimiter tags <text> and </text>
|
57 |
|
@@ -76,35 +48,69 @@ if QUESTION:
|
|
76 |
|
77 |
Your response should be based on the information contained in the provided text and should not included any other sources.
|
78 |
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
|
79 |
-
Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
|
80 |
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
st.write(ans)
|
|
|
102 |
now= str(datetime.utcnow())
|
103 |
-
df_log.loc[len(df_log)]=[QUESTION,
|
104 |
-
df_log.to_csv(
|
105 |
|
106 |
else:
|
107 |
st.write("No matches for query")
|
108 |
now= str(datetime.utcnow())
|
109 |
-
df_log.loc[len(df_log)]=[QUESTION,'No match','-',now]
|
110 |
-
df_log.to_csv(
|
|
|
23 |
pc = Pinecone(api_key=pc_api_key)
|
24 |
index = pc.Index(name=INDEX_NAME)
|
25 |
|
26 |
+
system_instructions_text='''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
Your task is to extract the answer to a question from a body of text provided to you.
|
28 |
The body of text will be enclosed within the delimiter tags <text> and </text>
|
29 |
|
|
|
48 |
|
49 |
Your response should be based on the information contained in the provided text and should not included any other sources.
|
50 |
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
|
51 |
+
Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
|
52 |
'''
|
53 |
+
# Query log: one row per question asked, persisted as a CSV next to the app.
logfile = 'querylog.csv'
try:
    # Resume the existing log; column 0 of the CSV holds the saved row index.
    df_log = pd.read_csv(logfile, index_col=0)
except Exception:
    # Best effort: if the log is missing or unreadable, start an empty one.
    # `Exception` (rather than a bare `except:`) keeps the fallback for every
    # read/parse failure but no longer swallows KeyboardInterrupt/SystemExit.
    df_log = pd.DataFrame(columns=['query', 'url', 'score', 'ans', 'ts'])
|
58 |
+
|
59 |
+
def query_from_pinecone(index, namespace, question_embedding, top_k=3):
    """Return the ``top_k`` closest matches for an embedded question.

    Args:
        index: Object exposing ``query(vector=, top_k=, namespace=,
            include_metadata=)`` whose result supports ``.get('matches')``.
        namespace: Namespace of the index to search.
        question_embedding: Query vector. It must come from THE SAME
            embedder that produced the indexed documents.
        top_k: Maximum number of matches to request (default 3).

    Returns:
        The value stored under ``'matches'`` in the query result, or an
        empty list when that entry is missing or empty.
    """
    response = index.query(
        vector=question_embedding,
        top_k=top_k,
        namespace=namespace,
        include_metadata=True,  # gets the metadata (dates, text, etc)
    )
    # `.get` yields None when 'matches' is absent; normalise to a list so
    # callers can safely use len() / truthiness on the result.
    return response.get('matches') or []
|
68 |
+
|
69 |
+
# ---------------------------------------------------------------------------
# Question/answer flow: read a question, fetch the closest chunk from the
# 'webpages' namespace, have the chat model answer from that chunk only, and
# append the outcome to the CSV query log.
# ---------------------------------------------------------------------------

# A top match must score strictly above this to be sent to the chat model;
# otherwise the user is asked to rephrase.
MIN_MATCH_SCORE = .5


def _log_query(question, url, score, ans):
    """Append one row (stamped with the current UTC time) to the query log and save it."""
    df_log.loc[len(df_log)] = [question, url, score, ans, str(datetime.utcnow())]
    df_log.to_csv(logfile)


QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')
score = 0

if QUESTION:
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

    ns = 'webpages'
    resp = query_from_pinecone(index, ns, question_embedding.tolist(), 3)

    # Truthiness instead of len(): query_from_pinecone returns
    # `.get('matches')`, which is None (not an empty list) when the key is
    # missing, and len(None) would raise TypeError.
    if resp:
        # Only the top-ranked match is used; the remaining hits are ignored.
        best = resp[0]
        out = best['metadata']['text']
        url = best['id']
        score = best['score']
        title = 'NA'  # placeholder: the match's title metadata is not read yet

        if score > MIN_MATCH_SCORE:
            client = OpenAI()
            # The system prompt tells the model the source text is enclosed
            # in <text> ... </text> tags.
            content = "\n<text>\n{}\n</text>\n".format(out)

            response = client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": system_instructions_text},
                    {"role": "user", "content": content},
                    {"role": "user", "content": "Question:" + QUESTION},
                ],
            )

            ans = response.choices[0].message.content
        else:
            ans = 'Weak match to your query. Please try reframing your question'

        st.write("Matched URL:{} Score:{}".format(url, score))
        st.write(ans)
        _log_query(QUESTION, url, score, ans)

    else:
        st.write("No matches for query")
        _log_query(QUESTION, 'No match', 0, '-')
|