sujitb committed on
Commit
8560ab0
1 Parent(s): 9b74006

Changed to webpages namespace

Browse files
Files changed (1) hide show
  1. app.py +59 -53
app.py CHANGED
@@ -23,35 +23,7 @@ pc_api_key= '3f916d01-2a69-457d-85eb-966c5d1849a8' #AWS
23
  pc = Pinecone(api_key=pc_api_key)
24
  index = pc.Index(name=INDEX_NAME)
25
 
26
- try:
27
- df_log=pd.read_csv('query.csv', index_col=0)
28
- except:
29
- df_log=pd.DataFrame(columns=['query','url', 'result', 'ts'])
30
-
31
- def query_from_pinecone(index,namespace, question_embedding, top_k=3):
32
- # get embedding from THE SAME embedder as the documents
33
-
34
- return index.query(
35
- vector=question_embedding,
36
- top_k=top_k,
37
- namespace=namespace,
38
- include_metadata=True # gets the metadata (dates, text, etc)
39
- ).get('matches')
40
-
41
- QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
42
-
43
- if QUESTION:
44
- question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
45
-
46
- ns='pages'
47
- resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
48
- if len(resp)>0:
49
- out= resp[0]['metadata']['data']
50
- url= "Matching url "+resp[0]['id']
51
- #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
52
-
53
-
54
- system_instructions_text='''
55
  Your task is to extract the answer to a question from a body of text provided to you.
56
  The body of text will be enclosed within the delimiter tags <text> and </text>
57
 
@@ -76,35 +48,69 @@ if QUESTION:
76
 
77
  Your response should be based on the information contained in the provided text and should not included any other sources.
78
  If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
79
- Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
80
  '''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- client = OpenAI()
83
- content="""
84
- <text>
85
- {}
86
- </text>
87
- """.format(out)
88
-
89
- response = client.chat.completions.create(
90
- model="gpt-3.5-turbo",
91
- messages=[
92
- {"role": "system", "content":system_instructions_text },
93
- {"role": "user", "content": content},
94
- {"role": "user", "content": "Question:"+QUESTION}
95
- ]
96
- )
97
-
98
- ans= response.choices[0].message.content
99
-
100
- st.write(url)
 
 
 
 
 
 
 
 
 
 
 
 
101
  st.write(ans)
 
102
  now= str(datetime.utcnow())
103
- df_log.loc[len(df_log)]=[QUESTION,resp[0]['id'],ans,now]
104
- df_log.to_csv('query.csv')
105
 
106
  else:
107
  st.write("No matches for query")
108
  now= str(datetime.utcnow())
109
- df_log.loc[len(df_log)]=[QUESTION,'No match','-',now]
110
- df_log.to_csv('query.csv')
 
23
  pc = Pinecone(api_key=pc_api_key)
24
  index = pc.Index(name=INDEX_NAME)
25
 
26
+ system_instructions_text='''
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  Your task is to extract the answer to a question from a body of text provided to you.
28
  The body of text will be enclosed within the delimiter tags <text> and </text>
29
 
 
48
 
49
  Your response should be based on the information contained in the provided text and should not included any other sources.
50
  If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
51
+ Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
52
  '''
53
+ logfile='querylog.csv'
54
+ try:
55
+ df_log=pd.read_csv(logfile, index_col=0)
56
+ except:
57
+ df_log=pd.DataFrame(columns=['query','url','score','ans', 'ts'])
58
+
59
+ def query_from_pinecone(index,namespace, question_embedding, top_k=3):
60
+ # get embedding from THE SAME embedder as the documents
61
+
62
+ return index.query(
63
+ vector=question_embedding,
64
+ top_k=top_k,
65
+ namespace=namespace,
66
+ include_metadata=True # gets the metadata (dates, text, etc)
67
+ ).get('matches')
68
+
69
+ QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
70
+ score=0
71
+
72
+ if QUESTION:
73
+ question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
74
 
75
+ ns='webpages'
76
+ resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
77
+ if len(resp)>0:
78
+ out= resp[0]['metadata']['text']
79
+ url= resp[0]['id']
80
+ score=resp[0]['score']
81
+ title='NA' #resp[0]['metadata']['title']
82
+ #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
83
+
84
+ if score>.5:
85
+ client = OpenAI()
86
+ content="""
87
+ <text>
88
+ {}
89
+ </text>
90
+ """.format(out)
91
+
92
+ response = client.chat.completions.create(
93
+ model="gpt-3.5-turbo",
94
+ messages=[
95
+ {"role": "system", "content":system_instructions_text },
96
+ {"role": "user", "content": content},
97
+ {"role": "user", "content": "Question:"+QUESTION}
98
+ ]
99
+ )
100
+
101
+ ans= response.choices[0].message.content
102
+ else:
103
+ ans='Weak match to your query. Please try reframing your question'
104
+
105
+ st.write("Matched URL:{} Score:{}".format(url,score))
106
  st.write(ans)
107
+
108
  now= str(datetime.utcnow())
109
+ df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
110
+ df_log.to_csv(logfile)
111
 
112
  else:
113
  st.write("No matches for query")
114
  now= str(datetime.utcnow())
115
+ df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
116
+ df_log.to_csv(logfile)