chat

Sleeping

App Files Community

sujitb committed on Mar 29, 2024

Commit

8560ab0

verified ·

1 Parent(s): 9b74006

Changed to webpages namespace

Browse files

Files changed (1) hide show

app.py +59 -53

app.py CHANGED Viewed

@@ -23,35 +23,7 @@ pc_api_key= '3f916d01-2a69-457d-85eb-966c5d1849a8'  #AWS
 pc = Pinecone(api_key=pc_api_key)
 index = pc.Index(name=INDEX_NAME)
-try:
-    df_log=pd.read_csv('query.csv', index_col=0)
-except:
-    df_log=pd.DataFrame(columns=['query','url', 'result', 'ts'])
-def query_from_pinecone(index,namespace, question_embedding, top_k=3):
-    # get embedding from THE SAME embedder as the documents
-    return index.query(
-      vector=question_embedding,
-      top_k=top_k,
-      namespace=namespace,
-      include_metadata=True   # gets the metadata (dates, text, etc)
-    ).get('matches')
-QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
-if QUESTION:
-    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
-    ns='pages'
-    resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
-    if len(resp)>0:
-        out= resp[0]['metadata']['data']
-        url= "Matching url "+resp[0]['id']
-        #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
-        system_instructions_text='''
           Your task is to extract the answer to a question from a body of text provided to you.
           The body of text will be enclosed within the delimiter tags  <text> and </text>
@@ -76,35 +48,69 @@ if QUESTION:
           Your response should be based on the information contained in the provided text and should not included any other sources.
           If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
-           Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
           '''
-        client = OpenAI()
-        content="""
-        <text>
-        {}
-        </text>
-        """.format(out)
-        response = client.chat.completions.create(
-          model="gpt-3.5-turbo",
-          messages=[
-            {"role": "system", "content":system_instructions_text },
-            {"role": "user", "content": content},
-            {"role": "user", "content": "Question:"+QUESTION}
-          ]
-        )
-        ans= response.choices[0].message.content
-        st.write(url)
         st.write(ans)
         now= str(datetime.utcnow())
-        df_log.loc[len(df_log)]=[QUESTION,resp[0]['id'],ans,now]
-        df_log.to_csv('query.csv')
     else:
         st.write("No matches for query")
         now= str(datetime.utcnow())
-        df_log.loc[len(df_log)]=[QUESTION,'No match','-',now]
-        df_log.to_csv('query.csv')

 pc = Pinecone(api_key=pc_api_key)
 index = pc.Index(name=INDEX_NAME)
+system_instructions_text='''
           Your task is to extract the answer to a question from a body of text provided to you.
           The body of text will be enclosed within the delimiter tags  <text> and </text>
           Your response should be based on the information contained in the provided text and should not included any other sources.
           If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
+           Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
           '''
+logfile='querylog.csv'
+try:
+    df_log=pd.read_csv(logfile, index_col=0)
+except:
+    df_log=pd.DataFrame(columns=['query','url','score','ans', 'ts'])
+def query_from_pinecone(index,namespace, question_embedding, top_k=3):
+    # get embedding from THE SAME embedder as the documents
+    return index.query(
+      vector=question_embedding,
+      top_k=top_k,
+      namespace=namespace,
+      include_metadata=True   # gets the metadata (dates, text, etc)
+    ).get('matches')
+QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
+score=0
+if QUESTION:
+    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
+    ns='webpages'
+    resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
+    if len(resp)>0:
+        out= resp[0]['metadata']['text']
+        url= resp[0]['id']
+        score=resp[0]['score']
+        title='NA' #resp[0]['metadata']['title']
+        #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
+        if score>.5:
+            client = OpenAI()
+            content="""
+            <text>
+            {}
+            </text>
+            """.format(out)
+            response = client.chat.completions.create(
+              model="gpt-3.5-turbo",
+              messages=[
+                {"role": "system", "content":system_instructions_text },
+                {"role": "user", "content": content},
+                {"role": "user", "content": "Question:"+QUESTION}
+              ]
+            )
+            ans= response.choices[0].message.content
+        else:
+            ans='Weak match to your query. Please try reframing your question'
+        st.write("Matched URL:{}  Score:{}".format(url,score))
         st.write(ans)
         now= str(datetime.utcnow())
+        df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
+        df_log.to_csv(logfile)
     else:
         st.write("No matches for query")
         now= str(datetime.utcnow())
+        df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
+        df_log.to_csv(logfile)