chat

Sleeping

App Files Files Community

sujitb committed on Mar 31, 2024

Commit

976a040

verified ·

1 Parent(s): 8560ab0

Chat

Browse files

Files changed (1) hide show

app.py +70 -9

app.py CHANGED Viewed

@@ -7,6 +7,13 @@ from openai import OpenAI
 from datetime import datetime
 import pandas as pd
 import os
 api_key='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'
 os.environ["OPENAI_API_KEY"] = api_key
@@ -48,7 +55,7 @@ system_instructions_text='''
           Your response should be based on the information contained in the provided text and should not included any other sources.
           If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
-           Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
           '''
 logfile='querylog.csv'
 try:
@@ -66,19 +73,53 @@ def query_from_pinecone(index,namespace, question_embedding, top_k=3):
       include_metadata=True   # gets the metadata (dates, text, etc)
     ).get('matches')
-QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
 score=0
 if QUESTION:
     question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
     ns='webpages'
     resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
     if len(resp)>0:
-        out= resp[0]['metadata']['text']
         url= resp[0]['id']
         score=resp[0]['score']
-        title='NA' #resp[0]['metadata']['title']
         #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
         if score>.5:
@@ -102,15 +143,35 @@ if QUESTION:
         else:
             ans='Weak match to your query. Please try reframing your question'
-        st.write("Matched URL:{}  Score:{}".format(url,score))
-        st.write(ans)
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
-        df_log.to_csv(logfile)
     else:
-        st.write("No matches for query")
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
-        df_log.to_csv(logfile)

 from datetime import datetime
 import pandas as pd
 import os
+from huggingface_hub import HfFileSystem
+fs = HfFileSystem()
+import time
 api_key='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'
 os.environ["OPENAI_API_KEY"] = api_key
           Your response should be based on the information contained in the provided text and should not included any other sources.
           If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
+           Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
           '''
 logfile='querylog.csv'
 try:
       include_metadata=True   # gets the metadata (dates, text, etc)
     ).get('matches')
+def response_generator(response):
+    for word in response.split():
+        yield word + " "
+        time.sleep(0.05)
+st.title('CLLM Answering Machine')
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+# Display chat messages from history on app rerun
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section for CAT?')
+#QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
 score=0
+testing=True
 if QUESTION:
+    with st.chat_message("user"):
+        st.markdown(QUESTION)
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": QUESTION})
     question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
     ns='webpages'
+    ns='full'
     resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
     if len(resp)>0:
+        #st.write("Got results...extracting an answer")
+        out= resp[0]['metadata']['data']
+        try:
+            url= resp[0]['metadata']['url']
+        except:
+            url= resp[0]['id']
         url= resp[0]['id']
         score=resp[0]['score']
+        title=resp[0]['metadata']['title']
         #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
         if score>.5:
         else:
             ans='Weak match to your query. Please try reframing your question'
+        #st.write("Matched URL:{}  Score:{}".format(url,score))
+        testing = False
+        if testing:
+            if len(resp)>=1:
+                st.write("2nd Matched URL:{}  Score:{}".format(resp[1]['id'],resp[1]['score']))
+            if len(resp)>=2:
+                st.write("3rd Matched URL:{}  Score:{}".format(resp[2]['id'],resp[2]['score']))
+        with st.chat_message("assistant"):
+            response = st.write_stream(response_generator(ans))
+            # Add assistant response to chat history
+            st.session_state.messages.append({"role": "assistant", "content": response})
+        #st.write(ans)
+        #st.write(' ----------------------')
+        #st.write(out)
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
+        #df.to_csv("hf://datasets/sujitb/data/test.csv")
+        df_log.to_csv("hf://datasets/sujitb/data/"+logfile)
     else:
+        #st.write("No matches for query")
+        ans= "No matches for query"
+        response = st.write_stream(response_generator(ans))
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response})
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
+        df_log.to_csv("hf://datasets/sujitb/data/"+logfile)