chat

Sleeping

App Files Files Community

sujitb committed on Apr 24

Commit

6d6a553

•

1 Parent(s): 6b136b5

major changes - added Tavily

Browse files

Files changed (1) hide show

app.py +80 -85

app.py CHANGED Viewed

@@ -116,24 +116,41 @@ def response_generator(response):
         yield word + " "
         time.sleep(0.05)
-def write_log(query,url, score, ans, ts):
-    # Construct new row
-    score = str(score)
-    ans = ans.replace('"', '""')
-    new_row = f'\n"{query}","{url}",{score},"{ans}","{ts}"'
-    with fs.open("datasets/sujitb/data/querylog.csv", "r") as f:
-        buffer = f.read()
-    # Append the new row to buffer
-    buffer += new_row
-    # Write the buffer to the file in "W" mode
-    with fs.open("datasets/sujitb/data/querylog.csv", "w",encoding="utf-8") as f:
-        f.write(buffer)
     return
 logfile='querylog.csv'
 qlist=[
@@ -185,6 +202,9 @@ QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section
 #QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
 score=0
 testing=True
 ext_url=''
@@ -231,8 +251,9 @@ if QUESTION:
             d['url']= r['metadata']['url']
             d['score']=r['score']
             resplist.append(d)
-    if len(resplist)>0:
         sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
         # Get the elements with the top 2 highest values
@@ -242,9 +263,11 @@ if QUESTION:
         json_data = json.dumps(top_2)
         goodmatch=False
-        if resplist[sorted_indices[0]]['score']>=THRESHOLD:
             st.write('Preparing answers...')
             goodmatch=True
             mode = "two"  # two passages
@@ -275,27 +298,11 @@ if QUESTION:
             )
             ans= response.choices[0].message.content
-        else:
-            ans='Weak match to your query. Please try reframing your question'
-            ## Call Tavily
-            tavily_key= os.environ["TAVILY_KEY"]
-            tavily = TavilyClient(api_key=tavily_key)
-            success= 0
-            while success<3:
-                success+=1
-                try:
-                    resp=tavily.search(query=QUESTION)
-                    with st.chat_message("assistant"):
-                        ans=resp['results'][0]['content']
-                    ext_url= resp['results'][0]['url']
-                    break
-                except:
-                    pass
             #st.write("Matched URL:{}  Score:{}".format(url,score))
             testing = False
@@ -304,54 +311,42 @@ if QUESTION:
                     st.write("2nd Matched URL:{}  Score:{}".format(resp[1]['id'],resp[1]['score']))
                 if len(resp)>=2:
                     st.write("3rd Matched URL:{}  Score:{}".format(resp[2]['id'],resp[2]['score']))
-        ##  Send RESPONSE
-        with st.chat_message("assistant"):
-            response = st.write_stream(response_generator(ans))
-            if goodmatch:
-                st.write('Resources:')
-                for k in range(2):
-                    disp_title=top_2[k]['title']
-                    disp_url= top_2[k]['url']
-                    if 'youtube' in disp_url:
-                        disp_title='Youtube: '+disp_title
-                    if k>0:
-                        if top_2[k]['url']==top_2[k-1]['url']:
-                            break
-                    st.write("["+disp_title+"]("+disp_url+")")
-            else: # not a good match
-                if len(ext_url)>5:
-                    st.write('External Site:',ext_url)
-                #st.write(top_2[0]['url'])
-            # Add assistant response to chat history
-            st.session_state.messages.append({"role": "assistant", "content": response})
-        #st.write(ans)
-        #st.write(' ----------------------')
-        #st.write(out)
-        now= str(datetime.utcnow())
-        url = top_2[0]['url'] + ' ; '+top_2[1]['url']
-        df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
-        #write_log(QUESTION,url, score, ans, now)
-        #df.to_csv("hf://datasets/sujitb/data/test.csv")
-        storage_options={"token":token}
-        df_log.to_csv("hf://datasets/sujitb/data/"+logfile,storage_options= storage_options)
-    else:  ## Zero response from pinecone query
-        #st.write("No matches for query")
-        ans= "No matches for query"
         response = st.write_stream(response_generator(ans))
         # Add assistant response to chat history
         st.session_state.messages.append({"role": "assistant", "content": response})
-        now= str(datetime.utcnow())
-        df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
-        storage_options={"token":token}
-        df_log.to_csv("hf://datasets/sujitb/data/"+logfile,storage_options= storage_options)
-        #write_log(QUESTION,'No match', 0, '-', now)

         yield word + " "
         time.sleep(0.05)
+def write_log(QUESTION,url, score, ans):
+    """Record one answered query in the shared query log.
+
+    Appends a row of (QUESTION, url, score, ans, current UTC time as text)
+    to the module-level ``df_log`` and then writes the whole table as CSV
+    to the ``sujitb/data`` dataset location, passing the token read from
+    the ``FILE_TOKEN`` environment variable as the storage option.
+    """
+    # NOTE(review): df_log and logfile are module-level names defined
+    # outside this function; df_log is presumably a pandas DataFrame.
+    timestamp = str(datetime.utcnow())
+    next_row = len(df_log)
+    df_log.loc[next_row] = [QUESTION, url, score, ans, timestamp]
+    destination = "hf://datasets/sujitb/data/" + logfile
+    df_log.to_csv(destination, storage_options={"token": os.getenv('FILE_TOKEN')})
     return
+def getTavilyResponse(QUESTION, max_attempts=3, retry_delay=1):
+    """Fetch a fallback answer for QUESTION from the Tavily search API.
+
+    The search is tried up to ``max_attempts`` times, pausing
+    ``retry_delay`` seconds between failed attempts, and the first search
+    result is used.
+
+    Returns a tuple ``(ans, ext_url)``: the content and URL of the first
+    result, or two empty strings when every attempt failed.
+
+    Raises KeyError if the ``TAVILY_KEY`` environment variable is not set.
+    """
+    tavily = TavilyClient(api_key=os.environ["TAVILY_KEY"])
+    for attempt in range(1, max_attempts + 1):
+        try:
+            top_hit = tavily.search(query=QUESTION)['results'][0]
+            # Read both fields before returning so the caller never gets
+            # an answer without the URL it came from.
+            return top_hit['content'], top_hit['url']
+        except Exception:
+            # Deliberately not a bare except: KeyboardInterrupt and
+            # SystemExit must still be able to stop the app.
+            if attempt < max_attempts:
+                # Only wait when another attempt will actually follow.
+                time.sleep(retry_delay)
+    return '', ''
 logfile='querylog.csv'
 qlist=[
 #QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'
 score=0
+top_score=0
+top_url=''
 testing=True
 ext_url=''
             d['url']= r['metadata']['url']
             d['score']=r['score']
             resplist.append(d)
+    if len(resplist)==0:
+        top_score=-1
+    elif len(resplist)>0:
         sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
         # Get the elements with the top 2 highest values
         json_data = json.dumps(top_2)
+        top_score=resplist[sorted_indices[0]]['score']
+        top_url=resplist[sorted_indices[0]]['url']
         goodmatch=False
+        if top_score>=THRESHOLD:
             st.write('Preparing answers...')
             goodmatch=True
             mode = "two"  # two passages
             )
             ans= response.choices[0].message.content
+        else: ### BELOW THRESHOLD
+            #ans='Weak match to your query. Please try reframing your question'
+            ## get Tavily
+            ans, ext_url= getTavilyResponse(QUESTION)
             #st.write("Matched URL:{}  Score:{}".format(url,score))
             testing = False
                     st.write("2nd Matched URL:{}  Score:{}".format(resp[1]['id'],resp[1]['score']))
                 if len(resp)>=2:
                     st.write("3rd Matched URL:{}  Score:{}".format(resp[2]['id'],resp[2]['score']))
+    if goodmatch==False:  ## Either no match or Weak match
+        ## get Tavily
+        ans, ext_url= getTavilyResponse(QUESTION)
+        if len(ext_url)==0 :
+            ans="Unable to match your query. To get best matches mention clearly what information you seek. "
+        else:
+            top_url=ext_url
+    ##  Send RESPONSE
+    with st.chat_message("assistant"):
         response = st.write_stream(response_generator(ans))
+        if goodmatch:
+            st.write('Resources:')
+            for k in range(2):
+                disp_title=top_2[k]['title']
+                disp_url= top_2[k]['url']
+                if 'youtube' in disp_url:
+                    disp_title='Youtube: '+disp_title
+                if k>0:
+                    if top_2[k]['url']==top_2[k-1]['url']:
+                        break
+                st.write("["+disp_title+"]("+disp_url+")")
+        else: # not a good match
+            url='No match'
+            if len(ext_url)>5:
+                st.write('External Site:',ext_url)
+                url=ext_url
+                #st.write(top_2[0]['url'])
         # Add assistant response to chat history
         st.session_state.messages.append({"role": "assistant", "content": response})
+    ### log the result
+    write_log(QUESTION,top_url, top_score, ans)