sujitb committed on
Commit
6d6a553
1 Parent(s): 6b136b5

major changes - added Tavily

Browse files
Files changed (1) hide show
  1. app.py +80 -85
app.py CHANGED
@@ -116,24 +116,41 @@ def response_generator(response):
116
  yield word + " "
117
  time.sleep(0.05)
118
 
119
def write_log(query, url, score, ans, ts):
    """Append one record to the query log CSV stored behind ``fs``.

    The log is rewritten in full: the existing file is read, the new row is
    appended to the text, and the result is written back.

    Args:
        query: The user's question.
        url: URL(s) matched for the question.
        score: Match score; written unquoted.
        ans: The answer text shown to the user.
        ts: Timestamp of the interaction.

    Returns:
        None.
    """
    log_path = "datasets/sujitb/data/querylog.csv"

    def quoted(value):
        # CSV quoting: double any embedded quote, then wrap in quotes.
        # Applied to every text field (not only the answer) because the
        # query is free-form user input and may itself contain '"'.
        return '"' + str(value).replace('"', '""') + '"'

    new_row = "\n" + ",".join(
        [quoted(query), quoted(url), str(score), quoted(ans), quoted(ts)]
    )

    # Read with the same encoding used for writing so non-ASCII text
    # survives the read-modify-write round trip.
    with fs.open(log_path, "r", encoding="utf-8") as f:
        buffer = f.read()

    with fs.open(log_path, "w", encoding="utf-8") as f:
        f.write(buffer + new_row)

    return
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  logfile='querylog.csv'
138
 
139
  qlist=[
@@ -185,6 +202,9 @@ QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section
185
 
186
  #QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
187
  score=0
 
 
 
188
  testing=True
189
  ext_url=''
190
 
@@ -231,8 +251,9 @@ if QUESTION:
231
  d['url']= r['metadata']['url']
232
  d['score']=r['score']
233
  resplist.append(d)
234
-
235
- if len(resplist)>0:
 
236
  sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
237
 
238
  # Get the elements with the top 2 highest values
@@ -242,9 +263,11 @@ if QUESTION:
242
 
243
  json_data = json.dumps(top_2)
244
 
245
-
 
 
246
  goodmatch=False
247
- if resplist[sorted_indices[0]]['score']>=THRESHOLD:
248
  st.write('Preparing answers...')
249
  goodmatch=True
250
  mode = "two" # two passages
@@ -275,27 +298,11 @@ if QUESTION:
275
  )
276
 
277
  ans= response.choices[0].message.content
278
- else:
279
- ans='Weak match to your query. Please try reframing your question'
280
-
281
- ## Call Tavily
282
- tavily_key= os.environ["TAVILY_KEY"]
283
-
284
- tavily = TavilyClient(api_key=tavily_key)
285
-
286
- success= 0
287
- while success<3:
288
- success+=1
289
- try:
290
- resp=tavily.search(query=QUESTION)
291
- with st.chat_message("assistant"):
292
- ans=resp['results'][0]['content']
293
-
294
- ext_url= resp['results'][0]['url']
295
- break
296
- except:
297
- pass
298
-
299
 
300
  #st.write("Matched URL:{} Score:{}".format(url,score))
301
  testing = False
@@ -304,54 +311,42 @@ if QUESTION:
304
  st.write("2nd Matched URL:{} Score:{}".format(resp[1]['id'],resp[1]['score']))
305
  if len(resp)>=2:
306
  st.write("3rd Matched URL:{} Score:{}".format(resp[2]['id'],resp[2]['score']))
307
-
308
-
309
- ## Send RESPONSE
310
- with st.chat_message("assistant"):
311
- response = st.write_stream(response_generator(ans))
312
- if goodmatch:
313
- st.write('Resources:')
314
- for k in range(2):
315
- disp_title=top_2[k]['title']
316
- disp_url= top_2[k]['url']
317
- if 'youtube' in disp_url:
318
- disp_title='Youtube: '+disp_title
319
-
320
- if k>0:
321
- if top_2[k]['url']==top_2[k-1]['url']:
322
- break
323
- st.write("["+disp_title+"]("+disp_url+")")
324
-
325
- else: # not a good match
326
 
327
- if len(ext_url)>5:
328
- st.write('External Site:',ext_url)
329
- #st.write(top_2[0]['url'])
330
- # Add assistant response to chat history
331
- st.session_state.messages.append({"role": "assistant", "content": response})
332
- #st.write(ans)
333
-
334
- #st.write(' ----------------------')
335
- #st.write(out)
336
-
337
- now= str(datetime.utcnow())
338
- url = top_2[0]['url'] + ' ; '+top_2[1]['url']
339
-
340
- df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
341
- #write_log(QUESTION,url, score, ans, now)
342
- #df.to_csv("hf://datasets/sujitb/data/test.csv")
343
- storage_options={"token":token}
344
- df_log.to_csv("hf://datasets/sujitb/data/"+logfile,storage_options= storage_options)
345
-
346
- else: ## Zero response from pinecone query
347
- #st.write("No matches for query")
348
- ans= "No matches for query"
349
  response = st.write_stream(response_generator(ans))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  # Add assistant response to chat history
351
  st.session_state.messages.append({"role": "assistant", "content": response})
352
 
353
- now= str(datetime.utcnow())
354
- df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
355
- storage_options={"token":token}
356
- df_log.to_csv("hf://datasets/sujitb/data/"+logfile,storage_options= storage_options)
357
- #write_log(QUESTION,'No match', 0, '-', now)
 
116
  yield word + " "
117
  time.sleep(0.05)
118
 
119
def write_log(QUESTION, url, score, ans):
    """Record one question/answer exchange in the query log and upload it.

    A row ``[QUESTION, url, score, ans, <UTC timestamp>]`` is added at the
    end of the module-level ``df_log`` (presumably a pandas DataFrame --
    it is used via ``.loc`` and ``.to_csv``), and the whole frame is then
    written to ``logfile`` under the ``hf://datasets/sujitb/data/`` path.

    Args:
        QUESTION: The user's question.
        url: URL associated with the answer.
        score: Match score for the answer.
        ans: The answer text.

    Returns:
        None.
    """
    timestamp = str(datetime.utcnow())

    # The current row count is used as the label of the new row.
    next_row = len(df_log)
    df_log.loc[next_row] = [QUESTION, url, score, ans, timestamp]

    # The access token is read from the FILE_TOKEN environment variable.
    destination = "hf://datasets/sujitb/data/" + logfile
    df_log.to_csv(destination, storage_options={"token": os.getenv('FILE_TOKEN')})

    return
130
 
131
def getTavilyResponse(QUESTION):
    """Look up QUESTION with the Tavily search client and return its top hit.

    The search is attempted up to three times, pausing one second between
    failed attempts.

    Args:
        QUESTION: The user's query string, passed to ``tavily.search``.

    Returns:
        A ``(ans, ext_url)`` tuple holding the ``content`` and ``url`` of
        the first search result, or ``('', '')`` if every attempt failed
        (including when the search returned no results).

    Raises:
        KeyError: If the ``TAVILY_KEY`` environment variable is not set.
    """
    max_attempts = 3

    tavily = TavilyClient(api_key=os.environ["TAVILY_KEY"])

    for attempt in range(1, max_attempts + 1):
        try:
            resp = tavily.search(query=QUESTION)
            top_hit = resp['results'][0]
            # Read both fields before returning so the caller never gets an
            # answer without the URL it came from.
            return top_hit['content'], top_hit['url']
        except Exception:
            # Best-effort lookup: any client/network/shape error triggers a
            # retry. `Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            if attempt < max_attempts:
                time.sleep(1)  # no point waiting after the final attempt

    return '', ''
151
+
152
+
153
+
154
  logfile='querylog.csv'
155
 
156
  qlist=[
 
202
 
203
  #QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
204
  score=0
205
+ top_score=0
206
+ top_url=''
207
+
208
  testing=True
209
  ext_url=''
210
 
 
251
  d['url']= r['metadata']['url']
252
  d['score']=r['score']
253
  resplist.append(d)
254
+ if len(resplist)==0:
255
+ top_score=-1
256
+ elif len(resplist)>0:
257
  sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
258
 
259
  # Get the elements with the top 2 highest values
 
263
 
264
  json_data = json.dumps(top_2)
265
 
266
+ top_score=resplist[sorted_indices[0]]['score']
267
+ top_url=resplist[sorted_indices[0]]['url']
268
+
269
  goodmatch=False
270
+ if top_score>=THRESHOLD:
271
  st.write('Preparing answers...')
272
  goodmatch=True
273
  mode = "two" # two passages
 
298
  )
299
 
300
  ans= response.choices[0].message.content
301
+ else: ### BELOW THRESHOLD
302
+ #ans='Weak match to your query. Please try reframing your question'
303
+
304
+ ## get Tavily
305
+ ans, ext_url= getTavilyResponse(QUESTION)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
 
307
  #st.write("Matched URL:{} Score:{}".format(url,score))
308
  testing = False
 
311
  st.write("2nd Matched URL:{} Score:{}".format(resp[1]['id'],resp[1]['score']))
312
  if len(resp)>=2:
313
  st.write("3rd Matched URL:{} Score:{}".format(resp[2]['id'],resp[2]['score']))
314
+ if goodmatch==False: ## Either no match or Weak match
315
+ ## get Tavily
316
+ ans, ext_url= getTavilyResponse(QUESTION)
317
+ if len(ext_url)==0 :
318
+ ans="Unable to match your query. To get best matches mention clearly what information you seek. "
319
+ else:
320
+ top_url=ext_url
321
+
 
 
 
 
 
 
 
 
 
 
 
322
 
323
+ ## Send RESPONSE
324
+ with st.chat_message("assistant"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  response = st.write_stream(response_generator(ans))
326
+ if goodmatch:
327
+ st.write('Resources:')
328
+ for k in range(2):
329
+ disp_title=top_2[k]['title']
330
+ disp_url= top_2[k]['url']
331
+ if 'youtube' in disp_url:
332
+ disp_title='Youtube: '+disp_title
333
+
334
+ if k>0:
335
+ if top_2[k]['url']==top_2[k-1]['url']:
336
+ break
337
+ st.write("["+disp_title+"]("+disp_url+")")
338
+
339
+ else: # not a good match
340
+ url='No match'
341
+ if len(ext_url)>5:
342
+ st.write('External Site:',ext_url)
343
+ url=ext_url
344
+
345
+ #st.write(top_2[0]['url'])
346
  # Add assistant response to chat history
347
  st.session_state.messages.append({"role": "assistant", "content": response})
348
 
349
+
350
+ ### log the result
351
+ write_log(QUESTION,top_url, top_score, ans)
352
+