major changes - added Tavily
Browse files
app.py
CHANGED
@@ -116,24 +116,41 @@ def response_generator(response):
|
|
116 |
yield word + " "
|
117 |
time.sleep(0.05)
|
118 |
|
119 |
-
def write_log(
|
120 |
-
|
121 |
-
|
122 |
-
ans = ans.replace('"', '""')
|
123 |
-
new_row = f'\n"{query}","{url}",{score},"{ans}","{ts}"'
|
124 |
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
# Write the buffer to the file in "W" mode
|
132 |
-
with fs.open("datasets/sujitb/data/querylog.csv", "w",encoding="utf-8") as f:
|
133 |
-
f.write(buffer)
|
134 |
|
135 |
return
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
logfile='querylog.csv'
|
138 |
|
139 |
qlist=[
|
@@ -185,6 +202,9 @@ QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section
|
|
185 |
|
186 |
#QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
|
187 |
score=0
|
|
|
|
|
|
|
188 |
testing=True
|
189 |
ext_url=''
|
190 |
|
@@ -231,8 +251,9 @@ if QUESTION:
|
|
231 |
d['url']= r['metadata']['url']
|
232 |
d['score']=r['score']
|
233 |
resplist.append(d)
|
234 |
-
|
235 |
-
|
|
|
236 |
sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
|
237 |
|
238 |
# Get the elements with the top 2 highest values
|
@@ -242,9 +263,11 @@ if QUESTION:
|
|
242 |
|
243 |
json_data = json.dumps(top_2)
|
244 |
|
245 |
-
|
|
|
|
|
246 |
goodmatch=False
|
247 |
-
if
|
248 |
st.write('Preparing answers...')
|
249 |
goodmatch=True
|
250 |
mode = "two" # two passages
|
@@ -275,27 +298,11 @@ if QUESTION:
|
|
275 |
)
|
276 |
|
277 |
ans= response.choices[0].message.content
|
278 |
-
else:
|
279 |
-
ans='Weak match to your query. Please try reframing your question'
|
280 |
-
|
281 |
-
##
|
282 |
-
|
283 |
-
|
284 |
-
tavily = TavilyClient(api_key=tavily_key)
|
285 |
-
|
286 |
-
success= 0
|
287 |
-
while success<3:
|
288 |
-
success+=1
|
289 |
-
try:
|
290 |
-
resp=tavily.search(query=QUESTION)
|
291 |
-
with st.chat_message("assistant"):
|
292 |
-
ans=resp['results'][0]['content']
|
293 |
-
|
294 |
-
ext_url= resp['results'][0]['url']
|
295 |
-
break
|
296 |
-
except:
|
297 |
-
pass
|
298 |
-
|
299 |
|
300 |
#st.write("Matched URL:{} Score:{}".format(url,score))
|
301 |
testing = False
|
@@ -304,54 +311,42 @@ if QUESTION:
|
|
304 |
st.write("2nd Matched URL:{} Score:{}".format(resp[1]['id'],resp[1]['score']))
|
305 |
if len(resp)>=2:
|
306 |
st.write("3rd Matched URL:{} Score:{}".format(resp[2]['id'],resp[2]['score']))
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
disp_title=top_2[k]['title']
|
316 |
-
disp_url= top_2[k]['url']
|
317 |
-
if 'youtube' in disp_url:
|
318 |
-
disp_title='Youtube: '+disp_title
|
319 |
-
|
320 |
-
if k>0:
|
321 |
-
if top_2[k]['url']==top_2[k-1]['url']:
|
322 |
-
break
|
323 |
-
st.write("["+disp_title+"]("+disp_url+")")
|
324 |
-
|
325 |
-
else: # not a good match
|
326 |
|
327 |
-
|
328 |
-
|
329 |
-
#st.write(top_2[0]['url'])
|
330 |
-
# Add assistant response to chat history
|
331 |
-
st.session_state.messages.append({"role": "assistant", "content": response})
|
332 |
-
#st.write(ans)
|
333 |
-
|
334 |
-
#st.write(' ----------------------')
|
335 |
-
#st.write(out)
|
336 |
-
|
337 |
-
now= str(datetime.utcnow())
|
338 |
-
url = top_2[0]['url'] + ' ; '+top_2[1]['url']
|
339 |
-
|
340 |
-
df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
|
341 |
-
#write_log(QUESTION,url, score, ans, now)
|
342 |
-
#df.to_csv("hf://datasets/sujitb/data/test.csv")
|
343 |
-
storage_options={"token":token}
|
344 |
-
df_log.to_csv("hf://datasets/sujitb/data/"+logfile,storage_options= storage_options)
|
345 |
-
|
346 |
-
else: ## Zero response from pinecone query
|
347 |
-
#st.write("No matches for query")
|
348 |
-
ans= "No matches for query"
|
349 |
response = st.write_stream(response_generator(ans))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
350 |
# Add assistant response to chat history
|
351 |
st.session_state.messages.append({"role": "assistant", "content": response})
|
352 |
|
353 |
-
|
354 |
-
|
355 |
-
|
356 |
-
|
357 |
-
#write_log(QUESTION,'No match', 0, '-', now)
|
|
|
116 |
yield word + " "
|
117 |
time.sleep(0.05)
|
118 |
|
119 |
+
def write_log(QUESTION,url, score, ans):
    """Append one query record to the log frame and upload the frame as CSV.

    A row ``[QUESTION, url, score, ans, timestamp]`` is added to the
    module-level ``df_log`` DataFrame, and the whole frame is then written to
    ``hf://datasets/sujitb/data/<logfile>`` using the token read from the
    ``FILE_TOKEN`` environment variable.

    Args:
        QUESTION: The user's query text.
        url: Source URL recorded for the answer.
        score: Match score recorded for the answer.
        ans: The answer text that was shown to the user.

    Returns:
        None.
    """
    # Local import: only the ``datetime`` class is known to be in scope here.
    from datetime import timezone

    token = os.getenv('FILE_TOKEN')

    # datetime.utcnow() is deprecated (Python 3.12+). Take an aware UTC "now"
    # and drop the tzinfo so the logged string keeps the same naive format
    # as the rows already in the log.
    now = str(datetime.now(timezone.utc).replace(tzinfo=None))

    # New row is stored under the label len(df_log).
    df_log.loc[len(df_log)] = [QUESTION, url, score, ans, now]

    storage_options = {"token": token}
    df_log.to_csv("hf://datasets/sujitb/data/" + logfile, storage_options=storage_options)

    return
|
130 |
|
131 |
+
def getTavilyResponse(QUESTION, client=None, max_attempts=3, retry_delay=1):
    """Fetch the top Tavily search result for a question, with retries.

    Args:
        QUESTION: The query string sent to Tavily.
        client: Optional object exposing ``search(query=...)``. When omitted,
            a ``TavilyClient`` is built from the ``TAVILY_KEY`` environment
            variable (a missing variable raises ``KeyError``, as before).
        max_attempts: Maximum number of search attempts (default 3).
        retry_delay: Seconds to wait between failed attempts (default 1).

    Returns:
        A ``(ans, ext_url)`` tuple holding the ``content`` and ``url`` of the
        first search result, or ``('', '')`` when every attempt failed or
        returned no usable result.
    """
    if client is None:
        tavily_key = os.environ["TAVILY_KEY"]
        client = TavilyClient(api_key=tavily_key)

    for attempt in range(1, max_attempts + 1):
        try:
            top_hit = client.search(query=QUESTION)['results'][0]
            # Read both fields before returning so the caller never sees an
            # answer without its matching URL.
            return top_hit['content'], top_hit['url']
        except Exception:
            # Best-effort lookup: API errors and empty/malformed results are
            # retried. KeyboardInterrupt/SystemExit are deliberately not caught.
            if attempt < max_attempts:
                # No point sleeping after the final attempt.
                time.sleep(retry_delay)

    return '', ''
|
151 |
+
|
152 |
+
|
153 |
+
|
154 |
logfile='querylog.csv'
|
155 |
|
156 |
qlist=[
|
|
|
202 |
|
203 |
#QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
|
204 |
score=0
|
205 |
+
top_score=0
|
206 |
+
top_url=''
|
207 |
+
|
208 |
testing=True
|
209 |
ext_url=''
|
210 |
|
|
|
251 |
d['url']= r['metadata']['url']
|
252 |
d['score']=r['score']
|
253 |
resplist.append(d)
|
254 |
+
if len(resplist)==0:
|
255 |
+
top_score=-1
|
256 |
+
elif len(resplist)>0:
|
257 |
sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
|
258 |
|
259 |
# Get the elements with the top 2 highest values
|
|
|
263 |
|
264 |
json_data = json.dumps(top_2)
|
265 |
|
266 |
+
top_score=resplist[sorted_indices[0]]['score']
|
267 |
+
top_url=resplist[sorted_indices[0]]['url']
|
268 |
+
|
269 |
goodmatch=False
|
270 |
+
if top_score>=THRESHOLD:
|
271 |
st.write('Preparing answers...')
|
272 |
goodmatch=True
|
273 |
mode = "two" # two passages
|
|
|
298 |
)
|
299 |
|
300 |
ans= response.choices[0].message.content
|
301 |
+
else: ### BELOW THRESHOLD
|
302 |
+
#ans='Weak match to your query. Please try reframing your question'
|
303 |
+
|
304 |
+
## get Tavily
|
305 |
+
ans, ext_url= getTavilyResponse(QUESTION)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
#st.write("Matched URL:{} Score:{}".format(url,score))
|
308 |
testing = False
|
|
|
311 |
st.write("2nd Matched URL:{} Score:{}".format(resp[1]['id'],resp[1]['score']))
|
312 |
if len(resp)>=2:
|
313 |
st.write("3rd Matched URL:{} Score:{}".format(resp[2]['id'],resp[2]['score']))
|
314 |
+
if goodmatch==False: ## Either no match or Weak match
|
315 |
+
## get Tavily
|
316 |
+
ans, ext_url= getTavilyResponse(QUESTION)
|
317 |
+
if len(ext_url)==0 :
|
318 |
+
ans="Unable to match your query. To get best matches mention clearly what information you seek. "
|
319 |
+
else:
|
320 |
+
top_url=ext_url
|
321 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
+
## Send RESPONSE
|
324 |
+
with st.chat_message("assistant"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
325 |
response = st.write_stream(response_generator(ans))
|
326 |
+
if goodmatch:
|
327 |
+
st.write('Resources:')
|
328 |
+
for k in range(2):
|
329 |
+
disp_title=top_2[k]['title']
|
330 |
+
disp_url= top_2[k]['url']
|
331 |
+
if 'youtube' in disp_url:
|
332 |
+
disp_title='Youtube: '+disp_title
|
333 |
+
|
334 |
+
if k>0:
|
335 |
+
if top_2[k]['url']==top_2[k-1]['url']:
|
336 |
+
break
|
337 |
+
st.write("["+disp_title+"]("+disp_url+")")
|
338 |
+
|
339 |
+
else: # not a good match
|
340 |
+
url='No match'
|
341 |
+
if len(ext_url)>5:
|
342 |
+
st.write('External Site:',ext_url)
|
343 |
+
url=ext_url
|
344 |
+
|
345 |
+
#st.write(top_2[0]['url'])
|
346 |
# Add assistant response to chat history
|
347 |
st.session_state.messages.append({"role": "assistant", "content": response})
|
348 |
|
349 |
+
|
350 |
+
### log the result
|
351 |
+
write_log(QUESTION,top_url, top_score, ans)
|
352 |
+
|
|