sujitb committed on
Commit
976a040
1 Parent(s): 8560ab0
Files changed (1) hide show
  1. app.py +70 -9
app.py CHANGED
@@ -7,6 +7,13 @@ from openai import OpenAI
7
  from datetime import datetime
8
  import pandas as pd
9
  import os
 
 
 
 
 
 
 
10
# The OpenAI key is read from the environment (e.g. a Space secret) instead of
# being written into the source: a key committed to a repository is public and
# must be treated as compromised, so the previously embedded key should be
# revoked. The client libraries read OPENAI_API_KEY from the environment
# themselves, so nothing has to be copied back into os.environ.
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    import warnings

    warnings.warn(
        "OPENAI_API_KEY is not set; OpenAI requests will fail until it is configured.",
        RuntimeWarning,
    )
@@ -48,7 +55,7 @@ system_instructions_text='''
48
 
49
  Your response should be based on the information contained in the provided text and should not included any other sources.
50
  If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
51
- Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
52
  '''
53
  logfile='querylog.csv'
54
  try:
@@ -66,19 +73,53 @@ def query_from_pinecone(index,namespace, question_embedding, top_k=3):
66
  include_metadata=True # gets the metadata (dates, text, etc)
67
  ).get('matches')
68
 
69
- QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  score=0
 
 
71
 
72
  if QUESTION:
 
 
 
 
 
 
73
  question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
74
 
75
  ns='webpages'
 
76
  resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
77
  if len(resp)>0:
78
- out= resp[0]['metadata']['text']
 
 
 
 
 
79
  url= resp[0]['id']
80
  score=resp[0]['score']
81
- title='NA' #resp[0]['metadata']['title']
82
  #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
83
 
84
  if score>.5:
@@ -102,15 +143,35 @@ if QUESTION:
102
  else:
103
  ans='Weak match to your query. Please try reframing your question'
104
 
105
- st.write("Matched URL:{} Score:{}".format(url,score))
106
- st.write(ans)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  now= str(datetime.utcnow())
109
  df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
110
- df_log.to_csv(logfile)
 
111
 
112
  else:
113
- st.write("No matches for query")
 
 
 
 
 
114
  now= str(datetime.utcnow())
115
  df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
116
- df_log.to_csv(logfile)
 
7
  from datetime import datetime
8
  import pandas as pd
9
  import os
10
+
11
+ from huggingface_hub import HfFileSystem
12
+ fs = HfFileSystem()
13
+
14
+ import time
15
+
16
+
17
# The OpenAI key is read from the environment (e.g. a Space secret) instead of
# being written into the source: a key committed to a repository is public and
# must be treated as compromised, so the previously embedded key should be
# revoked. The client libraries read OPENAI_API_KEY from the environment
# themselves, so nothing has to be copied back into os.environ.
api_key = os.environ.get("OPENAI_API_KEY", "")
if not api_key:
    import warnings

    warnings.warn(
        "OPENAI_API_KEY is not set; OpenAI requests will fail until it is configured.",
        RuntimeWarning,
    )
 
55
 
56
  Your response should be based on the information contained in the provided text and should not included any other sources.
57
  If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
58
+ Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
59
  '''
60
  logfile='querylog.csv'
61
  try:
 
73
  include_metadata=True # gets the metadata (dates, text, etc)
74
  ).get('matches')
75
 
76
def response_generator(response, delay=0.05):
    """Yield *response* piece by piece to imitate a typing effect.

    Each chunk is one whitespace-delimited word together with the whitespace
    that follows it in the original text. Joining the chunks therefore gives
    back the answer with its line breaks intact (only leading whitespace is
    dropped), so Markdown lists and paragraphs survive streaming instead of
    being flattened onto a single line.

    Args:
        response: Full answer text. ``None`` or an empty string yields nothing.
        delay: Seconds to pause after each chunk. Defaults to 0.05, the pause
            this function previously hard-coded; pass 0 to disable it.

    Yields:
        str: The next chunk of the answer.
    """
    import re  # local import: not assumed to be imported at the top of the file

    for chunk in re.findall(r"\S+\s*", response or ""):
        yield chunk
        if delay > 0:
            time.sleep(delay)
81
+
82
st.title('CLLM Answering Machine')

# The transcript lives in session_state and is created only when the key is
# not there yet, so turns appended further down are kept for later renders.
if "messages" not in st.session_state:
    st.session_state.messages = []

# Replay every stored turn in its own chat bubble before handling new input.
for past_turn in st.session_state.messages:
    role, content = past_turn["role"], past_turn["content"]
    with st.chat_message(role):
        st.markdown(content)

# Text submitted through the chat box; the `if QUESTION:` guard below skips
# all processing while this is empty.
QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section for CAT?')

# Defaults used by the query handling that follows.
score = 0
testing = True
99
+
100
 
101
  if QUESTION:
102
+
103
# Show the user's question in its own chat bubble straight away.
with st.chat_message("user"):
    st.markdown(QUESTION)
# Add user message to chat history
st.session_state.messages.append({"role": "user", "content": QUESTION})

# Embed the question and fetch the three closest entries from the index.
question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

# Only the 'full' namespace is searched (a preceding 'webpages' assignment was
# overwritten before it was ever used).
ns = 'full'
# query_from_pinecone ends in `.get('matches')`, which yields None when the
# key is absent; normalise that to an empty list so `len(resp)` stays valid.
resp = query_from_pinecone(index, ns, question_embedding.tolist(), 3) or []
113
  if len(resp)>0:
114
+ #st.write("Got results...extracting an answer")
115
# Pull the fields of the best match (resp[0]) that the answer step and the
# query log rely on.
out = resp[0]['metadata']['data']
# Prefer the URL stored in the match metadata and fall back to the vector id
# only when no 'url' key is present. The former unconditional
# `url = resp[0]['id']` right after this lookup threw the metadata URL away.
try:
    url = resp[0]['metadata']['url']
except KeyError:
    url = resp[0]['id']
score = resp[0]['score']
title = resp[0]['metadata']['title']
123
  #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
124
 
125
  if score>.5:
 
143
  else:
144
  ans='Weak match to your query. Please try reframing your question'
145
 
146
+ #st.write("Matched URL:{} Score:{}".format(url,score))
147
# Debug listing of the runner-up matches. It is switched off here, overriding
# the `testing=True` set before the query handling; set it to True while
# tuning retrieval.
testing = False
if testing:
    # resp[0] is the primary match. resp[1] exists only with at least two
    # matches and resp[2] only with at least three, so each index is checked
    # against the list length before it is read.
    for position, rank_label in ((1, "2nd"), (2, "3rd")):
        if len(resp) > position:
            st.write("{} Matched URL:{} Score:{}".format(rank_label, resp[position]['id'], resp[position]['score']))

# Stream the answer into an assistant bubble; write_stream's return value is
# what gets stored in the history.
with st.chat_message("assistant"):
    response = st.write_stream(response_generator(ans))
# Add assistant response to chat history
st.session_state.messages.append({"role": "assistant", "content": response})

# Append this exchange to the query log and write the whole log to the
# hf://datasets/sujitb/data/ location.
now = str(datetime.utcnow())
df_log.loc[len(df_log)] = [QUESTION, url, score, ans, now]
df_log.to_csv("hf://datasets/sujitb/data/" + logfile)
167
 
168
  else:
169
# The index returned no matches. Reply inside an assistant bubble, exactly as
# the match branch does, so the live render looks the same as this turn does
# when it is later replayed from the chat history.
ans = "No matches for query"
with st.chat_message("assistant"):
    response = st.write_stream(response_generator(ans))
# Add assistant response to chat history
st.session_state.messages.append({"role": "assistant", "content": response})

# Log the unanswered question with placeholder url/score/answer values.
now = str(datetime.utcnow())
df_log.loc[len(df_log)] = [QUESTION, 'No match', 0, '-', now]
df_log.to_csv("hf://datasets/sujitb/data/" + logfile)