app.py
CHANGED
@@ -7,6 +7,13 @@ from openai import OpenAI
 from datetime import datetime
 import pandas as pd
 import os
+
+from huggingface_hub import HfFileSystem
+fs = HfFileSystem()
+
+import time
+
+
 api_key='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'
 
 os.environ["OPENAI_API_KEY"] = api_key
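The hunk above still keeps a hard-coded api_key in app.py. A minimal sketch of sourcing the key from the Space's secrets instead, assuming an OPENAI_API_KEY secret has been configured for the Space (this is not part of the diff):

import os

# Assumption (not part of this change): the key is stored as an OPENAI_API_KEY
# secret on the Space, which Hugging Face exposes to the app as an environment variable.
api_key = os.environ.get("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("OPENAI_API_KEY secret is not configured")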
@@ -48,7 +55,7 @@ system_instructions_text='''
 
 Your response should be based on the information contained in the provided text and should not included any other sources.
 If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
-Do
+Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
 '''
 logfile='querylog.csv'
 try:
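The place where system_instructions_text is sent to the model is outside the hunks shown here. A hedged sketch of how a prompt like this is typically passed to the OpenAI client imported at the top of app.py; the helper name, model name, and message layout are assumptions, not code from this repo:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment

def get_answer(system_instructions_text, question, context_text):
    # Hypothetical helper: the system prompt constrains the answer to the retrieved
    # text, and the user message carries the question plus that text.
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo",  # assumed model; app.py may use a different one
        messages=[
            {"role": "system", "content": system_instructions_text},
            {"role": "user", "content": question + "\n\nText:\n" + context_text},
        ],
    )
    return completion.choices[0].message.content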
@@ -66,19 +73,53 @@ def query_from_pinecone(index,namespace, question_embedding, top_k=3):
         include_metadata=True # gets the metadata (dates, text, etc)
     ).get('matches')
 
-
+def response_generator(response):
+
+    for word in response.split():
+        yield word + " "
+        time.sleep(0.05)
+
+st.title('CLLM Answering Machine')
+
+# Initialize chat history
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+# Display chat messages from history on app rerun
+for message in st.session_state.messages:
+    with st.chat_message(message["role"]):
+        st.markdown(message["content"])
+
+
+QUESTION = st.chat_input('Ask a question -e.g How to prepare for Verbal section for CAT?')
+
+
+#QUESTION=st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?') ##' How to prepare for Verbal section ?'
 score=0
+testing=True
+
 
 if QUESTION:
+
+    with st.chat_message("user"):
+        st.markdown(QUESTION)
+    # Add user message to chat history
+    st.session_state.messages.append({"role": "user", "content": QUESTION})
+
     question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
 
     ns='webpages'
+    ns='full'
     resp= query_from_pinecone(index,ns, question_embedding.tolist(), 3)
     if len(resp)>0:
-
+        #st.write("Got results...extracting an answer")
+        out= resp[0]['metadata']['data']
+        try:
+            url= resp[0]['metadata']['url']
+        except:
+            url= resp[0]['id']
         url= resp[0]['id']
         score=resp[0]['score']
-        title=
+        title=resp[0]['metadata']['title']
         #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
 
         if score>.5:
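Only the closing lines of query_from_pinecone appear in the hunk above. A sketch of the full helper, consistent with the signature in the hunk header; everything except the two lines visible in the diff is an assumption based on the standard Pinecone client API:

def query_from_pinecone(index, namespace, question_embedding, top_k=3):
    # Query the index for the nearest neighbours of the question embedding.
    return index.query(
        vector=question_embedding,   # embedding of the user question
        namespace=namespace,         # e.g. 'webpages' or 'full'
        top_k=top_k,                 # number of matches to return
        include_metadata=True # gets the metadata (dates, text, etc)
    ).get('matches')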
@@ -102,15 +143,35 @@ if QUESTION:
         else:
             ans='Weak match to your query. Please try reframing your question'
 
-        st.write("Matched URL:{} Score:{}".format(url,score))
-
+        #st.write("Matched URL:{} Score:{}".format(url,score))
+        testing = False
+        if testing:
+            if len(resp)>=1:
+                st.write("2nd Matched URL:{} Score:{}".format(resp[1]['id'],resp[1]['score']))
+            if len(resp)>=2:
+                st.write("3rd Matched URL:{} Score:{}".format(resp[2]['id'],resp[2]['score']))
+
+        with st.chat_message("assistant"):
+            response = st.write_stream(response_generator(ans))
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response})
+        #st.write(ans)
+
+        #st.write(' ----------------------')
+        #st.write(out)
 
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,url,score,ans,now]
-
+        #df.to_csv("hf://datasets/sujitb/data/test.csv")
+        df_log.to_csv("hf://datasets/sujitb/data/"+logfile)
 
     else:
-        st.write("No matches for query")
+        #st.write("No matches for query")
+        ans= "No matches for query"
+        response = st.write_stream(response_generator(ans))
+        # Add assistant response to chat history
+        st.session_state.messages.append({"role": "assistant", "content": response})
+
         now= str(datetime.utcnow())
         df_log.loc[len(df_log)]=[QUESTION,'No match',0,'-',now]
-        df_log.to_csv(logfile)
+        df_log.to_csv("hf://datasets/sujitb/data/"+logfile)
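Taken together, the two hunks above move the app from plain st.write output to Streamlit's chat interface. A condensed, self-contained sketch of that flow, with the Pinecone/OpenAI lookup replaced by a placeholder answer so the snippet stands on its own:

import time
import streamlit as st

def response_generator(response):
    # Yield the answer word by word so st.write_stream can render it incrementally.
    for word in response.split():
        yield word + " "
        time.sleep(0.05)

st.title('CLLM Answering Machine')

# Replay the stored history on every rerun.
if "messages" not in st.session_state:
    st.session_state.messages = []
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

question = st.chat_input('Ask a question')
if question:
    with st.chat_message("user"):
        st.markdown(question)
    st.session_state.messages.append({"role": "user", "content": question})

    ans = "placeholder answer"  # the retrieval and answer-extraction step goes here
    with st.chat_message("assistant"):
        # write_stream renders the generator as it yields and returns the full text.
        response = st.write_stream(response_generator(ans))
    st.session_state.messages.append({"role": "assistant", "content": response})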
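The change also switches query logging from a local querylog.csv to a CSV stored in the sujitb/data dataset via an hf:// path. A minimal sketch of that persistence, assuming huggingface_hub is installed (pandas resolves hf:// URLs through its fsspec integration) and the active token has write access to the dataset; the column names are illustrative, since df_log is defined outside the hunks shown:

import pandas as pd
from huggingface_hub import HfFileSystem

fs = HfFileSystem()  # picks up the cached login or the HF_TOKEN environment variable

logfile = 'querylog.csv'
path = "hf://datasets/sujitb/data/" + logfile

# Reload the existing log if it is already on the Hub, otherwise start a new one.
if fs.exists("datasets/sujitb/data/" + logfile):
    df_log = pd.read_csv(path, index_col=0)
else:
    df_log = pd.DataFrame(columns=['question', 'url', 'score', 'answer', 'time'])

df_log.loc[len(df_log)] = ['example question', 'example-url', 0.0, 'example answer', 'timestamp']
df_log.to_csv(path)  # writes the CSV straight back to the dataset repo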