Update app.py
app.py CHANGED

@@ -97,7 +97,7 @@ json_instructions='''
 def get_meta_score(url,question_embedding):
     qry = index.fetch(ids=[url], namespace="meta")
     try:
-        emb=qry['vectors'][
+        emb=qry['vectors'][url]['values']
         vector1 = np.array(emb).reshape(1, -1) # Reshape to ensure compatibility with sklearn
         vector2 = question_embedding.numpy().reshape(1, -1)
 
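The one-line fix above indexes the fetch response by the record id before pulling its raw values. The rest of get_meta_score is not visible in this hunk, so the sketch below fills in the surrounding cosine-similarity computation only as an assumption (the return statement, the except branch, and the zero fallback are guesses; the app also passes a tensor and calls .numpy(), whereas this sketch accepts anything array-like):

# Hedged sketch: only the lines shown in the hunk are confirmed; the return value
# and the except branch are assumptions, not the app's actual code.
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def get_meta_score_sketch(index, url, question_embedding):
    qry = index.fetch(ids=[url], namespace="meta")
    try:
        emb = qry['vectors'][url]['values']      # fetch() keys vectors by id; raw floats sit under 'values'
        vector1 = np.array(emb).reshape(1, -1)   # shape (1, dim) so sklearn accepts it
        vector2 = np.asarray(question_embedding).reshape(1, -1)
        return float(cosine_similarity(vector1, vector2)[0][0])
    except KeyError:
        return 0.0                               # assumed fallback when the id has no stored meta vector
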
@@ -179,77 +179,64 @@ if QUESTION:
 
     ns='webpages'
     ns='full'
-    resp= query_from_pinecone(index,ns, question_embedding.tolist(),
-    url= resp[0]['metadata']['url']
-    except:
-    url= resp[0]['id']
-    url= resp[0]['id']
-    title=resp[0]['metadata']['title']
-    #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
-    matches=[]
+    resp= query_from_pinecone(index,ns, question_embedding.tolist(), 10)
+    resplist=[]
+    id=0
+    for r in resp:
+        id+=1
     d={}
-    d['id']=
-    d['content']=
-    d['url']=
-    meta_score= get_meta_score(
-    score=.5*
-    matches.append(d)
-    sorted_indices = sorted(range(len(objects)), key=lambda i: objects[i]['score'], reverse=True)
-    # Get the elements with the top 2 highest values
-    top_2 = [objects[i] for i in sorted_indices[:2]]
+        d['id']=id
+        d['content']=r['metadata']['data']
+        d['url']=r['id']
+        meta_score= get_meta_score(r['id'],question_embedding)
+        score=.5* r['score'] + .5*meta_score
+        d['score']=score
+        print(d['url'], score, r['score'], meta_score)
+        resplist.append(d)
+
+    sorted_indices = sorted(range(len(resplist)), key=lambda i: resplist[i]['score'], reverse=True)
+
+    # Get the elements with the top 2 highest values
+    top_2 = [resplist[i] for i in sorted_indices[:2]]
 
+    # covert to array
+
+    json_data = json.dumps(top_2)
 
+    st.write(json_data)
+    goodmatch=False
+    if resplist[sorted_indices[0]]['score']>.5:
+        goodmatch=True
+        mode = "two" # two passages
+
+        client = OpenAI()
+
+        if mode=="one":
+            instr=system_instructions_text
+
+            out= resplist[sorted_indices[0]]['content']
+            content="""
+            <text>
+            {}
+            </text>
+            """.format(out)
+
+        if mode=="two":
+            instr=json_instructions
+            content=json_data
+
+        response = client.chat.completions.create(
+            model="gpt-3.5-turbo",
+            messages=[
+                {"role": "system", "content":instr },
+                {"role": "user", "content": content},
+                {"role": "user", "content": "Question:"+QUESTION}
+            ]
+        )
+
+        ans= response.choices[0].message.content
+    else:
+        ans='Weak match to your query. Please try reframing your question'
 
     #st.write("Matched URL:{} Score:{}".format(url,score))
     testing = False
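The rewritten retrieval loop leans on a query_from_pinecone helper that is not part of this diff. A plausible shape for it, assuming the standard Pinecone index.query API and inferring the return value from how the loop reads r['id'], r['score'] and r['metadata']['data'], would be:

# Assumption: this helper is not shown in the diff; its signature and the choice
# to return resp['matches'] directly are inferred from the calling code above.
def query_from_pinecone(index, namespace, vector, top_k=10):
    resp = index.query(
        vector=vector,           # question embedding as a plain list of floats
        top_k=top_k,             # 10 candidate passages in the call above
        namespace=namespace,     # 'full', per the ns assignments above
        include_metadata=True,   # required for r['metadata']['data']
    )
    return resp['matches']

Each candidate's final score blends the vector match with the meta score: with, say, a Pinecone score of 0.82 and a meta score of 0.64, score = .5*0.82 + .5*0.64 = 0.73, and the OpenAI call only runs when the best blended score clears the .5 threshold; otherwise the canned "weak match" reply is returned.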
@@ -261,6 +248,9 @@ if QUESTION:
 
     with st.chat_message("assistant"):
         response = st.write_stream(response_generator(ans))
+        if goodmatch:
+            st.write('Resources:'+top_2[0]['url'])
+            st.write(top_2[1]['url'])
     # Add assistant response to chat history
     st.session_state.messages.append({"role": "assistant", "content": response})
     #st.write(ans)
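st.write_stream consumes a generator and returns the full text it rendered, which is what gets appended to the chat history above. response_generator itself is defined elsewhere in app.py; the snippet below is only an assumption about what it might look like, following the common word-by-word streaming pattern from the Streamlit chat examples:

# Assumption: response_generator is not part of this diff; this is the usual
# Streamlit streaming pattern, not the app's confirmed implementation.
import time

def response_generator(answer: str):
    for word in answer.split():
        yield word + " "        # st.write_stream renders each chunk as it arrives
        time.sleep(0.03)        # small delay so the reply appears to stream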