Update app.py
Browse files
app.py
CHANGED
@@ -94,6 +94,19 @@ json_instructions='''
|
|
94 |
Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
|
95 |
'''
|
96 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
def query_from_pinecone(index,namespace, question_embedding, top_k=3):
|
98 |
# get embedding from THE SAME embedder as the documents
|
99 |
|
@@ -175,20 +188,32 @@ if QUESTION:
|
|
175 |
except:
|
176 |
url= resp[0]['id']
|
177 |
url= resp[0]['id']
|
178 |
-
|
179 |
title=resp[0]['metadata']['title']
|
180 |
#+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
|
181 |
d={}
|
182 |
d['id']=1
|
183 |
d['content']=resp[0]['metadata']['data']
|
|
|
|
|
|
|
184 |
|
185 |
if len(resp)>1:
|
186 |
d['id']=2
|
187 |
d['content']=resp[1]['metadata']['data']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
# convert to array
|
189 |
-
json_array = [{'id': key, 'content': value} for key, value in
|
190 |
json_data = json.dumps(json_array)
|
191 |
|
|
|
192 |
|
193 |
if score>.5:
|
194 |
|
|
|
94 |
Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
|
95 |
'''
|
96 |
|
97 |
+
def get_meta_score(url, question_embedding):
    """Return the cosine similarity between the question and a document's meta vector.

    Fetches the vector stored under the id ``url`` in the ``"meta"`` namespace
    of the module-level ``index`` and compares it with ``question_embedding``
    using ``util.cos_sim``.

    Args:
        url: Id of the record to fetch; also the key used to read the vector
            back out of the fetch response.
        question_embedding: Embedding of the user's question, in any form
            ``util.cos_sim`` accepts.
            NOTE(review): presumably produced by the same embedder as the
            stored vectors -- confirm against the caller.

    Returns:
        The similarity as a Python float, or ``0`` when the meta vector is
        missing or the similarity cannot be computed.
    """
    # The fetch stays outside the try block, as before, so connection/API
    # failures surface to the caller instead of being reported as a score of 0.
    qry = index.fetch(ids=[url], namespace="meta")
    try:
        # The response is keyed by the requested id. The previous lookup used
        # `resp['id']`, a name that is not defined in this function, so every
        # call fell through to the fallback below.
        emb = qry['vectors'][url]['values']

        # Calculate cosine similarity
        cosine_scores = util.cos_sim(question_embedding, emb)
        return cosine_scores.item()
    except Exception:
        # Deliberate best-effort: a document without a usable meta vector
        # contributes nothing to the blended score. `Exception` (rather than a
        # bare `except`) lets KeyboardInterrupt/SystemExit propagate.
        return 0
|
109 |
+
|
110 |
def query_from_pinecone(index,namespace, question_embedding, top_k=3):
|
111 |
# get embedding from THE SAME embedder as the documents
|
112 |
|
|
|
188 |
except:
|
189 |
url= resp[0]['id']
|
190 |
url= resp[0]['id']
|
191 |
+
|
192 |
title=resp[0]['metadata']['title']
|
193 |
#+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
|
194 |
d={}
|
195 |
d['id']=1
|
196 |
d['content']=resp[0]['metadata']['data']
|
197 |
+
d['url']=url
|
198 |
+
meta_score= get_meta_score(url,question_embedding)
|
199 |
+
score=.5* resp[0]['score'] + .5*meta_score
|
200 |
|
201 |
if len(resp)>1:
|
202 |
d['id']=2
|
203 |
d['content']=resp[1]['metadata']['data']
|
204 |
+
d['url']=url
|
205 |
+
meta_score= get_meta_score(url,question_embedding)
|
206 |
+
score=.5* resp[0]['score'] + .5*meta_score
|
207 |
+
|
208 |
+
|
209 |
+
# Get the elements with the top 2 highest values
|
210 |
+
top_2 = sorted(d.items(), key=lambda x: x[3], reverse=True)[:2]
|
211 |
+
|
212 |
# convert to array
|
213 |
+
json_array = [{'id': key, 'content': value} for key, value in top_2.items()]
|
214 |
json_data = json.dumps(json_array)
|
215 |
|
216 |
+
st.write(json_data)
|
217 |
|
218 |
if score>.5:
|
219 |
|