sujitb committed on
Commit
d9f949e
1 Parent(s): b492789

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -2
app.py CHANGED
@@ -94,6 +94,19 @@ json_instructions='''
94
  Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
95
  '''
96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  def query_from_pinecone(index,namespace, question_embedding, top_k=3):
98
  # get embedding from THE SAME embedder as the documents
99
 
@@ -175,20 +188,32 @@ if QUESTION:
175
  except:
176
  url= resp[0]['id']
177
  url= resp[0]['id']
178
- score=resp[0]['score']
179
  title=resp[0]['metadata']['title']
180
  #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
181
  d={}
182
  d['id']=1
183
  d['content']=resp[0]['metadata']['data']
 
 
 
184
 
185
  if len(resp)>1:
186
  d['id']=2
187
  d['content']=resp[1]['metadata']['data']
 
 
 
 
 
 
 
 
188
  # convert to array
189
- json_array = [{'id': key, 'content': value} for key, value in d.items()]
190
  json_data = json.dumps(json_array)
191
 
 
192
 
193
  if score>.5:
194
 
 
94
  Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
95
  '''
96
 
97
def get_meta_score(url, question_embedding):
    """Return the cosine similarity between a question and a stored "meta" vector.

    Fetches the vector stored under the id ``url`` in the ``"meta"`` namespace
    of the module-level ``index`` and compares it with ``question_embedding``
    using ``util.cos_sim``.

    Args:
        url: Id of the vector to fetch; also used as the key into the
            ``'vectors'`` mapping of the fetch response.
        question_embedding: Embedding of the user's question, passed straight
            to ``util.cos_sim``.
            NOTE(review): presumably produced by the same embedder as the
            stored vectors -- confirm against the caller.

    Returns:
        The similarity as a Python scalar, or ``0.0`` when the vector is
        missing from the response or the similarity cannot be computed.
    """
    # NOTE(review): `index` and `util` are module-level names defined outside
    # this function (presumably a Pinecone index and sentence_transformers.util
    # -- confirm against the file's imports).
    qry = index.fetch(ids=[url], namespace="meta")
    try:
        # The fetch response is keyed by the requested id, so look the vector
        # up with `url` (the previous code indexed with `resp['id']`, which is
        # not a name this function receives, so the lookup always failed).
        emb = qry['vectors'][url]['values']

        # Calculate cosine similarity
        cosine_scores = util.cos_sim(question_embedding, emb)
        return cosine_scores.item()
    except Exception:
        # Best-effort scoring: a missing id or an incompatible embedding means
        # "no meta signal" rather than a failure of the whole request.
        return 0.0
109
+
110
  def query_from_pinecone(index,namespace, question_embedding, top_k=3):
111
  # get embedding from THE SAME embedder as the documents
112
 
 
188
  except:
189
  url= resp[0]['id']
190
  url= resp[0]['id']
191
+
192
  title=resp[0]['metadata']['title']
193
  #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
194
  d={}
195
  d['id']=1
196
  d['content']=resp[0]['metadata']['data']
197
+ d['url']=url
198
+ meta_score= get_meta_score(url,question_embedding)
199
+ score=.5* resp[0]['score'] + .5*meta_score
200
 
201
  if len(resp)>1:
202
  d['id']=2
203
  d['content']=resp[1]['metadata']['data']
204
+ d['url']=url
205
+ meta_score= get_meta_score(url,question_embedding)
206
+ score=.5* resp[0]['score'] + .5*meta_score
207
+
208
+
209
+ # Get the elements with the top 2 highest values
210
+ top_2 = sorted(d.items(), key=lambda x: x[3], reverse=True)[:2]
211
+
212
  # convert to array
213
+ json_array = [{'id': key, 'content': value} for key, value in top_2.items()]
214
  json_data = json.dumps(json_array)
215
 
216
+ st.write(json_data)
217
 
218
  if score>.5:
219