chat

Sleeping

App Files Files Community

sujitb committed on Apr 1, 2024

Commit

d9f949e

verified ·

1 Parent(s): b492789

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -2

app.py CHANGED Viewed

@@ -94,6 +94,19 @@ json_instructions='''
            Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
           '''
 def query_from_pinecone(index,namespace, question_embedding, top_k=3):
     # get embedding from THE SAME embedder as the documents
@@ -175,20 +188,32 @@ if QUESTION:
         except:
             url= resp[0]['id']
         url= resp[0]['id']
-        score=resp[0]['score']
         title=resp[0]['metadata']['title']
         #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
         d={}
         d['id']=1
         d['content']=resp[0]['metadata']['data']
         if len(resp)>1:
             d['id']=2
             d['content']=resp[1]['metadata']['data']
         # convert to array
-        json_array = [{'id': key, 'content': value} for key, value in d.items()]
         json_data = json.dumps(json_array)
         if score>.5:

            Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
           '''
def get_meta_score(url, question_embedding):
    """Score how well the "meta" vector stored for *url* matches the question.

    Fetches the vector whose id is ``url`` from the ``"meta"`` namespace of the
    module-level ``index`` and compares it to ``question_embedding`` with
    ``util.cos_sim``.

    Args:
        url: Id of the record to look up in the ``"meta"`` namespace.
        question_embedding: Embedding of the user's question.
            NOTE(review): presumably a tensor produced by the same embedder
            as the stored vectors -- confirm against the caller.

    Returns:
        The similarity as a Python scalar, or ``0`` when the vector is missing
        from the fetch response or the similarity cannot be computed.
    """
    qry = index.fetch(ids=[url], namespace="meta")
    try:
        # The fetch response is keyed by the id that was requested, i.e. `url`.
        emb = qry['vectors'][url]['values']
        # Calculate cosine similarity
        # NOTE(review): `util` is presumably sentence_transformers.util, whose
        # cos_sim accepts tensors or plain lists -- confirm the import.
        cosine_scores = util.cos_sim(question_embedding, emb)
        return cosine_scores.item()
    except Exception:
        # Best-effort: a missing or malformed meta vector contributes no score.
        return 0
 def query_from_pinecone(index,namespace, question_embedding, top_k=3):
     # get embedding from THE SAME embedder as the documents
         except:
             url= resp[0]['id']
         url= resp[0]['id']
         title=resp[0]['metadata']['title']
         #+ '\n*************\n'+  resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
         d={}
         d['id']=1
         d['content']=resp[0]['metadata']['data']
+        d['url']=url
+        meta_score= get_meta_score(url,question_embedding)
+        score=.5* resp[0]['score'] + .5*meta_score
         if len(resp)>1:
             d['id']=2
             d['content']=resp[1]['metadata']['data']
+            d['url']=url
+            meta_score= get_meta_score(url,question_embedding)
+            score=.5* resp[0]['score'] + .5*meta_score
+        # Get the elements with the top 2 highest values
+        top_2 = sorted(d.items(), key=lambda x: x[3], reverse=True)[:2]
         # convert to array
+        json_array = [{'id': key, 'content': value} for key, value in top_2.items()]
         json_data = json.dumps(json_array)
+        st.write(json_data)
         if score>.5: