sujitb committed on
Commit
074b93b
1 Parent(s): 99a1d6d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -6
app.py CHANGED
@@ -1,12 +1,34 @@
1
  import streamlit as st
2
- import transformers
3
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
- pipe= pipeline('sentiment-analysis')
 
 
 
 
 
6
 
7
- text = st.text_area('Enter some text')
8
 
9
- if text:
10
- out= pipe(text)
11
- st.json(out)
 
 
12
 
 
 
1
  import streamlit as st
2
+
3
  from transformers import pipeline
4
+ from pinecone import Pinecone, ServerlessSpec
5
+ from sentence_transformers import SentenceTransformer, util
6
+
7
+
8
import os

# Bi-encoder used to embed incoming questions. Per the note on
# query_from_pinecone, queries should be embedded with the same embedder
# that was used for the indexed documents.
bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
bi_encoder.max_seq_length = 256  # Truncate long documents to 256 tokens

# Name of the Pinecone index and of the namespace queried inside it.
INDEX_NAME = 'cl-search-idx'
NAMESPACE = 'webpages'

# The client object `pc` was used without ever being created, which raised
# NameError at start-up. Build it here; the API key is taken from the
# environment so that it is never committed alongside the code.
pc = Pinecone(api_key=os.environ.get('PINECONE_API_KEY'))
index = pc.Index(name=INDEX_NAME)
16
+
17
def query_from_pinecone(index, question_embedding, top_k=3):
    """Return the ``top_k`` matches for ``question_embedding`` from ``index``.

    The search is limited to the ``NAMESPACE`` namespace and requests the
    metadata stored with each match. The embedding is expected to come from
    THE SAME embedder as the documents.
    """
    query_args = {
        'vector': question_embedding,
        'top_k': top_k,
        'namespace': NAMESPACE,
        'include_metadata': True,  # gets the metadata (dates, text, etc)
    }
    response = index.query(**query_args)
    return response.get('matches')
26
 
 
27
 
28
# Free-text question typed by the user.
QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')

# Streamlit executes the whole script on every rerun, including the first
# render when the text area is still empty, so only search once there is
# an actual question.
if QUESTION and QUESTION.strip():
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)

    # The index must be passed explicitly as the first argument; the tensor
    # is converted to a plain list before being sent as the query vector.
    resp = query_from_pinecone(index, question_embedding.tolist(), 3)

    if resp:
        # Show the text stored in the metadata of the best match. The
        # remaining matches (resp[1], resp[2]) carry their text under the
        # same metadata key if more context should be displayed later.
        docresult = resp[0]['metadata']['text']
        st.write(docresult)
    else:
        # An empty (or missing) match list would otherwise fail on resp[0].
        st.info('No matching documents found.')