Update app.py
Browse files
app.py
CHANGED
@@ -56,7 +56,42 @@ system_instructions_text='''
|
|
56 |
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
|
57 |
Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
|
58 |
'''
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def query_from_pinecone(index,namespace, question_embedding, top_k=3):
|
62 |
# get embedding from THE SAME embedder as the documents
|
@@ -142,19 +177,40 @@ if QUESTION:
|
|
142 |
score=resp[0]['score']
|
143 |
title=resp[0]['metadata']['title']
|
144 |
#+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
|
145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
146 |
if score>.5:
|
|
|
|
|
|
|
147 |
client = OpenAI()
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
|
154 |
response = client.chat.completions.create(
|
155 |
model="gpt-3.5-turbo",
|
156 |
messages=[
|
157 |
-
{"role": "system", "content":
|
158 |
{"role": "user", "content": content},
|
159 |
{"role": "user", "content": "Question:"+QUESTION}
|
160 |
]
|
|
|
56 |
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
|
57 |
Do repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
|
58 |
'''
|
59 |
+
json_instructions='''
|
60 |
+
Your task is to extract the answer to a question from a body of text provided to you in a json array.
|
61 |
+
The json will contain two pieces of content in this format:
|
62 |
+
[
|
63 |
+
{"id":1 , "content": " first content"},
|
64 |
+
{"id":2 , "content": " second content"}
|
65 |
+
]
|
66 |
+
You need to check which content is most appropriate to answer the question and prepare
|
67 |
+
an answer based on the content
|
68 |
|
69 |
+
For example,
|
70 |
+
[
|
71 |
+
{ "id":1 , "content" : "General Preparation Tips for Verbal Section:\n
|
72 |
+
You need to develop an incessant habit of speed reading.
|
73 |
+
Start with reading newspapers, editorials, fiction and nonfiction novels and simple passages.
|
74 |
+
The more you read, the faster you read. Learn the basic grammar concepts like parts of speech, articles,verbs, adjectives, tenses, auxiliary verbs, modifiers, modals etc.
|
75 |
+
Revise at least 50 new words every day"},
|
76 |
+
{ "id":2 , "content" : "General Preparation Tips for Quantitative Section:\n
|
77 |
+
You need to develop a speed in solving math problems.
|
78 |
+
Start with reading funda books, math text books.
|
79 |
+
Learn the basic concepts like arithmetic, geometry, numbers, probability, etc.
|
80 |
+
Solve at least 50 new problems every day"}
|
81 |
+
]
|
82 |
+
|
83 |
+
Question: What are some tips for preparing for Verbal exam?
|
84 |
+
Here are some tips for preparing for the VARC section:
|
85 |
+
1. develop an incessant habit of speed reading
|
86 |
+
2. Start reading newspapers, editorials, fiction and nonfiction novels
|
87 |
+
3. Learn basic grammar concepts\n
|
88 |
+
4. Revise at least 50 new words a day
|
89 |
+
|
90 |
+
|
91 |
+
Your response should be based on the information contained in the provided content in the json and should not include any other sources.
|
92 |
+
If you are unable to answer the question from the content provided, please respond " Sorry. I do not have enough information to answer this"
|
93 |
+
Do repeat the question. Do not make a pointed reference to the content provided. Directly answer the question
|
94 |
+
'''
|
95 |
|
96 |
def query_from_pinecone(index,namespace, question_embedding, top_k=3):
|
97 |
# get embedding from THE SAME embedder as the documents
|
|
|
177 |
score=resp[0]['score']
|
178 |
title=resp[0]['metadata']['title']
|
179 |
#+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']
|
180 |
+
# Package the retrieved passages in the JSON shape that json_instructions
# describes: [{"id": 1, "content": ...}, {"id": 2, "content": ...}].
# Each passage needs its own record; reusing one dict for both passages
# overwrites the first passage and, when expanded via .items(), emits the
# literal key names "id"/"content" as ids instead of 1 and 2.
json_array = [{'id': 1, 'content': resp[0]['metadata']['data']}]
if len(resp) > 1:
    # A second match is optional; include it only when the query returned one.
    json_array.append({'id': 2, 'content': resp[1]['metadata']['data']})
# Serialize once so the passages can be sent as a single user message.
json_data = json.dumps(json_array)
|
190 |
+
|
191 |
+
|
192 |
if score>.5:
|
193 |
+
|
194 |
+
mode = "two" # two passages
|
195 |
+
|
196 |
client = OpenAI()
|
197 |
+
|
198 |
+
if mode=="one":
|
199 |
+
instr=system_instructions_text
|
200 |
+
content="""
|
201 |
+
<text>
|
202 |
+
{}
|
203 |
+
</text>
|
204 |
+
""".format(out)
|
205 |
+
|
206 |
+
if mode=="two":
|
207 |
+
instr=json_instructions
|
208 |
+
content=json_data
|
209 |
|
210 |
response = client.chat.completions.create(
|
211 |
model="gpt-3.5-turbo",
|
212 |
messages=[
|
213 |
+
{"role": "system", "content":instr },
|
214 |
{"role": "user", "content": content},
|
215 |
{"role": "user", "content": "Question:"+QUESTION}
|
216 |
]
|