# chat/app.py — Streamlit retrieval-augmented Q&A app (Hugging Face Space).
# Embeds a user question, retrieves the best match from a Pinecone index,
# and asks OpenAI to extract the answer from the retrieved text.
import streamlit as st
#from transformers import pipeline
from pinecone import Pinecone, ServerlessSpec
from sentence_transformers import SentenceTransformer, util
from openai import OpenAI
from datetime import datetime
import pandas as pd
import os

# SECURITY(review): an OpenAI API key was previously hard-coded on this line
# and committed to the repository — treat that key as leaked and revoke it.
# Supply the key via the OPENAI_API_KEY environment variable instead
# (e.g. a Hugging Face Spaces secret); never commit secrets to source.
api_key = os.environ.get("OPENAI_API_KEY", "")
os.environ["OPENAI_API_KEY"] = api_key  # OpenAI() client reads this variable
# Bi-encoder used for queries; must be the SAME model that embedded the
# indexed documents, or similarity scores are meaningless.
bi_encoder = SentenceTransformer('msmarco-distilbert-base-v4')
bi_encoder.max_seq_length = 256     # Truncate long documents to 256 tokens

# Pinecone vector index holding the document embeddings.
INDEX_NAME = 'cl-search-idx'
# SECURITY(review): the Pinecone API key was previously hard-coded here and
# committed — treat it as leaked and rotate it. Read it from the environment
# (e.g. a Spaces secret) instead of embedding it in source.
pc_api_key = os.environ.get("PINECONE_API_KEY", "")  # AWS region key
pc = Pinecone(api_key=pc_api_key)
index = pc.Index(name=INDEX_NAME)
# Load the persistent query log; start an empty one when the file is missing
# or has no rows yet.
try:
    df_log = pd.read_csv('query.csv', index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Narrowed from a bare `except:`, which silently swallowed every error
    # (permission problems, malformed CSV, even KeyboardInterrupt) and
    # would have discarded the existing log on any transient failure.
    df_log = pd.DataFrame(columns=['query', 'url', 'result', 'ts'])
def query_from_pinecone(index, namespace, question_embedding, top_k=3):
    """Return the top-k matches for an embedding from a Pinecone namespace.

    The embedding must come from the same encoder that produced the
    document vectors stored in the index.
    """
    response = index.query(
        vector=question_embedding,
        namespace=namespace,
        top_k=top_k,
        include_metadata=True,  # include stored metadata (dates, text, etc.)
    )
    return response.get('matches')
# ---------------------------------------------------------------------------
# Main Streamlit flow: embed the question, retrieve the best-matching
# document from Pinecone, then ask OpenAI to extract the answer from it.
# ---------------------------------------------------------------------------

# Few-shot system prompt instructing the model to answer strictly from the
# retrieved <text>...</text> body. Fixed wording bugs: "Do repeat the
# question" contradicted the intended instruction (now "Do not repeat"),
# plus "should not included" -> "should not include" and "are learn" ->
# "are learnt" — prompt text is model-facing behavior, so these were bugs.
system_instructions_text = '''
Your task is to extract the answer to a question from a body of text provided to you.
The body of text will be enclosed within the delimiter tags <text> and </text>
For example,
<text> General Preparation Tips for VARC Section:
You need to develop an incessant habit of speed reading.
Start with reading newspapers, editorials, fiction and nonfiction novels and simple passages.
The more you read, the faster you read. Learn the basic grammar concepts like parts of speech, articles,verbs, adjectives, tenses, auxiliary verbs, modifiers, modals etc.
Revise at least 50 new words every day
</text>
Question: What are some tips for preparing for VARC?
Here are some tips for preparing for the VARC section:
1. develop an incessant habit of speed reading
2. Start reading newspapers, editorials, fiction and nonfiction novels
3. Learn basic grammar concepts\n
4. Revise at least 50 new words a day
Question: How many new words are to be learnt in a day?
It is advised that 50 new words are learnt every day
Your response should be based on the information contained in the provided text and should not include any other sources.
If you are unable to answer the question from the text provided, please respond " Sorry. I do not have enough information to answer this"
Do not repeat the question. Do not make a pointed reference to the text provided. Directly answer the question
'''


def _append_log(query, url, result):
    """Append one row (query, url, result, UTC timestamp) to the CSV log.

    Factored out of the two branches below, which previously duplicated the
    timestamp / loc-append / to_csv sequence.
    """
    # NOTE(review): naive datetime.utcnow() kept so new timestamps match the
    # format of rows already in query.csv; utcnow() is deprecated in 3.12+,
    # so plan a migration to datetime.now(timezone.utc).
    now = str(datetime.utcnow())
    df_log.loc[len(df_log)] = [query, url, result, now]
    df_log.to_csv('query.csv')


QUESTION = st.text_area('Ask a question -e.g How to prepare for Verbal section for CAT?')  ##' How to prepare for Verbal section ?'

if QUESTION:
    # Embed with the SAME bi-encoder that embedded the indexed documents.
    question_embedding = bi_encoder.encode(QUESTION, convert_to_tensor=True)
    ns = 'full'
    resp = query_from_pinecone(index, ns, question_embedding.tolist(), 3)

    if len(resp) > 0:
        out = resp[0]['metadata']['data']       # retrieved document body
        url = "Matching url " + resp[0]['id']   # document id doubles as URL
        #+ '\n*************\n'+ resp[1]['metadata']['text'] + '\n*************\n'+ resp[2]['metadata']['text']

        client = OpenAI()
        content = """
<text>
{}
</text>
""".format(out)
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_instructions_text},
                {"role": "user", "content": content},
                {"role": "user", "content": "Question:" + QUESTION},
            ],
        )
        ans = response.choices[0].message.content
        st.write(url)
        st.write(ans)
        _append_log(QUESTION, resp[0]['id'], ans)
    else:
        st.write("No matches for query")
        _append_log(QUESTION, 'No match', '-')