# Document- and URL-grounded QA helpers built on LangChain, Cohere, FAISS,
# and the Google Custom Search API.
from langchain_community.document_loaders import UnstructuredPDFLoader, TextLoader # type: ignore | |
from langchain_community.vectorstores import FAISS | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import PyPDFLoader | |
from langchain_cohere import ChatCohere | |
from langchain_core.messages import HumanMessage | |
import dotenv | |
from langchain_core.output_parsers import StrOutputParser | |
# from langchain_community.vectorstores import Chroma | |
from langchain.schema.runnable import RunnablePassthrough | |
from langchain_cohere import CohereEmbeddings | |
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate | |
from langchain.memory.summary_buffer import ConversationSummaryBufferMemory | |
from langchain.chains import ConversationChain | |
from langchain_core.prompts.chat import MessagesPlaceholder | |
from langchain.agents import AgentExecutor, create_tool_calling_agent | |
import os | |
#from langchain_community.utilities import GoogleSearchAPIWrapper | |
from langchain_core.tools import Tool | |
from langchain_google_community import GoogleSearchAPIWrapper | |
# Pull API credentials (Cohere / Google) from a local .env file into the
# process environment so the os.getenv calls below can find them.
dotenv.load_dotenv()
def parse_document(docs: str, question: str) -> str:
    """Answer ``question`` from raw document text via FAISS similarity search.

    The text is split into overlapping chunks, embedded with Cohere,
    indexed in an in-memory FAISS store, and the single most similar
    chunk is returned.

    Args:
        docs: Raw document text to index.
        question: Query used to retrieve the most relevant chunk.

    Returns:
        The page content of the best-matching chunk, or a fallback
        message when nothing could be retrieved.
    """
    # Embedding model for the vector store.
    embeddings = CohereEmbeddings(model="embed-english-light-v3.0")
    # Overlap keeps context that straddles a chunk boundary.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunks = text_splitter.split_text(docs)
    if not chunks:
        # Empty input would make FAISS.from_texts / indexing fail.
        return "no related content to your question"
    faiss_vs = FAISS.from_texts(chunks, embeddings)
    # NOTE(review): the previous `as_retriever(llm=llm, ...)` kwarg is not a
    # FAISS retriever parameter and was ignored; the unused ChatCohere
    # instance has been removed along with it.
    retriever = faiss_vs.as_retriever(search_kwargs={"k": 1})
    results = retriever.invoke(question)
    if not results:
        # Guard the previous unconditional [0] against an empty result set.
        return "no related content to your question"
    return results[0].page_content
# --- Environment / client configuration ---------------------------------
# API credentials, populated into the environment by load_dotenv() above.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")

# Chat model backing the tool-calling search agent built below.
llm = ChatCohere(model="command-r-plus", cohere_api_key=COHERE_API_KEY)
# Prompt for the URL-grounded search flow: the system message restricts the
# model to a single {url} source and defines the fallback response.
# (Typos fixed: "searh" -> "search", "corpse" -> "corpus",
# "simple response" -> "simply respond".)
prompt_template = [
    ("system",
     "You are a search engine for a corpus of documentation. "
     "You will be provided with a url {url}. "
     "The url is your only source of information, so you should search "
     "the url pages by key words. "
     "You should only ground your responses with the url. "
     "If {question} has no related content from the url, simply respond "
     "'no related content to your question'"),
    ("human", "{question}"),
    ("placeholder", "{agent_scratchpad}"),
    ("ai", ""),
]
# prompt_template = prompt_template.format(url=url, question=question) | |
# Prompt for the general-purpose search agent: a system instruction, the
# user's question, and the scratchpad slot the agent uses for tool calls.
_agent_messages = [
    (
        "system",
        "You are a helpful virtual assistant."
        "You should only use the google_search_name agent tool to search for information when necessary.",
    ),
    ("human", "{question}"),
    ("placeholder", "{agent_scratchpad}"),
]
prompt = ChatPromptTemplate.from_messages(_agent_messages)
# Compile the URL-grounded message list into a chat prompt template; the
# {url} and {question} input variables are inferred from the messages.
prompt_text = ChatPromptTemplate.from_messages(prompt_template)
# Google Programmable Search wrapper, exposed to the agent as a Tool.
search = GoogleSearchAPIWrapper(
    google_api_key=GOOGLE_API_KEY,
    google_cse_id=GOOGLE_CSE_ID,
)
tool = Tool(
    name="google_search_name",
    description="The model should use this tool when it needs more information from the internet.",
    func=search.run,
)
# Wire the llm, tools, and prompt into a tool-calling agent, then wrap it
# in an executor; verbose=False keeps intermediate steps out of stdout.
_tools = [tool]
agent = create_tool_calling_agent(llm=llm, tools=_tools, prompt=prompt)
agent_executor = AgentExecutor(agent=agent, tools=_tools, verbose=False)
def parse_url(question: str) -> str:
    """Run ``question`` through the search agent and return its answer text.

    Args:
        question: Natural-language query for the agent.

    Returns:
        The agent's final answer string.

    Note:
        ``AgentExecutor.invoke`` returns a dict; the previous version
        returned that whole dict despite the declared ``-> str``, so the
        "output" field is extracted here to honor the annotation.
    """
    response = agent_executor.invoke({"question": question})
    return response["output"]
# message = HumanMessage(content="inurl: https://learn.microsoft.com 'what are cloud security best practices'") | |
# print(parse_url(message)) | |