"""RAG utilities built on LangChain + Cohere.

Provides two entry points:

* ``parse_document(docs, question)`` — chunk raw text, index it in an
  in-memory FAISS store with Cohere embeddings, and return the single
  most similar chunk to the question.
* ``parse_url(question)`` — answer a question with a Cohere chat agent
  that may call a Google Custom Search tool.

Requires ``COHERE_API_KEY``, ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID`` in
the environment (loaded from a ``.env`` file via python-dotenv).
"""

import os

import dotenv
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.chains import ConversationChain
from langchain.memory.summary_buffer import ConversationSummaryBufferMemory
from langchain.schema.runnable import RunnablePassthrough
from langchain_cohere import ChatCohere, CohereEmbeddings
from langchain_community.document_loaders import (  # type: ignore
    PyPDFLoader,
    TextLoader,
    UnstructuredPDFLoader,
)
from langchain_community.vectorstores import FAISS
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.prompts.chat import MessagesPlaceholder
from langchain_core.tools import Tool
from langchain_google_community import GoogleSearchAPIWrapper
from langchain_text_splitters import RecursiveCharacterTextSplitter

dotenv.load_dotenv()

GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
GOOGLE_CSE_ID = os.getenv("GOOGLE_CSE_ID")
COHERE_API_KEY = os.getenv("COHERE_API_KEY")


def parse_document(docs: str, question: str) -> str:
    """Retrieve the chunk of *docs* most relevant to *question*.

    The text is split into overlapping chunks, embedded with Cohere
    embeddings, and indexed in an in-memory FAISS vector store; the
    single nearest chunk to the question is returned.

    Args:
        docs: Full document text to index.
        question: Natural-language query to retrieve against.

    Returns:
        The ``page_content`` of the best-matching chunk, or an empty
        string when the retriever returns nothing.
    """
    embeddings = CohereEmbeddings(model="embed-english-light-v3.0")
    # Overlap keeps sentences that straddle a chunk boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunks = text_splitter.split_text(docs)
    faiss_vs = FAISS.from_texts(chunks, embeddings)
    # BUGFIX: the original passed `llm=llm` to as_retriever(); `llm` is not
    # a VectorStoreRetriever field and the chat model is never used here.
    retriever = faiss_vs.as_retriever(search_kwargs={"k": 1})
    results = retriever.invoke(question)
    # BUGFIX: guard the empty case instead of raising IndexError on [0].
    return results[0].page_content if results else ""


# Chat model instantiated once at import time; shared by the agent below.
llm = ChatCohere(model="command-r-plus", cohere_api_key=COHERE_API_KEY)

# URL-grounded prompt. NOTE(review): currently unused by the agent (which
# uses `prompt` instead); kept because `prompt_text` below consumes it.
# Typos fixed: "searh"->"search", "corpse"->"corpus",
# "simple response"->"simply respond".
prompt_template = [
    (
        "system",
        "You are a search engine for a corpus of documentation. "
        "You will be provided with a url {url}. "
        "The url is your only source of information, so you should search "
        "the url pages by key words. "
        "You should only ground your responses with the url. "
        "If {question} has no related content from the url, simply respond "
        "'no related content to your question'",
    ),
    ("human", "{question}"),
    ("placeholder", "{agent_scratchpad}"),
    ("ai", ""),
]

# Prompt actually wired into the tool-calling agent.
# BUGFIX: the original concatenated the two system sentences with no
# separating space ("assistant.You should").
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a helpful virtual assistant. "
            "You should only use the google_search_name agent tool to "
            "search for information when necessary.",
        ),
        ("human", "{question}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
)

prompt_text = ChatPromptTemplate.from_messages(prompt_template)

# Template variables for the URL-grounded prompt above.
input_variables = ["question", "url"]

search = GoogleSearchAPIWrapper(
    google_api_key=GOOGLE_API_KEY,
    google_cse_id=GOOGLE_CSE_ID,
)

tool = Tool(
    name="google_search_name",
    description=(
        "The model should use this tool when it needs more information "
        "from the internet."
    ),
    func=search.run,
)

agent = create_tool_calling_agent(tools=[tool], llm=llm, prompt=prompt)

agent_executor = AgentExecutor(agent=agent, tools=[tool], verbose=False)


def parse_url(question: str) -> str:
    """Answer *question* with the search-capable Cohere agent.

    Args:
        question: Natural-language query; may include ``inurl:`` hints to
            steer the Google search tool.

    Returns:
        The agent's final answer text.
    """
    response = agent_executor.invoke(input={"question": question})
    # BUGFIX: the original returned the whole response dict despite the
    # declared `-> str`; AgentExecutor puts the answer under "output".
    return response["output"]