"""Helpers for a small PDF question-answering pipeline built on LangChain.

The module offers three building blocks: load and chunk a PDF, index the
chunks in a Chroma vector store exposed as a retriever, and build a
"stuff documents" question-answering chain around an OpenAI chat model.
"""

import os

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# NOTE(review): the result of this lookup is discarded, so the statement has
# no effect -- it neither validates nor sets the key. The OpenAI clients below
# are constructed without an explicit key, so presumably they read
# OPENAI_API_KEY from the environment themselves; confirm and then either
# drop this line or turn it into an explicit check.
os.environ.get("OPENAI_API_KEY")


def getPDF(file_path: str = "./nike.pdf", *, chunk_size: int = 1000,
           chunk_overlap: int = 200):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file_path: Path of the PDF to load.
        chunk_size: Chunk size handed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks handed to the
            text splitter.

    Returns:
        The chunked documents produced by the loader's ``load_and_split``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # Image extraction is explicitly disabled; only the text is loaded.
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split(text_splitter)


def create_retriever(documents, *, k: int = 1):
    """Index *documents* in a Chroma vector store and return a retriever.

    A new vector store is built on every call, using ``OpenAIEmbeddings``
    to embed the documents.

    Args:
        documents: Documents to index (e.g. the output of ``getPDF``).
        k: Number of documents requested per similarity search.

    Returns:
        A similarity-search retriever over the new vector store.
    """
    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )


def create_prompt_llm_chain(system_prompt: str, *,
                            model_name: str = "gpt-4o-mini"):
    """Build a question-answering chain over retrieved documents.

    The system message is *system_prompt* followed by a blank line and the
    ``{context}`` placeholder; the human message is the ``{input}``
    placeholder.

    Args:
        system_prompt: Instructions for the model. It is used as prompt
            template text, so any literal ``{`` / ``}`` in it will be
            interpreted as template placeholders -- double them
            (``{{`` / ``}}``) if they are meant literally.
        model_name: Name of the OpenAI chat model to use.

    Returns:
        The chain returned by ``create_stuff_documents_chain`` for the model
        and prompt.
    """
    model = ChatOpenAI(model=model_name)
    # Append the placeholder that the documents chain fills with the
    # retrieved documents.
    full_system_prompt = system_prompt + "\n\n" + "{context}"
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", full_system_prompt),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(model, prompt)