# Spaces: Running
# (The line above appears to be status-header residue from the hosting page,
# not part of the program.)
from langchain_community.document_loaders import PyPDFLoader | |
import os | |
from langchain_openai import ChatOpenAI | |
from langchain_chroma import Chroma | |
from langchain_openai import OpenAIEmbeddings | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain_core.prompts import ChatPromptTemplate | |
# NOTE(review): the looked-up value is discarded, so this statement has no
# effect on its own. Presumably the OpenAI clients constructed below read
# OPENAI_API_KEY from the environment themselves -- confirm, and either drop
# this line or turn it into an explicit check if a missing key should fail
# early.
os.environ.get("OPENAI_API_KEY")
def getPDF(file_path="./nike.pdf", chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split its contents into overlapping chunks.

    Args:
        file_path: Path of the PDF file handed to ``PyPDFLoader``.
        chunk_size: ``chunk_size`` given to the
            ``RecursiveCharacterTextSplitter`` (default 1000, as before).
        chunk_overlap: ``chunk_overlap`` given to the splitter
            (default 200, as before).

    Returns:
        The documents produced by running the splitter over the loaded PDF.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    loader = PyPDFLoader(file_path, extract_images=False)
    # Load, then split explicitly. The loader's ``load_and_split`` convenience
    # method is documented as deprecated and performs these same two steps.
    return text_splitter.split_documents(loader.load())
def create_retriever(documents, k=1, search_type="similarity"):
    """Index ``documents`` in a Chroma vector store and return a retriever.

    Args:
        documents: Documents to embed and store (for example the chunks
            returned by ``getPDF``).
        k: Value passed as ``search_kwargs["k"]`` to the retriever
            (default 1, as before).
        search_type: ``search_type`` passed to ``as_retriever``
            (default ``"similarity"``, as before).

    Returns:
        The retriever created by ``vectorstore.as_retriever``.
    """
    # A new OpenAIEmbeddings instance is built on every call, exactly as the
    # original code did.
    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    return vectorstore.as_retriever(
        search_type=search_type,
        search_kwargs={"k": k},
    )
def create_prompt_llm_chain(system_prompt, model_name="gpt-4o-mini"):
    """Build a "stuff documents" question-answering chain.

    The system message is ``system_prompt`` followed by a blank line and the
    ``{context}`` placeholder; the human message is the ``{input}``
    placeholder.

    Args:
        system_prompt: Instruction text for the system message.
            NOTE(review): this text becomes part of the prompt template, so
            any literal ``{`` / ``}`` in it is presumably parsed as a
            template variable -- confirm against callers that pass prompts
            containing braces.
        model_name: Model identifier passed to ``ChatOpenAI``
            (default ``"gpt-4o-mini"``, as before).

    Returns:
        The chain returned by ``create_stuff_documents_chain``.
    """
    model = ChatOpenAI(model=model_name)
    # Append the placeholder that the chain's prompt uses for the documents.
    full_system_prompt = system_prompt + "\n\n" + "{context}"
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", full_system_prompt),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(model, prompt)