"""RAG pipeline helpers: PDF loading, vector-store retrieval, and a QA chain.

Expects ``OPENAI_API_KEY`` and ``HUGGINGFACEHUB_API_TOKEN`` to be set in the
environment; the LangChain clients read them implicitly.
"""

import os

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

from setup.environment import default_model

# NOTE(review): the original file had bare `os.environ.get("OPENAI_API_KEY")`
# statements here whose results were discarded — they had no effect, so they
# were removed. The variables are still read by the LangChain clients below.


def getPDF(file_path="./nike.pdf", chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file_path: Path to the PDF to load (defaults to ``./nike.pdf``).
        chunk_size: Maximum characters per chunk.
        chunk_overlap: Characters of overlap between consecutive chunks.

    Returns:
        A list of LangChain ``Document`` objects, one per chunk.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split(text_splitter)


def create_retriever(documents, k=1):
    """Embed *documents* into a Chroma vector store and return a retriever.

    Args:
        documents: Iterable of LangChain ``Document`` objects to index.
        k: Number of nearest chunks returned per query (defaults to 1).

    Returns:
        A similarity-search retriever over the indexed documents.
    """
    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )


def create_prompt_llm_chain(system_prompt, modelParam):
    """Build a stuff-documents QA chain for the given model identifier.

    Args:
        system_prompt: System instructions; ``{context}`` is appended so the
            retrieved documents are stuffed into the prompt.
        modelParam: Model identifier. When equal to ``default_model`` an
            OpenAI chat model is used; otherwise it is treated as a Hugging
            Face Hub repo id served via ``HuggingFaceEndpoint``.

    Returns:
        A chain expecting ``{"input": ..., "context": [Document, ...]}``.
    """
    if modelParam == default_model:
        model = ChatOpenAI(model=modelParam)
    else:
        model = HuggingFaceEndpoint(
            repo_id=modelParam,
            task="text-generation",
            max_new_tokens=100,
            do_sample=False,
            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
        )
    # Keep the caller's prompt intact; append the context placeholder required
    # by create_stuff_documents_chain.
    full_system_prompt = system_prompt + "\n\n" + "{context}"
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", full_system_prompt),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(model, prompt)