# NOTE(review): the lines "Spaces: / Running / Running" that used to sit here
# were Hugging Face Spaces page status text captured in a copy-paste, not
# Python code; they are preserved as this comment so the module parses.
# Standard library
import os

# Third-party: LangChain components for loading, splitting, embedding and
# question answering over PDF documents.
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_chroma import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Project-local
from setup.environment import default_model

# NOTE(review): these calls discard their return values, so they are no-ops.
# Presumably the intent was to fail fast when the API keys are missing —
# if so, capture the values and raise when they are None. Kept as-is to
# preserve behavior.
os.environ.get("OPENAI_API_KEY")
os.environ.get("HUGGINGFACEHUB_API_TOKEN")
def getPDF(file_path="./nike.pdf"):
    """Load a PDF and split it into overlapping text chunks.

    Args:
        file_path: Path to the PDF to load (defaults to "./nike.pdf").

    Returns:
        A list of LangChain ``Document`` chunks of ~1000 characters each,
        with a 200-character overlap between consecutive chunks.
    """
    # The overlap keeps sentence context that would otherwise be lost at a
    # chunk boundary.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    loader = PyPDFLoader(file_path, extract_images=False)
    return loader.load_and_split(text_splitter)
def create_retriever(documents):
    """Embed documents into a Chroma vector store and return a retriever.

    Args:
        documents: LangChain ``Document`` chunks to embed (with OpenAI
            embeddings) and index.

    Returns:
        A similarity-search retriever over the new store that yields only
        the single closest chunk (``k=1``) per query.
    """
    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    # k=1: only the best-matching chunk is handed to the LLM downstream.
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 1},
    )
def create_prompt_llm_chain(system_prompt, modelParam):
    """Build a stuff-documents question-answering chain for the chosen model.

    Args:
        system_prompt: System instructions for the model. A "{context}"
            placeholder is appended so the retrieved documents can be
            stuffed into the prompt by the chain.
        modelParam: Model identifier. When it equals ``default_model`` an
            OpenAI chat model is used; otherwise it is treated as a Hugging
            Face Hub repo id served through ``HuggingFaceEndpoint``.

    Returns:
        A chain that answers the "{input}" question using the documents
        supplied at invocation time.
    """
    if modelParam == default_model:
        model = ChatOpenAI(model=modelParam)
    else:
        model = HuggingFaceEndpoint(
            repo_id=modelParam,
            task="text-generation",
            max_new_tokens=100,
            do_sample=False,  # greedy decoding: deterministic output
            huggingfacehub_api_token=os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
        )
    # "{context}" is the placeholder create_stuff_documents_chain fills in
    # with the retrieved documents at run time.
    system_prompt = system_prompt + "\n\n" + "{context}"
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
        ]
    )
    return create_stuff_documents_chain(model, prompt)