"""Small RAG fitness coach.

Indexes the PDFs found under ``data/pdf/<task>`` into one Chroma vector store
per task, merges the resulting retrievers and answers a single question with
a Mistral chat model.
"""
import os
from pathlib import Path
from typing import Literal

# Set before the third-party imports below, mirroring the original script order.
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

from dotenv import load_dotenv

load_dotenv()  # load .env api keys
mistral_api_key = os.getenv("MISTRAL_API_KEY")
# Report only whether the key is present; never echo the secret itself.
print("mistral_api_key", "set" if mistral_api_key else "missing")

from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_mistralai import MistralAIEmbeddings
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI
from langchain.retrievers import (
    MergerRetriever,
)
from huggingface_hub import login

login(token=os.getenv("HUGGING_FACE_TOKEN"))


def load_chunk_persist_pdf(task: str) -> Chroma:
    """Load, chunk, embed and persist every PDF of one task.

    Args:
        task: Name of the sub-folder of ``data/pdf`` (relative to the current
            working directory) whose ``*.pdf`` files are indexed. The same
            name is used for the sub-folder of ``data/chroma_store`` where
            the resulting store is persisted.

    Returns:
        The Chroma vector store built from the chunked documents.

    Raises:
        FileNotFoundError: If ``data/pdf/<task>`` does not exist (raised by
            ``os.listdir``).
    """
    pdf_folder_path = Path.cwd() / "data" / "pdf" / task

    documents = []
    for file in os.listdir(pdf_folder_path):
        if file.endswith('.pdf'):
            loader = PyPDFLoader(str(pdf_folder_path / file))
            documents.extend(loader.load())

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
    chunked_documents = text_splitter.split_documents(documents)

    # Each task gets its own sub-directory so the stores built below do not all
    # write into one shared directory (the original pointed every task at
    # data/chroma_store/ without a distinct collection name).
    persist_directory = Path.cwd() / "data" / "chroma_store" / task
    persist_directory.mkdir(parents=True, exist_ok=True)

    vectorstore = Chroma.from_documents(
        documents=chunked_documents,
        embedding=MistralAIEmbeddings(api_key=mistral_api_key),
        persist_directory=str(persist_directory),
    )
    # Kept from the original script.
    # NOTE(review): recent Chroma releases reportedly persist automatically;
    # confirm whether this explicit call is still required.
    vectorstore.persist()
    return vectorstore


# One vector store (and one retriever) per knowledge domain.
zero2hero_vectorstore = load_chunk_persist_pdf("zero2hero")
bodyweight_vectorstore = load_chunk_persist_pdf("bodyweight")
nutrition_vectorstore = load_chunk_persist_pdf("nutrition")
workout_vectorstore = load_chunk_persist_pdf("workout")

zero2hero_retriever = zero2hero_vectorstore.as_retriever()
nutrition_retriever = nutrition_vectorstore.as_retriever()
bodyweight_retriever = bodyweight_vectorstore.as_retriever()
workout_retriever = workout_vectorstore.as_retriever()

llm = ChatMistralAI(model="mistral-large-latest", mistral_api_key=mistral_api_key, temperature=0)

prompt = ChatPromptTemplate.from_template(
    """
    You are a professional AI coach specialized in fitness, bodybuilding and nutrition.
    You must adapt to the user : if he is a beginner, use simple words. You are gentle and motivative.
    Use the following pieces of retrieved context to answer the question.
    If you don't know the answer, use your common knowledge.
    Use three sentences maximum and keep the answer concise.
    Question: {question}
    Context: {context}
    Answer:
    """,
)


def format_docs(docs):
    """Join the page contents of the retrieved documents, separated by blank lines."""
    return "\n\n".join(doc.page_content for doc in docs)


# Query all four domain retrievers and merge their results into one context.
retriever = MergerRetriever(
    retrievers=[zero2hero_retriever, bodyweight_retriever, nutrition_retriever, workout_retriever]
)

# The raw question is passed through unchanged and also used to fetch context.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print(rag_chain.invoke("What supplement could i buy to improve my sleep?"))
# print(rag_chain.invoke("I am a 45 years old woman and I have to loose weight for the summer. Provide me with a fitness program, and a nutrition program"))