"""Streamlit app: RAG over an uploaded PDF using DeepSeek-R1 via Groq.

Flow: upload PDF -> chunk + embed into a persistent Chroma store next to
this script -> answer free-text questions with a RetrievalQA "stuff" chain.
"""

import os

import streamlit as st
from huggingface_hub import HfApi  # NOTE(review): unused here — kept in case another entry point relies on it
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA

# Directory containing this script; uploads and the vector store live here.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Groq API key from the environment (may be None; the client then does its
# own env lookup and fails loudly on first use if the key is truly missing).
secret = os.getenv("GROQ_API_KEY")

# Embedding model — must be the same object/model at indexing and query time
# so stored vectors and query vectors live in the same space.
embedding = HuggingFaceEmbeddings()

# DeepSeek-R1 distill served by Groq; temperature 0 for deterministic answers.
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    api_key=secret,  # was fetched into `secret` but never used — now explicit
)


def process_document_to_chroma_db(file_name: str) -> None:
    """Chunk a PDF already saved under ``working_dir`` and index it into Chroma.

    Args:
        file_name: Bare file name of a PDF previously written to ``working_dir``.

    Side effects:
        (Re)writes the persistent vector store at ``working_dir/doc_vectorstore``.
    """
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()

    # 2000-char chunks with 200-char overlap so context survives chunk edges.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200,
    )
    texts = text_splitter.split_documents(documents)

    # Persist to disk so answer_question() can reopen the store on a later
    # Streamlit rerun without re-embedding the document.
    Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
    )


def answer_question(user_question: str) -> str:
    """Answer ``user_question`` via retrieval-augmented generation.

    Args:
        user_question: Natural-language question about the indexed document.

    Returns:
        The LLM's answer text, grounded in the retrieved chunks.
    """
    # Reopen the persistent store built by process_document_to_chroma_db().
    vectordb = Chroma(
        persist_directory=os.path.join(working_dir, "doc_vectorstore"),
        embedding_function=embedding,
    )
    retriever = vectordb.as_retriever()

    # "stuff" chain: all retrieved chunks are concatenated into one prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
    response = qa_chain.invoke({"query": user_question})
    return response["result"]


# --- Streamlit UI -----------------------------------------------------------
st.title("🐋 DeepSeek-R1 - Document RAG")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Save the upload next to this script so the PDF loader can find it.
    save_path = os.path.join(working_dir, uploaded_file.name)
    with open(save_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    process_document_to_chroma_db(uploaded_file.name)
    st.info("Document Processed Successfully")

user_question = st.text_area("Ask your question about the document")

if st.button("Answer"):
    if not user_question.strip():
        # Guard: don't run the retrieval chain on an empty question.
        st.warning("Please enter a question first.")
    else:
        answer = answer_question(user_question)
        st.markdown("### DeepSeek-R1 Response")
        st.markdown(answer)