import os
import streamlit as st

from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
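
# Streamlit RAG app: upload a PDF, index it into a local Chroma vector
# store, and answer questions over it with DeepSeek-R1 served via Groq.
# Assumed dependencies: streamlit, langchain, langchain-community,
# langchain-chroma, langchain-groq, langchain-huggingface,
# langchain-text-splitters, unstructured[pdf]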

# Set the working directory
working_dir = os.path.dirname(os.path.abspath(__file__))

# Groq API key; ChatGroq also reads GROQ_API_KEY from the environment
groq_api_key = os.getenv("GROQ_API_KEY")

# Load the embedding model (defaults to sentence-transformers/all-mpnet-base-v2)
embedding = HuggingFaceEmbeddings()

# Load the LLM from Groq (a DeepSeek-R1 distillation of Llama-70B);
# temperature=0 keeps answers deterministic
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    api_key=groq_api_key
)


def process_document_to_chroma_db(file_name):
    """Process the document and load it into Chroma DB."""
    # Load the document using unstructured PDF loader
    loader = UnstructuredPDFLoader(f"{working_dir}/{file_name}")
    documents = loader.load()

    # Split the text into overlapping chunks so retrieved passages
    # keep context across chunk boundaries
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200
    )
    texts = text_splitter.split_documents(documents)

    # Embed the chunks and persist them into a Chroma vector store on disk
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=f"{working_dir}/doc_vectorstore"
    )
    return vectordb


def answer_question(user_question):
    """Answer the user's question via retrieval-augmented generation
    over the persisted vector store."""
    # Load the persistent vectordb
    vectordb = Chroma(
        persist_directory=f"{working_dir}/doc_vectorstore",
        embedding_function=embedding
    )

    # Retriever (defaults to similarity search over the top-4 chunks)
    retriever = vectordb.as_retriever()

    # Build a RetrievalQA chain; the "stuff" chain type concatenates all
    # retrieved chunks into a single prompt for the LLM
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
    response = qa_chain.invoke({"query": user_question})
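    # Note: DeepSeek-R1 distill models may include <think>...</think>
    # reasoning traces in the returned text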
    answer = response["result"]

    return answer


# Streamlit interface
st.title("🐋 DeepSeek-R1 - Document RAG")

# File uploader widget
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:
    # Define save path and save the uploaded file
    save_path = os.path.join(working_dir, uploaded_file.name)
    with open(save_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Process the document once per upload; Streamlit reruns the whole
    # script on every interaction, so guard against re-embedding the
    # same file on every button click
    if st.session_state.get("processed_file") != uploaded_file.name:
        process_document_to_chroma_db(uploaded_file.name)
        st.session_state["processed_file"] = uploaded_file.name
    st.info("Document Processed Successfully")

# Text widget to get user input
user_question = st.text_area("Ask your question about the document")

if st.button("Answer") and user_question.strip():
    # Answer the user's question
    answer = answer_question(user_question)

    # Display the response
    st.markdown("### DeepSeek-R1 Response")
    st.markdown(answer)