|
import os |
|
import streamlit as st |
|
from huggingface_hub import HfApi |
|
|
|
from langchain_community.document_loaders import UnstructuredPDFLoader |
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_chroma import Chroma |
|
from langchain_groq import ChatGroq |
|
from langchain.chains import RetrievalQA |
|
|
|
|
|
# Directory containing this script — base for saved uploads and the
# Chroma persistence directory.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Groq API key from the environment.
# NOTE(review): `secret` is never passed to anything — langchain_groq's
# ChatGroq reads GROQ_API_KEY from the environment on its own, so this
# binding is currently unused; confirm before wiring it explicitly.
secret = os.getenv('GROQ_API_KEY')

# Embedding model used both when indexing documents and when querying.
# No model name given, so langchain_huggingface's default
# sentence-transformers model is used — the same default must apply at
# index and query time for the vectors to be comparable.
embedding = HuggingFaceEmbeddings()

# DeepSeek-R1 distill served via Groq; temperature=0 for deterministic
# answers over the retrieved context.
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0
)
|
|
|
|
|
def process_document_to_chroma_db(file_name):
    """Load a PDF from the working directory, split it into chunks, and
    persist the chunks as embeddings in the on-disk Chroma store.

    Args:
        file_name: Name of a PDF file already saved in ``working_dir``.

    Returns:
        0 on completion (kept for backward compatibility; the useful
        result is the persisted ``doc_vectorstore`` directory).
    """
    # os.path.join for consistency with how the upload is saved.
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()

    # 2000-char chunks with 200-char overlap so content spanning a
    # chunk boundary is not lost to retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200
    )
    texts = text_splitter.split_documents(documents)

    # Embed and persist to disk; the returned store handle is not
    # needed here (answer_question re-opens the persisted directory),
    # so the previous unused `vectordb` binding was dropped.
    Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=f"{working_dir}/doc_vectorstore"
    )
    return 0
|
|
|
|
|
def answer_question(user_question):
    """Answer *user_question* against the persisted document store.

    Re-opens the on-disk Chroma vector store, wires its retriever to the
    Groq LLM through a "stuff" RetrievalQA chain, and returns the
    chain's answer text.
    """
    # Reconnect to the store written by process_document_to_chroma_db;
    # the same embedding object must be supplied for query vectors to
    # match the indexed ones.
    store = Chroma(
        persist_directory=f"{working_dir}/doc_vectorstore",
        embedding_function=embedding,
    )

    # "stuff" chain: all retrieved chunks are stuffed into one prompt.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )

    result = chain.invoke({"query": user_question})
    return result["result"]
|
|
|
|
|
|
|
st.title("🐋 DeepSeek-R1 - Document RAG")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:

    # Persist the upload next to this script so the PDF loader can
    # read it by file name.
    save_path = os.path.join(working_dir, uploaded_file.name)
    with open(save_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Streamlit reruns this whole script on every widget interaction;
    # without this guard the PDF would be re-split and re-embedded into
    # Chroma on every button click.
    if st.session_state.get("processed_file") != uploaded_file.name:
        process_document_to_chroma_db(uploaded_file.name)
        st.session_state["processed_file"] = uploaded_file.name
    st.info("Document Processed Successfully")

user_question = st.text_area("Ask your question about the document")

if st.button("Answer"):
    if user_question.strip():
        answer = answer_question(user_question)

        st.markdown("### DeepSeek-R1 Response")
        st.markdown(answer)
    else:
        # Don't send an empty query through the retrieval chain.
        st.warning("Please enter a question first.")
|
|