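"""Streamlit app: document RAG with DeepSeek-R1 (via Groq), Chroma, and HuggingFace embeddings.

Upload a PDF, index it into a persistent Chroma vector store, and ask questions
answered by a RetrievalQA chain over the indexed chunks.
"""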
import os
import streamlit as st
from langchain_community.document_loaders import UnstructuredPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
# Set the working directory
working_dir = os.path.dirname(os.path.abspath(__file__))
secret = os.getenv('GROQ_API_KEY')
# Loading the embedding model
embedding = HuggingFaceEmbeddings()
# Load the llm from Groq
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0,
    api_key=secret
)
def process_document_to_chroma_db(file_name):
    """Process the document and load it into Chroma DB."""
    # Load the document using the unstructured PDF loader
    loader = UnstructuredPDFLoader(f"{working_dir}/{file_name}")
    documents = loader.load()
    # Split the text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200
    )
    texts = text_splitter.split_documents(documents)
    # Load the chunks into a persistent Chroma vectorstore
    vectordb = Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=f"{working_dir}/doc_vectorstore"
    )
    return vectordb
def answer_question(user_question):
    """Answer the user's question with retrieval-augmented generation over the persisted vector store."""
    # Load the persistent vectordb
    vectordb = Chroma(
        persist_directory=f"{working_dir}/doc_vectorstore",
        embedding_function=embedding
    )
    # Retriever over the indexed chunks
    retriever = vectordb.as_retriever()
    # Create a chain to answer the user's question using DeepSeek-R1
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
    )
    response = qa_chain.invoke({"query": user_question})
    answer = response["result"]
    return answer
# Streamlit interface
st.title("🐋 DeepSeek-R1 - Document RAG")
# File uploader widget
uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])
if uploaded_file is not None:
    # Define save path and save the uploaded file
    save_path = os.path.join(working_dir, uploaded_file.name)
    with open(save_path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Process the document into the Chroma vector store
    process_document_to_chroma_db(uploaded_file.name)
    st.info("Document Processed Successfully")
# Text widget to get user input
user_question = st.text_area("Ask your question about the document")
if st.button("Answer"):
    # Answer the user's question
    answer = answer_question(user_question)
    # Display the response
    st.markdown("### DeepSeek-R1 Response")
    st.markdown(answer)
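# Usage note: run locally with `streamlit run app.py` after setting GROQ_API_KEY.
# DeepSeek-R1 distill models may wrap their reasoning in <think>...</think> tags;
# the raw answer is displayed as-is here, so strip those tags if you only want the
# final response.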