|
import os |
|
import streamlit as st |
|
from huggingface_hub import HfApi |
|
|
|
from langchain_community.document_loaders import UnstructuredPDFLoader |
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
from langchain_chroma import Chroma |
|
from langchain_groq import ChatGroq |
|
from langchain.chains import RetrievalQA |
|
|
|
|
|
# Directory containing this script — base for saved uploads and the
# Chroma persistence directory.
working_dir = os.path.dirname(os.path.abspath(__file__))

# Groq API key from the environment.
# NOTE(review): `secret` is never passed to anything — langchain_groq's
# ChatGroq reads GROQ_API_KEY from the environment on its own, so this
# binding is currently unused; confirm before wiring it explicitly.
secret = os.getenv('GROQ_API_KEY')

# Embedding model used both when indexing documents and when querying.
# No model name given, so langchain_huggingface's default
# sentence-transformers model is used — the same default must apply at
# index and query time for the vectors to be comparable.
embedding = HuggingFaceEmbeddings()

# DeepSeek-R1 distill served via Groq; temperature=0 for deterministic
# answers over the retrieved context.
llm = ChatGroq(
    model="deepseek-r1-distill-llama-70b",
    temperature=0
)
|
|
|
|
|
def process_document_to_chroma_db(file_name):
    """Load a PDF from the working directory, split it into chunks, and
    persist the chunks as embeddings in the on-disk Chroma store.

    Args:
        file_name: Name of a PDF file already saved in ``working_dir``.

    Returns:
        0 on completion (kept for backward compatibility; the useful
        result is the persisted ``doc_vectorstore`` directory).
    """
    # os.path.join for consistency with how the upload is saved.
    loader = UnstructuredPDFLoader(os.path.join(working_dir, file_name))
    documents = loader.load()

    # 2000-char chunks with 200-char overlap so content spanning a
    # chunk boundary is not lost to retrieval.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        chunk_overlap=200
    )
    texts = text_splitter.split_documents(documents)

    # Embed and persist to disk; the returned store handle is not
    # needed here (answer_question re-opens the persisted directory),
    # so the previous unused `vectordb` binding was dropped.
    Chroma.from_documents(
        documents=texts,
        embedding=embedding,
        persist_directory=f"{working_dir}/doc_vectorstore"
    )
    return 0
|
|
|
|
|
def answer_question(user_question):
    """Answer *user_question* against the persisted document store.

    Re-opens the on-disk Chroma vector store, wires its retriever to the
    Groq LLM through a "stuff" RetrievalQA chain, and returns the
    chain's answer text.
    """
    # Reconnect to the store written by process_document_to_chroma_db;
    # the same embedding object must be supplied for query vectors to
    # match the indexed ones.
    store = Chroma(
        persist_directory=f"{working_dir}/doc_vectorstore",
        embedding_function=embedding,
    )

    # "stuff" chain: all retrieved chunks are stuffed into one prompt.
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=store.as_retriever(),
    )

    result = chain.invoke({"query": user_question})
    return result["result"]
|
|
|
|
|
|
|
st.title("🐋 DeepSeek-R1 - Document RAG")

uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"])

if uploaded_file is not None:

    # Persist the upload next to this script so the PDF loader can
    # read it by file name.
    save_path = os.path.join(working_dir, uploaded_file.name)
    with open(save_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Streamlit reruns this whole script on every widget interaction;
    # without this guard the PDF would be re-split and re-embedded into
    # Chroma on every button click.
    if st.session_state.get("processed_file") != uploaded_file.name:
        process_document_to_chroma_db(uploaded_file.name)
        st.session_state["processed_file"] = uploaded_file.name
    st.info("Document Processed Successfully")

user_question = st.text_area("Ask your question about the document")

if st.button("Answer"):
    if user_question.strip():
        answer = answer_question(user_question)

        st.markdown("### DeepSeek-R1 Response")
        st.markdown(answer)
    else:
        # Don't send an empty query through the retrieval chain.
        st.warning("Please enter a question first.")
|
|