|
import os

from dotenv import load_dotenv
import streamlit as st

from langchain_groq import ChatGroq
# Loaders, embeddings, and vector stores live in langchain_community in recent
# LangChain releases; the bare `langchain.*` import paths are deprecated.
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
|
|
|
|
|
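# Load environment variables (e.g. GROQ_API_KEY) from a local .env file.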
load_dotenv() |
|
|
|
def main(): |
|
|
|
    # Never hard-code secrets; read the key from the environment instead.
    groq_api_key = os.getenv("GROQ_API_KEY")
|
|
|
|
|
if not groq_api_key: |
|
        st.error("GROQ_API_KEY not found. Please set it in your .env file.")
|
return |
|
|
|
st.title("PDF Chat with Groq LLM") |
|
|
|
|
|
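    # Accept a single PDF upload; Streamlit returns None until a file is provided.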
uploaded_file = st.file_uploader("Upload a PDF", type="pdf") |
|
|
|
if uploaded_file is not None: |
|
|
|
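        # Persist the upload to a temporary file, since PyPDFLoader reads from a file path.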
with open("temp.pdf", "wb") as f: |
|
f.write(uploaded_file.getbuffer()) |
|
|
|
|
|
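        # Load the PDF; PyPDFLoader returns one Document per page.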
loader = PyPDFLoader("temp.pdf") |
|
pages = loader.load() |
|
|
|
|
|
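        # Split the pages into overlapping chunks (1000 characters with 200-character
        # overlap) so each retrieved piece fits comfortably in the prompt.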
text_splitter = RecursiveCharacterTextSplitter( |
|
chunk_size=1000, |
|
chunk_overlap=200 |
|
) |
|
texts = text_splitter.split_documents(pages) |
|
|
|
|
|
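        # Compute chunk embeddings locally with a small sentence-transformers model
        # (downloaded from Hugging Face on first use).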
embeddings = HuggingFaceEmbeddings( |
|
model_name="sentence-transformers/all-MiniLM-L6-v2" |
|
) |
|
|
|
|
|
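        # Build an in-memory FAISS vector index over the chunk embeddings for similarity search.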
vectorstore = FAISS.from_documents(texts, embeddings) |
|
|
|
|
|
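        # Chat model served by Groq; llama3-70b-8192 is Llama 3 70B with an 8k context window.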
llm = ChatGroq( |
|
temperature=0.7, |
|
            model_name="llama3-70b-8192",
|
api_key=groq_api_key |
|
) |
|
|
|
|
|
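        # RetrievalQA with the "stuff" chain type: the top 3 retrieved chunks are
        # stuffed into a single prompt for the LLM to answer from.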
qa_chain = RetrievalQA.from_chain_type( |
|
llm=llm, |
|
chain_type="stuff", |
|
retriever=vectorstore.as_retriever(search_kwargs={"k": 3}) |
|
) |
|
|
|
|
|
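        # Question box; the QA chain runs on every non-empty submission.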
query = st.text_input("Ask a question about the PDF:") |
|
|
|
if query: |
|
|
|
            # RetrievalQA takes a single "query" input and returns a dict with the
            # generated answer under "result".
            response = qa_chain.invoke({"query": query})
            st.write("Response:", response["result"])
|
|
|
if __name__ == "__main__": |
|
main() |