amraly1983 committed on
Commit
b10004d
β€’
1 Parent(s): 9a367c7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import tempfile

import fitz
import streamlit as st
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFaceHub
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
10
+
11
def process_pdf(file):
    """Extract text from an uploaded PDF and build a FAISS vector store.

    Args:
        file: A Streamlit ``UploadedFile`` holding the PDF bytes
            (anything with a ``getbuffer()`` method works).

    Returns:
        A FAISS vector store over ~300-character overlapping chunks of the
        PDF text, or ``None`` on failure (the error is shown in the UI via
        ``st.error``).
    """
    tmp_path = None
    try:
        # Write the upload to a uniquely-named temp file instead of a fixed
        # "temp_pdf.pdf" in the CWD, so concurrent users/sessions cannot
        # clobber each other's files.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
            tmp.write(file.getbuffer())
            tmp_path = tmp.name

        with fitz.open(tmp_path) as doc:
            # join() avoids quadratic string concatenation across pages.
            text = "".join(page.get_text() for page in doc)

        # Wrap the raw text in a Document so the splitter can process it.
        texts = [Document(page_content=text)]

        # Split into small overlapping chunks suitable for retrieval.
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=300,
            chunk_overlap=30,
        )
        documents = text_splitter.split_documents(texts)

        embeddings = HuggingFaceEmbeddings()
        return FAISS.from_documents(documents, embeddings)
    except Exception as e:
        # Best-effort by design: report in the UI and signal failure with
        # None, which the caller checks before building the QA chain.
        st.error(f"Error processing PDF: {e}")
        return None
    finally:
        # Always remove the temp file — the original only deleted it on the
        # success path, leaking it whenever extraction/embedding raised.
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
43
+
44
# --- Streamlit UI ---
st.title("PDF Chatbot")
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

if uploaded_file:
    vectorstore = process_pdf(uploaded_file)

    if vectorstore:
        # --- Chat Functionality ---
        # Read the API token from the environment (Hugging Face Spaces
        # exposes repository secrets as env vars). The original referenced
        # an undefined name `HF_TOKEN`, which raised NameError at runtime.
        hf_token = os.environ.get("HF_TOKEN") or os.environ.get(
            "HUGGINGFACEHUB_API_TOKEN"
        )
        if not hf_token:
            st.error(
                "Hugging Face API token not found. "
                "Set the HF_TOKEN environment variable (or Space secret)."
            )
            st.stop()

        llm = HuggingFaceHub(
            repo_id="google/flan-t5-xxl",
            model_kwargs={"temperature": 0.7, "max_length": 512},
            huggingfacehub_api_token=hf_token,
        )
        # "stuff" chain: concatenates the top-k retrieved chunks into one
        # prompt; k=2 keeps the prompt within flan-t5's context budget.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
        )

        user_question = st.text_input("Ask a question about the PDF:")
        if user_question:
            with st.spinner("Generating answer..."):
                response = qa_chain({"query": user_question})
            answer = response["result"]
            st.write(answer)

            # --- Feedback Mechanism ---
            st.write("Was this answer helpful?")
            col1, col2 = st.columns(2)
            with col1:
                if st.button("πŸ‘"):
                    st.write("Thanks for the feedback!")
            with col2:
                if st.button("πŸ‘Ž"):
                    st.write("We appreciate your feedback. We'll work on improving!")