Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import html
import os
import shutil

import streamlit as st
from dotenv import load_dotenv
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.storage import LocalFileStore
from langchain.embeddings import CacheBackedEmbeddings
from langchain_groq import ChatGroq
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from streamlit_chat import message
|
15 |
+
|
16 |
+
# Load environment variables from a local .env file (if any) into os.environ.
load_dotenv()

# The project stores the Groq key under GROQ_API; fall back to GROQ_API_KEY so
# an already-exported key also works. Assigning None into os.environ raises
# TypeError, so a missing key is reported explicitly instead.
groq_api_key = os.getenv('GROQ_API') or os.getenv('GROQ_API_KEY')
if not groq_api_key:
    st.error("Missing Groq API key: set GROQ_API in the environment or .env file.")
    st.stop()
os.environ['GROQ_API_KEY'] = groq_api_key

# LangSmith tracing is optional: only switch it on when a key is available.
langsmith_api_key = os.getenv('LANGSMITH_API') or os.getenv('LANGCHAIN_API_KEY')
if langsmith_api_key:
    os.environ["LANGCHAIN_TRACING_V2"] = "true"
    os.environ["LANGCHAIN_API_KEY"] = langsmith_api_key
|
21 |
+
|
22 |
+
UPLOAD_DIR = "uploaded_files"


def cleanup_files():
    """Close any stored upload handle, then delete the upload directory.

    Reads ``st.session_state.file_handle`` (when present) and removes
    ``UPLOAD_DIR`` with all of its contents. Removal errors are ignored, so
    this is a best-effort cleanup and never raises for a missing directory.
    """
    # Close the handle BEFORE removing the directory: an open handle can keep
    # the file locked (notably on Windows), and with ignore_errors=True the
    # failed delete would otherwise go unnoticed.
    if 'file_handle' in st.session_state:
        st.session_state.file_handle.close()
        # Drop the closed handle so later code cannot pick up a dead file object.
        del st.session_state['file_handle']

    if os.path.isdir(UPLOAD_DIR):
        shutil.rmtree(UPLOAD_DIR, ignore_errors=True)
|
30 |
+
|
31 |
+
|
32 |
+
# Wipe leftovers from earlier runs once per session. The flag is stored in
# st.session_state and set right after the cleanup, so later reruns of this
# script within the same session do not delete the upload directory again.
if not st.session_state.get('cleanup_done', False):
    cleanup_files()
    st.session_state.cleanup_done = True

# exist_ok=True already tolerates an existing directory, so no separate
# existence check is needed.
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
40 |
+
|
41 |
+
# Wattpad-like theme: page background, buttons, text inputs, file uploader and
# the ".header" flex container used by the page header.
_WATTPAD_THEME_CSS = """
<style>
body {
background-color: #FFF7F0;
color: #333333;
font-family: 'Helvetica Neue', sans-serif;
background-image: url('https://drive.google.com/uc?export=view&id=17Vg5hM0-X7fUy2BdYCFqSAQtJVDqYErU');
background-size: cover;
background-position: top center;
}
.stButton button {
background-color: #FF5000;
color: white;
border-radius: 12px;
border: none;
padding: 10px 20px;
font-weight: bold;
}
.stButton button:hover {
background-color: #E64500;
}
.stTextInput > div > input {
border: 1px solid #FF5000;
border-radius: 10px;
padding: 10px;
}
.stFileUploader > div {
border: 2px dashed #FF5000;
border-radius: 10px;
padding: 10px;
}
.header {
display: flex;
align-items: center;
gap: 10px;
padding-top: 50px;
}
</style>
"""

# unsafe_allow_html is required for the <style> tag to be emitted as HTML.
st.markdown(_WATTPAD_THEME_CSS, unsafe_allow_html=True)
|
84 |
+
|
85 |
+
# Page header (text only, no logo), a fixed-height spacer that pushes the
# chatbot below the header, and the page title.
_HEADER_HTML = """
<div class="header" style="display: flex; align-items: center; gap: 10px;">
<h1 style="color: #FF5000; font-weight: bold;">Hi, we're Wattpad.</h1>
</div>
"""
_SPACER_HTML = "<div style='height: 100px;'></div>"

st.markdown(_HEADER_HTML, unsafe_allow_html=True)
st.write(_SPACER_HTML, unsafe_allow_html=True)

st.title("Chat with Your PDF!!")
|
99 |
+
|
100 |
+
def _format_docs(docs):
    """Join the text of the retrieved chunks into a single context string."""
    return "\n\n".join(doc.page_content for doc in docs)


def _build_retriever(uploaded_file):
    """Save the uploaded PDF, index its chunks in FAISS and return a retriever.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Returns:
        A retriever over the PDF's chunks, or ``None`` when the loader
        produced no chunks (for example a PDF without extractable text).
    """
    # Keep only the final path component of the client-supplied name so the
    # file cannot be written outside UPLOAD_DIR.
    safe_name = os.path.basename(uploaded_file.name) or "upload.pdf"
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    file_path = os.path.abspath(os.path.join(UPLOAD_DIR, safe_name))

    with open(file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())

    docs = PyPDFLoader(file_path).load_and_split()
    if not docs:
        # Building a FAISS index from zero documents fails, so bail out early.
        return None

    embedding = HuggingFaceEmbeddings(
        model_name='BAAI/llm-embedder',
    )
    store = LocalFileStore("./cache/")
    cached_embedder = CacheBackedEmbeddings.from_bytes_store(
        embedding, store, namespace='embeddings'
    )

    # Index through the cache-backed embedder so document embeddings stored
    # under ./cache/ are actually reused instead of being recomputed.
    vector_base = FAISS.from_documents(docs, cached_embedder)
    return vector_base.as_retriever()


def _render_history(history):
    """Render every stored question/answer pair as a styled HTML card."""
    if not history:
        return

    st.write("### Previous Questions and Answers")
    for idx, entry in enumerate(history, start=1):
        # Both strings are untrusted (user input / model output) and are
        # rendered with unsafe_allow_html=True, so escape them first. Newlines
        # become <br> so multi-line answers stay inside the styled block.
        question = html.escape(entry['question']).replace("\n", "<br>")
        answer = html.escape(entry['answer']).replace("\n", "<br>")
        st.markdown(
            f"""
            <div style="background-color: #FFFAF5; padding: 10px; border-radius: 10px; margin-bottom: 10px;">
            <p style="font-weight: bold; color: #FF5000;">Q{idx}: {question}</p>
            <p style="color: #333333;">A{idx}: {answer}</p>
            </div>
            """,
            unsafe_allow_html=True
        )


# Only PDFs can be parsed by PyPDFLoader, so restrict the uploader to them.
uploaded_file = st.file_uploader("Upload a file", type=["pdf"])

if uploaded_file is not None:
    # Streamlit reruns this script on every interaction. Cache the retriever
    # in the session and rebuild it only when a different file is uploaded.
    file_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get('indexed_file') != file_key:
        st.session_state.retriever = _build_retriever(uploaded_file)
        st.session_state.indexed_file = file_key
    retriever = st.session_state.retriever

    if retriever is None:
        st.error("No readable text was found in this PDF. Please upload a different file.")
    else:
        st.write("You're Ready For a Chat with your PDF")

        template = '''You are WattBot, Wattpad's friendly chatbot assistant, designed to help readers and writers with insightful answers about stories, writing tips, and the Wattpad platform. Please answer the {question} based only on the given {context}. If the question is unrelated to the context or beyond your knowledge, respond with "I'm not sure about that, but feel free to explore more on Wattpad!" Keep your responses concise, using a maximum of three sentences.'''

        prompt = ChatPromptTemplate.from_template(template)

        # NOTE(review): confirm this model id is still offered by Groq.
        llm = ChatGroq(
            model='mixtral-8x7b-32768',
            temperature=0,
        )

        if 'history' not in st.session_state:
            st.session_state.history = []

        query = st.text_input("Enter your question", placeholder="Ask something interesting...")

        if st.button("Submit!", key="submit_button"):
            if query and query.strip():
                # Feed the prompt the chunks' text rather than the repr of a
                # list of Document objects.
                chain = (
                    {'context': retriever | _format_docs, 'question': RunnablePassthrough()}
                    | prompt | llm | StrOutputParser()
                )
                answer = chain.invoke(query)
                st.session_state.history.append({'question': query, 'answer': answer})
            else:
                st.warning("Please enter a question first.")

        _render_history(st.session_state.history)
|
162 |
+
|
163 |
+
# End-of-run cleanup: executes only when this session's cleanup_done flag is
# truthy.
cleanup_finished = st.session_state.cleanup_done
if cleanup_finished:
    cleanup_files()
|