Update test.py
Browse files
test.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import sys
|
2 |
import os
|
3 |
import re
|
@@ -61,7 +62,16 @@ def load_docs(document_path):
|
|
61 |
)
|
62 |
documents = loader.load()
|
63 |
text_splitter = NLTKTextSplitter(chunk_size=1000)
|
64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
except Exception as e:
|
66 |
st.error(f"Failed to load and process PDF: {e}")
|
67 |
st.stop()
|
@@ -128,59 +138,71 @@ if __name__ == "__main__":
|
|
128 |
)
|
129 |
st.header("π Patent Chat: Google Patents Chat Demo")
|
130 |
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
if "messages" not in st.session_state:
|
157 |
-
st.session_state["
|
158 |
-
{"role": "assistant", "content": "Hello! How can I assist you with this patent?"}
|
159 |
-
]
|
160 |
|
|
|
161 |
for message in st.session_state.messages:
|
162 |
with st.chat_message(message["role"]):
|
163 |
st.markdown(message["content"])
|
164 |
|
165 |
-
if
|
166 |
-
st.
|
167 |
-
|
168 |
-
st.
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
with st.spinner("Generating response..."):
|
175 |
-
try:
|
176 |
-
assistant_response = chain({"question": user_input})
|
177 |
-
for chunk in assistant_response["answer"].split():
|
178 |
-
full_response += chunk + " "
|
179 |
-
time.sleep(0.05)
|
180 |
-
message_placeholder.markdown(full_response + "▌")
|
181 |
-
except Exception as e:
|
182 |
-
full_response = f"An error occurred: {e}"
|
183 |
-
finally:
|
184 |
-
message_placeholder.markdown(full_response)
|
185 |
|
186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# to-do: Enable downloading multiple patent PDFs via corresponding links
|
2 |
import sys
|
3 |
import os
|
4 |
import re
|
|
|
62 |
)
|
63 |
documents = loader.load()
|
64 |
text_splitter = NLTKTextSplitter(chunk_size=1000)
|
65 |
+
split_docs = text_splitter.split_documents(documents)
|
66 |
+
|
67 |
+
# Filter metadata to only include str, int, float, or bool
|
68 |
+
for doc in split_docs:
|
69 |
+
if hasattr(doc, "metadata") and isinstance(doc.metadata, dict):
|
70 |
+
doc.metadata = {
|
71 |
+
k: v for k, v in doc.metadata.items()
|
72 |
+
if isinstance(v, (str, int, float, bool))
|
73 |
+
}
|
74 |
+
return split_docs
|
75 |
except Exception as e:
|
76 |
st.error(f"Failed to load and process PDF: {e}")
|
77 |
st.stop()
|
|
|
138 |
)
|
139 |
st.header("π Patent Chat: Google Patents Chat Demo")
|
140 |
|
141 |
+
# Fetch query parameters safely
|
142 |
+
query_params = st.query_params
|
143 |
+
default_patent_link = query_params.get("patent_link", "https://patents.google.com/patent/US8676427B1/en")
|
144 |
+
|
145 |
+
# Input for Google Patent Link
|
146 |
+
patent_link = st.text_area("Enter Google Patent Link:", value=default_patent_link, height=100)
|
147 |
+
|
148 |
+
# Button to start processing
|
149 |
+
if st.button("Load and Process Patent"):
|
150 |
+
if not patent_link:
|
151 |
+
st.warning("Please enter a Google patent link to proceed.")
|
152 |
+
st.stop()
|
153 |
+
|
154 |
+
patent_number = extract_patent_number(patent_link)
|
155 |
+
if not patent_number:
|
156 |
+
st.error("Invalid patent link format. Please provide a valid Google patent link.")
|
157 |
+
st.stop()
|
158 |
+
|
159 |
+
st.write(f"Patent number: **{patent_number}**")
|
160 |
+
|
161 |
+
pdf_path = os.path.join(tempfile.gettempdir(), f"{patent_number}.pdf")
|
162 |
+
if os.path.isfile(pdf_path):
|
163 |
+
st.write("✅ File already downloaded.")
|
164 |
+
else:
|
165 |
+
st.write("📥 Downloading patent file...")
|
166 |
+
pdf_path = download_pdf(patent_number)
|
167 |
+
st.write(f"✅ File downloaded: {pdf_path}")
|
168 |
+
|
169 |
+
st.write("π Loading document into the system...")
|
170 |
+
|
171 |
+
# Persist the chain in session state to prevent reloading
|
172 |
+
if "chain" not in st.session_state or st.session_state.get("loaded_file") != pdf_path:
|
173 |
+
st.session_state.chain = load_chain(pdf_path)
|
174 |
+
st.session_state.loaded_file = pdf_path
|
175 |
+
st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I assist you with this patent?"}]
|
176 |
+
|
177 |
+
st.success("π Document successfully loaded! You can now start asking questions.")
|
178 |
+
|
179 |
+
# Initialize messages if not already done
|
180 |
if "messages" not in st.session_state:
|
181 |
+
st.session_state.messages = [{"role": "assistant", "content": "Hello! How can I assist you with this patent?"}]
|
|
|
|
|
182 |
|
183 |
+
# Display previous chat messages
|
184 |
for message in st.session_state.messages:
|
185 |
with st.chat_message(message["role"]):
|
186 |
st.markdown(message["content"])
|
187 |
|
188 |
+
if "chain" in st.session_state:
|
189 |
+
if user_input := st.chat_input("What is your question?"):
|
190 |
+
st.session_state.messages.append({"role": "user", "content": user_input})
|
191 |
+
with st.chat_message("user"):
|
192 |
+
st.markdown(user_input)
|
193 |
+
|
194 |
+
with st.chat_message("assistant"):
|
195 |
+
message_placeholder = st.empty()
|
196 |
+
full_response = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
+
with st.spinner("Generating response..."):
|
199 |
+
try:
|
200 |
+
assistant_response = st.session_state.chain({"question": user_input})
|
201 |
+
full_response = assistant_response["answer"]
|
202 |
+
except Exception as e:
|
203 |
+
full_response = f"An error occurred: {e}"
|
204 |
+
|
205 |
+
message_placeholder.markdown(full_response)
|
206 |
+
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
207 |
+
else:
|
208 |
+
st.info("Press the 'Load and Process Patent' button to start processing.")
|