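# NOTE(review): the original first lines ("Spaces: Running Running") appear to be a hosting-page status banner captured with the file, not program text.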
import streamlit as st | |
import os | |
import pandas as pd | |
import base64 | |
from findUpdate import FindUpdate # Import the FindUpdate class | |
from tempfile import NamedTemporaryFile | |
class PDFDocumentProcessor:
    """Streamlit helper that compares an uploaded PDF agreement with a PDF template.

    The two uploads are written to temporary ``.pdf`` files, handed to
    ``FindUpdate.main_processing_function``, and the ``'changes'`` entry of
    its result is shown as a table together with a CSV download link.
    """

    # Columns shown in the results table and written to the CSV, in order.
    CHANGE_COLUMNS = [
        'section_number',
        'page_number',
        'actual',
        'changed',
        'analysis',
        'type_of_change',
    ]

    def __init__(self):
        """Start with no uploads and a fresh ``FindUpdate`` instance."""
        self.uploaded_agreement = None
        self.uploaded_template = None
        self.find_update = FindUpdate()  # Performs the actual comparison

    def file_uploaders(self):
        """Render the two PDF upload widgets and remember their results."""
        self.uploaded_agreement = st.file_uploader("Upload the PDF Agreement", type=['pdf'])
        self.uploaded_template = st.file_uploader("Upload the PDF Template", type=['pdf'])

    @classmethod
    def _changes_to_dataframe(cls, changes):
        """Build the results table restricted to ``CHANGE_COLUMNS``.

        An empty result yields an empty table that still has the expected
        headers (selecting columns from a column-less frame would raise
        ``KeyError``). A non-empty result that lacks one of the expected
        columns still raises ``KeyError``, as before.
        """
        frame = pd.DataFrame(changes)
        if frame.empty:
            return pd.DataFrame(columns=cls.CHANGE_COLUMNS)
        return frame[cls.CHANGE_COLUMNS]

    @staticmethod
    def _csv_download_link(csv_text, filename="document_changes.csv"):
        """Return an HTML anchor that downloads *csv_text* via a data URI."""
        # Base64 keeps the payload safe inside the URI (some browsers need it).
        b64 = base64.b64encode(csv_text.encode()).decode()
        # text/csv is the registered media type for CSV data.
        return (
            f'<a href="data:text/csv;base64,{b64}" '
            f'download="{filename}">Download CSV File</a>'
        )

    @staticmethod
    def _remove_temp_file(path):
        """Delete *path*; a file that is already gone is not an error."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing left to clean up.
            pass
        except OSError as e:
            # Cleanup runs inside ``finally``; report instead of raising so a
            # failed delete neither hides the real outcome nor skips the
            # remaining file.
            st.warning(f"Could not remove temporary file {path}: {e}")

    @classmethod
    def _write_temp_pdf(cls, uploaded_file):
        """Copy *uploaded_file* into a new temporary ``.pdf`` and return its path.

        The file is created with ``delete=False`` so it outlives the ``with``
        block; the caller is responsible for removing it.
        """
        with NamedTemporaryFile(delete=False, suffix=".pdf", mode='wb') as tmp:
            path = tmp.name
            try:
                tmp.write(uploaded_file.read())
            except Exception:
                # Do not leave a half-written temp file behind.
                tmp.close()
                cls._remove_temp_file(path)
                raise
        return path

    def process_files(self, agreement_path, template_path):
        """Run the comparison and render the outcome.

        Args:
            agreement_path: Path of the temporary agreement PDF.
            template_path: Path of the temporary template PDF.

        Any exception raised while processing or rendering is reported with
        ``st.error`` rather than propagated. Both paths are removed
        afterwards, whether or not processing succeeded.
        """
        try:
            result = self.find_update.main_processing_function(agreement_path, template_path)
            st.success("Files successfully processed!")

            df_changes = self._changes_to_dataframe(result['changes'])
            st.dataframe(df_changes, height=600)  # Height can be tuned as needed

            # Offer the same table as a CSV download. The link must be built
            # before it is rendered; previously the assignment was commented
            # out, so this step always failed with a NameError.
            csv = df_changes.to_csv(index=False)
            st.markdown(self._csv_download_link(csv), unsafe_allow_html=True)
        except Exception as e:
            st.error(f"Error processing files: {e}")
        finally:
            # Remove each temporary file independently so one failure cannot
            # leave the other behind.
            for path in (agreement_path, template_path):
                self._remove_temp_file(path)

    def save_uploaded_files(self):
        """Save both uploads to temporary files and process them.

        Does nothing unless both an agreement and a template were uploaded.
        """
        if not (self.uploaded_agreement and self.uploaded_template):
            return

        agreement_path = self._write_temp_pdf(self.uploaded_agreement)
        try:
            template_path = self._write_temp_pdf(self.uploaded_template)
        except Exception:
            # The agreement copy would otherwise leak: process_files (which
            # owns cleanup) is never reached.
            self._remove_temp_file(agreement_path)
            raise

        self.process_files(agreement_path, template_path)