Spaces:
Sleeping
Sleeping
Windows pdf2image working; Hugging Face still needs work
Browse files
app.py
CHANGED
@@ -1,8 +1,29 @@
|
|
1 |
import streamlit as st

# Centered page title, written as raw HTML so the alignment and font size
# can be controlled with inline CSS.
# Fix: the font-size unit was "30x", which is not a valid CSS length, so the
# declaration was ignored by the browser; "30px" is the valid form.
big_text = """
<div style='text-align: center;'>
<h1 style='font-size: 30px;'>Locked PDF Ingestion</h1>
</div>
"""
# Display the styled text.
# unsafe_allow_html=True is needed so the HTML tags are rendered rather
# than escaped.
st.markdown(big_text, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
from pdf2image import convert_from_path


# pdf2image shells out to Poppler, so the Poppler binaries must be installed
# on the host:
#   - Windows: get
#     https://github.com/oschwartz10612/poppler-windows/releases/tag/v22.01.0-0
#   - Ubuntu (presumably what the hosted environment uses -- TODO confirm):
#     poppler-utils, installed version noted as 22.02.0-2ubuntu0.4

# Centered page title, written as raw HTML so the alignment and font size
# can be controlled with inline CSS.
# Fix: the font-size unit was "30x", which is not a valid CSS length, so the
# declaration was ignored by the browser; "30px" is the valid form.
big_text = """
<div style='text-align: center;'>
<h1 style='font-size: 30px;'>Locked PDF Ingestion</h1>
</div>
"""
# Display the styled text.
# unsafe_allow_html=True is needed so the HTML tags are rendered rather
# than escaped.
st.markdown(big_text, unsafe_allow_html=True)

# Path of the PDF to rasterize, relative to the working directory.
pdf_path = 'uploaded_pdf/data_sheet.pdf'

# Convert only pages 1-2 (first_page/last_page are 1-based and inclusive).
# The start/done prints bracket the conversion on stdout.
print("start")
images = convert_from_path(pdf_path, first_page=1, last_page=2)
print("done")

# TODO: page-by-page conversion with progress reporting (disabled sketch).
# NOTE(review): the sketch below calls convert_from_path(pdf_path) for the
# whole document on every iteration, which looks unintended -- confirm before
# enabling. The loop-body indentation is reconstructed; verify it as well.
#
# total_pages = 100
# print(f"total_pages = {total_pages}")
# st.write(f"total_pages = {total_pages}")
# for page_number in range(total_pages):
#     pdf_image_list = convert_from_path(pdf_path)
#     images = convert_from_path(pdf_path, first_page=page_number + 1, last_page=page_number + 1)
#     progress = (page_number + 1) / total_pages * 100
#     print(f"Progress: {progress:.2f}%")
# print("done")
|