zmbfeng committed on
Commit
6a8bd53
1 Parent(s): 79680ba

windows pdf2image working, hugging face still needs work

Browse files
Files changed (1) hide show
  1. app.py +22 -1
app.py CHANGED
@@ -1,8 +1,29 @@
1
  import streamlit as st
 
 
 
 
 
 
 
2
  big_text = """
3
  <div style='text-align: center;'>
4
  <h1 style='font-size: 30x;'>Locked PDF Ingestion</h1>
5
  </div>
6
  """
7
  # Display the styled text
8
- st.markdown(big_text, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from pdf2image import convert_from_path
3
+
4
+
5
+ # get https://github.com/oschwartz10612/poppler-windows/releases/tag/v22.01.0-0
6
+ # poppler-utils:
7
+ # Installed: 22.02.0-2ubuntu0.4
8
+
9
  big_text = """
10
  <div style='text-align: center;'>
11
  <h1 style='font-size: 30x;'>Locked PDF Ingestion</h1>
12
  </div>
13
  """
14
  # Display the styled text
15
+ st.markdown(big_text, unsafe_allow_html=True)
16
+ pdf_path = 'uploaded_pdf/data_sheet.pdf'
17
+ print("start")
18
+ images = convert_from_path(pdf_path, first_page=1, last_page=2)
19
+ print("done")
20
+ #
21
+ # total_pages = 100
22
+ # print(f"total_pages = {total_pages}")
23
+ # st.write(f"total_pages = {total_pages}")
24
+ # for page_number in range(total_pages):
25
+ # pdf_image_list = convert_from_path(pdf_path)
26
+ # images = convert_from_path(pdf_path, first_page=page_number + 1, last_page=page_number + 1)
27
+ # progress = (page_number + 1) / total_pages * 100
28
+ # print(f"Progress: {progress:.2f}%")
29
+ # print("done")