changxin committed on
Commit
65a043f
1 Parent(s): a4d203d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -149
app.py CHANGED
@@ -1,150 +1,164 @@
1
  import streamlit as st
2
- import math
3
- import re
4
- import os
5
- from PyPDF2 import PdfFileReader, PdfFileWriter
6
- import pandas as pd
7
- import pdfplumber
8
- from docx2pdf import convert
9
- import fitz
10
- import base64
11
-
12
- st.header('PDF文件处理工具测试')
13
-
14
def fx(x):
    """Flatten one level of nesting: an iterable of lists becomes one list.

    Args:
        x: Iterable whose items are themselves iterables (e.g. the per-page
            lists of tables collected elsewhere in this script).

    Returns:
        A new list holding every inner element in its original order; an
        empty input yields an empty list.
    """
    # ``sum(x, [])`` re-copies the accumulated list on every addition, which
    # is quadratic in the total number of elements; extending in place is
    # linear and produces the same result.
    flat = []
    for chunk in x:
        flat.extend(chunk)
    return flat
16
# ---------------------------------------------------------------------------
# PDF toolbox: main script body.
#
# The whole script runs top to bottom each time it is executed, so every
# branch first collects its inputs and only touches the file system once the
# required fields are non-empty (and, where one exists, the action button has
# been pressed).
# ---------------------------------------------------------------------------


def _parse_page_indexes(spec, page_count):
    """Convert a 1-based page spec into zero-based page indexes.

    Args:
        spec: Text such as "1-5" (inclusive range), "1,3,5" (explicit list)
            or "3" (single page).
        page_count: Number of pages in the document; indexes outside
            ``0 .. page_count - 1`` are dropped.

    Returns:
        List of valid zero-based page indexes in the requested order.

    Raises:
        ValueError: If a token in ``spec`` is not an integer.
    """
    numbers = [int(tok) for tok in re.split(r'[-,\s]\s*', spec.strip()) if tok]
    if not numbers:
        return []
    if ',' in spec:
        # Explicit list: the user types 1-based page numbers, PyPDF2 wants
        # zero-based indexes.
        wanted = [num - 1 for num in numbers]
    else:
        # Range form "a-b" is inclusive on both ends; a lone number selects
        # just that page.
        last = numbers[1] if len(numbers) > 1 else numbers[0]
        wanted = range(numbers[0] - 1, last)
    return [idx for idx in wanted if 0 <= idx < page_count]


def _read_page_spec(spec, page_count):
    """Parse ``spec`` and report bad input in the UI; return None on failure."""
    try:
        return _parse_page_indexes(spec, page_count)
    except ValueError:
        st.error('页码格式有误,请按“1-5”或“1,3,5”填写')
        return None


fns = st.radio('请选择PDF处理类型:', ['拆分', '合并', '读取', '在线预览', '转换'])

if fns == '拆分':
    uploaded_file = st.text_input("请输入要处理的pdf文件地址:")
    if uploaded_file != '':
        pdf_reader = PdfFileReader(uploaded_file)
        n = pdf_reader.getNumPages()
        che = st.radio('选择拆分类型', ['按固定页数拆分', '截取某几页', '删除指定页面'])

        if che == '按固定页数拆分':
            fn = st.number_input('请输入每组拆分的文档页数:', 1, n, 1)
            stre = st.text_input("请输入拆分后文件存放根目录:")
            zs = math.ceil(n / fn)  # number of output files
            if st.button('开始拆分>>'):
                for group in range(1, zs + 1):
                    # One writer per output file. Creating it per page meant
                    # every file ended up holding only the group's last page.
                    pdf_writer = PdfFileWriter()
                    for pn in range(fn * group - fn, min(fn * group, n)):
                        pdf_writer.addPage(pdf_reader.getPage(pn))
                    target = os.path.join(stre, 'test-{}.pdf'.format(group))
                    with open(target, 'wb') as out:
                        pdf_writer.write(out)

        elif che == '截取某几页':
            st_en = st.text_input("请输入截取的起止页码,格式为“1-5”或“1,3,5”:")
            stre2 = st.text_input("请输入截取后pdf文件存放根目录:")
            if st_en != '':
                keep = _read_page_spec(st_en, n)
                if keep is not None and st.button('开始截取>>'):
                    outw = PdfFileWriter()
                    for r in keep:
                        outw.addPage(pdf_reader.getPage(r))
                    with open(os.path.join(stre2, '666.pdf'), 'wb') as out:
                        outw.write(out)

        else:
            st_en2 = st.text_input("请输入需要删除的页码,格式为“1-5”或“1,3,5”:")
            stre3 = st.text_input("请输入删除指定页面后的pdf文件存放根目录:")
            if st_en2 != '':
                drop = _read_page_spec(st_en2, n)
                if drop is not None and st.button('开始删除>>'):
                    dropped = set(drop)  # O(1) membership inside the page loop
                    outw2 = PdfFileWriter()
                    for r in range(n):
                        if r not in dropped:
                            outw2.addPage(pdf_reader.getPage(r))
                    with open(os.path.join(stre3, '666.pdf'), 'wb') as out:
                        outw2.write(out)

elif fns == '合并':
    path = st.text_input("请输入要处理的pdf文件根目录:")
    scn = st.text_input("请填写输出文件地址及文件名")
    if path != '' and scn != '':
        # Only PDFs, in a stable name order: os.listdir() returns entries in
        # arbitrary order and may include files PdfFileReader cannot parse.
        file_list = sorted(
            name for name in os.listdir(path) if name.lower().endswith('.pdf')
        )
        if st.button('开始合并>>'):
            file_out = PdfFileWriter()
            for file in file_list:
                docdir = os.path.join(path, file)
                # Skip a previous output that lives in the input directory so
                # it is not merged into itself.
                if os.path.abspath(docdir) == os.path.abspath(scn):
                    continue
                file_read = PdfFileReader(docdir)
                for pageNum in range(file_read.getNumPages()):
                    file_out.addPage(file_read.getPage(pageNum))
            with open(scn, 'wb') as output:
                file_out.write(output)

elif fns == '读取':
    path3 = st.text_input("请输入要读取的pdf文件地址:")
    if path3 != '':
        ms = st.radio('请选择读取模式:', ['指定页码', '全部'])
        with pdfplumber.open(path3) as p:
            total_pages = len(p.pages)
            if total_pages == 0:
                st.warning('该PDF没有可读取的页面')
            elif ms == '指定页码':
                # Bound the selector by the real page count rather than a
                # fixed 66, so an out-of-range page cannot be requested.
                ymq = st.number_input("请选择要读取的pdf页码:", 1, total_pages, 1)
                dqlx = st.radio('请选择读取类型', ['文本内容', '表格内容'])
                page = p.pages[ymq - 1]
                if dqlx == '文本内容':
                    textdata = page.extract_text()
                    st.write(textdata)
                else:
                    tables = page.extract_tables()
                    if not tables:
                        st.warning('该页面未检测到表格')
                    else:
                        n_table = st.number_input(
                            '请选择读取页面中第几个表格:', 1, len(tables), 1
                        )
                        datan = tables[n_table - 1]
                        # First row is used as the header, the rest as data.
                        st.dataframe(pd.DataFrame(datan[1:], columns=datan[0]))
            else:
                dqlx2 = st.radio('请选择读取类型', ['文本内容', '表格内容'])
                if dqlx2 == '文本内容':
                    # Guard against a None result for a page without text,
                    # which would make str.join raise TypeError.
                    sz = '\n'.join([page.extract_text() or '' for page in p.pages])
                    st.write(sz)
                else:
                    frames = [
                        pd.DataFrame(data=y[1:], columns=y[0])
                        for y in fx([page.extract_tables() for page in p.pages])
                    ]
                    if frames:
                        st.dataframe(pd.concat(frames))
                    else:
                        # pd.concat raises ValueError on an empty list.
                        st.warning('该PDF未检测到表格')

elif fns == '在线预览':
    file = st.file_uploader("请上传PDF")
    if file is not None:
        # Inline the uploaded bytes as a base64 data URI inside an <embed>.
        base64_pdf = base64.b64encode(file.read()).decode('utf-8')
        pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="1000" type="application/pdf">'
        st.markdown(pdf_display, unsafe_allow_html=True)

else:
    ms1 = st.radio('请选择转换模式:', ['word->pdf', 'ppt->pdf', 'pdf->jpg/png', 'jpg/png->pdf'])

    if ms1 == 'word->pdf':
        path4 = st.text_input("请输入要批量转换的word文件根目录:")
        if path4 != '':
            for name in sorted(os.listdir(path4)):
                # Only .docx inputs are converted; '~$' entries are Word's
                # temporary lock files.
                if not name.endswith('.docx') or name.startswith('~$'):
                    continue
                src = os.path.join(path4, name)
                # splitext removes only the final extension, so dots earlier
                # in the path no longer truncate the output name.
                convert(src, os.path.splitext(src)[0] + '.pdf')
            st.success('转换成功!')

    elif ms1 == 'pdf->jpg/png':
        path5 = st.text_input("请输入要转换的pdf文件地址:")
        dir_1 = st.text_input("请输入要输出的图片保存根目录:")
        if path5 != '' and dir_1 != '':
            doc = fitz.open(path5)
            try:
                for page in doc:
                    pix = page.get_pixmap()
                    pix.save(os.path.join(dir_1, "page-%i.png" % page.number))
            finally:
                doc.close()

    elif ms1 == 'jpg/png->pdf':
        dir_2 = st.text_input("请输入要转换为pdf的图片根目录:")
        path6 = st.text_input("请输入合成的pdf文件存放地址:")
        if path6 != '' and dir_2 != '':
            doc = fitz.open()  # new, empty PDF
            try:
                # Sorted so the page order is deterministic.
                for f in sorted(os.listdir(dir_2)):
                    img = fitz.open(os.path.join(dir_2, f))
                    rect = img[0].rect
                    pdfbytes = img.convert_to_pdf()
                    img.close()
                    imgPDF = fitz.open("pdf", pdfbytes)
                    # One page per image, sized to the image itself.
                    page = doc.new_page(width=rect.width, height=rect.height)
                    page.show_pdf_page(rect, imgPDF, 0)
                doc.save(path6)
            finally:
                doc.close()

    elif ms1 == 'ppt->pdf':
        dir_3 = st.text_input("请输入要转换为pdf的PPT文件地址:")
        path7 = st.text_input("请输入生成的pdf文件存放地址:")
        if path7 != '' and dir_3 != '':
            # NOTE(review): this relies on fitz.open() accepting a PowerPoint
            # file - confirm against the installed PyMuPDF's supported
            # input formats.
            ppt = fitz.open(dir_3)
            try:
                pdfbytes = ppt.convert_to_pdf()
            finally:
                ppt.close()
            pdf = fitz.open("pdf", pdfbytes)
            try:
                pdf.save(path7)
            finally:
                pdf.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from streamlit.components.v1 import html
3
+
4
# ---------------------------------------------------------------------------
# Embedded pages and chart demo.
#
# Each html(...) call passes a self-contained HTML fragment together with the
# component height in pixels; every height is set 20px above the height of
# the content declared inside the fragment.
# ---------------------------------------------------------------------------
st.header('嵌入网页及图表')

# Chat bot web chat page, embedded through an iframe.
html('''
<iframe src="https://web.powerva.microsoft.com/environments/Default-51a58d6c-4fcf-4b75-8608-d00bf7f244d5/bots/new_bot_830e155fc862429e89683426b31c9bd5/webchat" height="500" frameborder="1" style="width:100%"></iframe>
''', height=520)

# External site embedded through an iframe.
html('''
<iframe src="https://d.pbihub.cn/" height="600" frameborder="1" style="width:100%"></iframe>
''', height=620)

# Bar chart demo.
# The chart library used to be pulled from a plain http:// URL through the
# legacy ECharts 2 AMD loader (require.config / require). Browsers block http
# scripts on pages served over HTTPS, so the chart never rendered there. The
# script now comes from an HTTPS CDN and uses the global echarts.init() API of
# ECharts 5; the ECharts 2-only `calculable` flag is dropped, everything else
# in the option object is unchanged.
html('''
<head><meta charset="utf-8"><title>测试</title></head>
<body>
<div id="main" style="width: 600px;height:400px;"></div>
<script src="https://cdn.jsdelivr.net/npm/echarts@5/dist/echarts.min.js"></script>
<script>
    var myChart = echarts.init(document.getElementById('main'));
    var option = {
        title : {
            text: '某地区蒸发量和降水量',
            subtext: '纯属虚构'
        },
        tooltip : {
            trigger: 'axis'
        },
        legend: {
            data:['蒸发量','降水量']
        },
        toolbox: {
            show : true,
            feature : {
                dataView : {show: true, readOnly: false},
                magicType : {show: true, type: ['line', 'bar']},
                restore : {show: true},
                saveAsImage : {show: true}
            }
        },
        xAxis : [
            {
                type : 'category',
                data : ['1月','2月','3月','4月','5月','6月','7月','8月','9月','10月','11月','12月']
            }
        ],
        yAxis : [
            {
                type : 'value'
            }
        ],
        series : [
            {
                name:'蒸发量',
                type:'bar',
                data:[2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3],
                markPoint : {
                    data : [
                        {type : 'max', name: '最大值'},
                        {type : 'min', name: '最小值'}
                    ]
                },
                markLine : {
                    data : [
                        {type : 'average', name: '平均值'}
                    ]
                }
            },
            {
                name:'降水量',
                type:'bar',
                data:[2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3],
                markPoint : {
                    data : [
                        {name : '年最高', value : 182.2, xAxis: 7, yAxis: 183},
                        {name : '年最低', value : 2.3, xAxis: 11, yAxis: 3}
                    ]
                },
                markLine : {
                    data : [
                        {type : 'average', name : '平均值'}
                    ]
                }
            }
        ]
    };
    myChart.setOption(option);
</script>
</body>
''', height=600)