Spaces:

changxin
/

pdf

Build error

App Files Community

changxin committed on Jul 3, 2022

Commit

65a043f

1 Parent(s): a4d203d

Update app.py

Browse files

Files changed (1) hide show

app.py +163 -149

app.py CHANGED Viewed

@@ -1,150 +1,164 @@
 import streamlit as st
-import math
-import re
-import os
-from PyPDF2 import PdfFileReader, PdfFileWriter
-import pandas as pd
-import pdfplumber
-from docx2pdf import convert
-import fitz
-import base64
-st.header('PDF文件处理工具测试')
-def fx(x):
-    return sum(x,[])
-fns=st.radio('请选择PDF处理类型：',['拆分','合并','读取','在线预览','转换'])
-if fns=='拆分':
-    uploaded_file = st.text_input("请输入要处理的pdf文件地址:")
-    if uploaded_file !='':
-        pdf_reader = PdfFileReader(uploaded_file)
-        n=pdf_reader.getNumPages()
-        che=st.radio('选择拆分类型',['按固定页数拆分','截取某几页','删除指定页面'])
-        if che=='按固定页数拆分':
-            fn=st.number_input('请输入每组拆分的文档页数：',1,n,1)
-            stre=st.text_input("请输入拆分后文件存放根目录:")
-            zs=math.ceil(n/fn)
-            if st.button('开始拆分>>'):
-                for page in range(1,zs+1):
-                    for pn in range(fn*page-fn,fn*page):
-                        if pn<n:
-                            pdf_writer = PdfFileWriter()
-                            pdf_writer.addPage(pdf_reader.getPage(pn))
-                            with open(stre+'/test-{}.pdf'.format(page), 'wb') as out:
-                                pdf_writer.write(out)
-        elif che=='截取某几页':
-            st_en=st.text_input("请输入截取的起止页码，格式为“1-5”或“1,3,5”:")
-            stre2=st.text_input("请输入截取后pdf文件存放根目录:")
-            if st_en!='':
-                tt=[int(x) for x in re.split(r'[-,\s]\s*',st_en)]
-                if st.button('开始截取>>'):
-                    outw=PdfFileWriter()
-                    for r in (tt if ',' in st_en else range(tt[0]-1,tt[1])):
-                        outw.addPage(pdf_reader.getPage(r))
-                        with open(stre2+'/666.pdf', 'wb') as out:
-                            outw.write(out)
-        else:
-            st_en2=st.text_input("请输入需要删除的页码，格式为“1-5”或“1,3,5”:")
-            stre3=st.text_input("请输入删除指定页面后的pdf文件存放根目录:")
-            if st_en2!='':
-                tt=[int(x) for x in re.split(r'[-,\s]\s*',st_en2)]
-                if st.button('开始删除>>'):
-                    outw2=PdfFileWriter()
-                    for r in range(n):
-                        if r not in (tt if ',' in st_en2 else range(tt[0]-1,tt[1])):
-                            outw2.addPage(pdf_reader.getPage(r))
-                            with open(stre3+'/666.pdf', 'wb') as out:
-                                outw2.write(out)
-elif fns=='合并':
-    path = st.text_input("请输入要处理的pdf文件根目录:")
-    scn = st.text_input("请填写输出文件地址及文件名")
-    if path !='' and scn!='':
-        file_list = os.listdir(path)
-        if st.button('开始合并>>'):
-            file_out = PdfFileWriter()
-            for file in file_list:
-                docdir = os.path.join(path, file)
-                file_read = PdfFileReader(docdir)
-                for pageNum in range(file_read.getNumPages()):
-                    file_out.addPage(file_read.getPage(pageNum))
-            with open(scn,'wb') as output:
-                file_out.write(output)
-elif fns=='读取':
-    path3 = st.text_input("请输入要读取的pdf文件地址:")
-    if path3 !='':
-        ms=st.radio('请选择读取模式：',['指定页码','全部'])
-        if ms=='指定页码':
-            ymq= st.number_input("请选择要读取的pdf页码:",1,66,1)
-            dqlx=st.radio('请选择读取类型',['文本内容','表格内容'])
-            with pdfplumber.open(path3) as p:
-                page = p.pages[ymq-1]
-                if dqlx=='文本内容':
-                    textdata = page.extract_text()
-                    st.write(textdata)
-                else:
-                    n_table=st.number_input('请选择读取页面中第几个表格：',1,3,1)
-                    tables=page.extract_tables()
-                    datan=tables[n_table-1]
-                    st.dataframe(pd.DataFrame(datan[1:],columns=datan[0]))
-        else:
-            dqlx2=st.radio('请选择读取类型',['文本内容','表格内容'])
-            with pdfplumber.open(path3) as p:
-                if dqlx2=='文本内容':
-                    sz='\n'.join([page.extract_text() for page in p.pages])
-                    st.write(sz)
-                else:
-                    st.dataframe(pd.concat([pd.DataFrame(data=y[1:],columns=y[0]) for y in fx([page.extract_tables() for page in p.pages])]))
-elif fns=='在线预览':
-    file = st.file_uploader("请上传PDF")
-    if file is not None:
-        base64_pdf = base64.b64encode(file.read()).decode('utf-8')
-        pdf_display = f'<embed src="data:application/pdf;base64,{base64_pdf}" width="100%" height="1000" type="application/pdf">'
-        st.markdown(pdf_display, unsafe_allow_html=True)
-else:
-    ms1=st.radio('请选择转换模式：',['word->pdf','ppt->pdf','pdf->jpg/png','jpg/png->pdf'])
-    if ms1=='word->pdf':
-        path4 = st.text_input("请输入要批量转换的word文件根目录:")
-        if path4 !='':
-            FileList = map(lambda x: path4 + '\\' + x, os.listdir(path4))
-            for file in FileList:
-                convert(file, f"{file.split('.')[0]}.pdf")
-            st.success('转换成功！')
-    elif ms1=='pdf->jpg/png':
-        path5 = st.text_input("请输入要转换的pdf文件地址:")
-        dir_1=st.text_input("请输入要输出的图片保存根目录:")
-        if path5 !='' and dir_1 !='':
-            doc = fitz.open(path5)
-            for page in doc:
-                pix = page.get_pixmap()
-                pix.save(dir_1+"/page-%i.png" % page.number)
-    elif ms1=='jpg/png->pdf':
-        dir_2=st.text_input("请输入要转换为pdf的图片根目录:")
-        path6 = st.text_input("请输入合成的pdf文件存放地址:")
-        if path6 !='' and dir_2 !='':
-            doc = fitz.open()
-            imglist = os.listdir(dir_2)
-            for i, f in enumerate(imglist):
-                img = fitz.open(os.path.join(dir_2, f))
-                rect = img[0].rect
-                pdfbytes = img.convert_to_pdf()
-                img.close()
-                imgPDF = fitz.open("pdf", pdfbytes)
-                page = doc.new_page(width = rect.width,height = rect.height)
-                page.show_pdf_page(rect, imgPDF, 0)
-            doc.save(path6)
-    elif ms1=='ppt->pdf':
-        dir_3=st.text_input("请输入要转换为pdf的PPT文件地址:")
-        path7 = st.text_input("请输入生成的pdf文件存放地址:")
-        if path7 !='' and dir_3 !='':
-            ppt = fitz.open(dir_3)
-            pdfbytes = ppt.convert_to_pdf()
-            pdf = fitz.open("pdf", pdfbytes)
-            pdf.save(path7)
-    else:
-        ""

 import streamlit as st
+from streamlit.components.v1 import html
+st.header('嵌入网页及图表')
+html('''
+<iframe src="https://web.powerva.microsoft.com/environments/Default-51a58d6c-4fcf-4b75-8608-d00bf7f244d5/bots/new_bot_830e155fc862429e89683426b31c9bd5/webchat" height="500" frameborder="1" style="width:100%"></iframe>
+''',height=520)
+html('''
+<iframe src="https://d.pbihub.cn/" height="600" frameborder="1" style="width:100%"></iframe>
+''',height=620)
+html('''
+<head><meta charset="utf-8"><title>测试</title></head>
+<body>
+    <div id="main" style="width: 600px;height:400px;"></div>
+    <script src="http://echarts.baidu.com/build/dist/echarts.js"></script>
+    <script>
+        require.config({paths: {echarts: 'http://echarts.baidu.com/build/dist'}});
+        require(
+            ['echarts','echarts/chart/bar'],
+            function (ec) {
+                var myChart = ec.init(document.getElementById('main'));
+                var option = {
+    title : {
+        text: '某地区蒸发量和降水量',
+        subtext: '纯属虚构'
+    },
+    tooltip : {
+        trigger: 'axis'
+    },
+    legend: {
+        data:['蒸发量','降水量']
+    },
+    toolbox: {
+        show : true,
+        feature : {
+            dataView : {show: true, readOnly: false},
+            magicType : {show: true, type: ['line', 'bar']},
+            restore : {show: true},
+            saveAsImage : {show: true}
+        }
+    },
+    calculable : true,
+    xAxis : [
+        {
+            type : 'category',
+            data : ['1月','2月','3月','4月','5月','6月','7月','8月','9月','10月','11月','12月']
+        }
+    ],
+    yAxis : [
+        {
+            type : 'value'
+        }
+    ],
+    series : [
+        {
+            name:'蒸发量',
+            type:'bar',
+            data:[2.0, 4.9, 7.0, 23.2, 25.6, 76.7, 135.6, 162.2, 32.6, 20.0, 6.4, 3.3],
+            markPoint : {
+                data : [
+                    {type : 'max', name: '最大值'},
+                    {type : 'min', name: '最小值'}
+                ]
+            },
+            markLine : {
+                data : [
+                    {type : 'average', name: '平均值'}
+                ]
+            }
+        },
+        {
+            name:'降水量',
+            type:'bar',
+            data:[2.6, 5.9, 9.0, 26.4, 28.7, 70.7, 175.6, 182.2, 48.7, 18.8, 6.0, 2.3],
+            markPoint : {
+                data : [
+                    {name : '年最高', value : 182.2, xAxis: 7, yAxis: 183},
+                    {name : '年最低', value : 2.3, xAxis: 11, yAxis: 3}
+                ]
+            },
+            markLine : {
+                data : [
+                    {type : 'average', name : '平均值'}
+                ]
+            }
+        }
+    ]
+};
+                myChart.setOption(option);
+            }
+        );
+    </script>
+</body>
+''',height=600)