masakk committed on
Commit
e50c5a8
1 Parent(s): c740a01

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gtts import gTTS
2
+ from pdfminer.high_level import extract_text
3
+ from transformers import pipeline
4
+ import gradio as gr
5
+ import os
6
+
7
# Build the summarization pipeline once at import time and bind it to
# `summarizer`, which pdf_to_text calls. The previous form called
# `summarizer(...)` before the name existed, raising NameError on startup.
summarizer = pipeline(task='summarization')
8
+
9
def find_abstract(input_text):
    """Return the index of the paragraph that follows the "Abstract" heading.

    The text is split into paragraphs on blank lines (``"\\n\\n"``). The
    heading is matched after stripping surrounding whitespace and ignoring
    case, so ``" Abstract "`` and ``"ABSTRACT"`` are both recognised.

    Args:
        input_text: Full document text.

    Returns:
        The 1-based position of the heading paragraph, which equals the
        0-based index of the paragraph right after it. When no heading is
        found, the number of paragraphs is returned, so the result is not a
        valid index in that case (nor when the heading is the last paragraph).
    """
    paragraphs = input_text.split("\n\n")
    for position, paragraph in enumerate(paragraphs, start=1):
        if paragraph.strip().casefold() == "abstract":
            return position
    # No heading: mirror the historical behaviour of returning the count.
    return len(paragraphs)
16
+
17
+
18
def pdf_to_text(file_obj):
    """Summarize a PDF's abstract and save the summary as speech.

    The text is extracted from the PDF, the paragraph following the
    "Abstract" heading is summarized, and the summary is synthesized with
    gTTS and written to ``test.wav`` in the current working directory.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the PDF
            (only ``file_obj.name`` is read).

    Returns:
        The string ``'test.wav'``, the path the audio was saved to.

    Raises:
        ValueError: If there is no non-blank paragraph at or after the
            position reported by ``find_abstract``.
    """
    text = extract_text(file_obj.name)
    paragraphs = text.split("\n\n")

    # find_abstract may return len(paragraphs) (no heading, or heading is the
    # last paragraph). Slicing tolerates that, where direct indexing raised an
    # opaque IndexError. Blank paragraphs are skipped so the summarizer never
    # receives empty input.
    candidates = paragraphs[find_abstract(text):]
    abstract = next((p for p in candidates if p.strip()), None)
    if abstract is None:
        raise ValueError(
            "Could not locate an abstract paragraph in the PDF text."
        )

    summary = summarizer(abstract, max_length=20, min_length=5, do_sample=False)
    speech = gTTS(text=summary[0]["summary_text"], lang='en', slow=False)
    # NOTE(review): gTTS is documented to write MP3 data; the .wav name is
    # kept unchanged for compatibility. Confirm whether it is intended.
    speech.save("test.wav")
    return 'test.wav'
24
+
25
# Sample document offered in the UI; resolved to an absolute path so it does
# not depend on how the path is later interpreted.
_EXAMPLE_PDF = "Article 11 Hidden Technical Debt in Machine Learning Systems.pdf"
examples = [[os.path.abspath(_EXAMPLE_PDF)]]

# HTML footer rendered below the interface.
_ARTICLE_HTML = '''<div>
<p style="text-align: center"> All you need to do is to upload the pdf file and hit submit, then wait for compiling. After that click on Play/Pause for listing to the audio. The audio is saved in a wav format.</p>
</div>'''

_DESCRIPTION = 'A simple application to convert PDF files in audio speech. Upload your own file, or click one of the examples to load them.'

# Wire pdf_to_text into a file-in / audio-out Gradio interface.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs='file',
    outputs='audio',
    title='PDF to Audio Application',
    description=_DESCRIPTION,
    article=_ARTICLE_HTML,
    examples=examples,
)

# Start serving the interface.
iface.launch()