File size: 1,395 Bytes
0c2406e
 
 
e50c5a8
 
 
 
 
 
682fa45
e50c5a8
 
 
 
 
830896f
e50c5a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62ebe47
e50c5a8
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# https://huggingface.co/spaces/masakk/firstAPP


from gtts import gTTS
from pdfminer.high_level import extract_text
from transformers import pipeline
import gradio as gr
import os

# Build the summarization pipeline once at import time so every request
# handled by the app reuses the same pipeline object.
summarizer = pipeline(task="summarization")

def find_abstract(input_text):
    """Return the index of the paragraph that follows the "abstract" heading.

    The text is split into paragraphs on blank lines (``"\\n\\n"``). A
    paragraph counts as the heading when, ignoring case and surrounding
    whitespace, it is exactly the word ``abstract``.

    Args:
        input_text: Full document text.

    Returns:
        The 1-based position of the heading paragraph, which is the 0-based
        index of the paragraph right after it. When no heading is found the
        number of paragraphs is returned, which is not a valid index, so
        callers must bounds-check the result before indexing.
    """
    paragraphs = input_text.split("\n\n")
    for position, paragraph in enumerate(paragraphs, start=1):
        # strip() tolerates headings such as "Abstract " or "\nAbstract",
        # which an exact comparison would miss.
        if paragraph.strip().lower() == "abstract":
            return position
    return len(paragraphs)


def pdf_to_text(file_obj):
    """Summarize the abstract of an uploaded PDF and save it as speech.

    The PDF text is split into paragraphs on blank lines. The first
    non-blank paragraph at or after the index returned by ``find_abstract``
    is summarized, and the summary is synthesized with gTTS.

    Args:
        file_obj: Uploaded-file object; only its ``name`` attribute (the
            path handed to ``extract_text``) is used.

    Returns:
        The path ``'test.wav'`` of the audio file that was written.

    Raises:
        ValueError: If no non-blank paragraph follows an "abstract" heading.
    """
    text = extract_text(file_obj.name)
    paragraphs = text.split("\n\n")
    abstract_index = find_abstract(text)

    # find_abstract returns len(paragraphs) when the heading is missing or is
    # the last paragraph, so slice (never index) and skip blank paragraphs.
    abstract = next(
        (p for p in paragraphs[abstract_index:] if p.strip()),
        None,
    )
    if abstract is None:
        raise ValueError(
            "Could not find an abstract in the uploaded PDF; the file must "
            "contain an 'Abstract' heading followed by the abstract text."
        )

    summaryPDF = summarizer(abstract, max_length=20, min_length=5, do_sample=False)
    myobj = gTTS(text=summaryPDF[0]["summary_text"], lang='en', slow=False)
    # NOTE(review): gTTS is documented as writing MP3 data; the ".wav" name is
    # kept so the returned path stays unchanged - confirm the player copes.
    # NOTE(review): the fixed filename is shared by all requests.
    myobj.save("test.wav")
    return 'test.wav'


# Gradio UI: one file upload in, one audio player out, handled by pdf_to_text.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs='file',
    outputs='audio',
    title='PDF to Audio Application',
    description=(
        'A simple application to convert PDF files into audio speech. '
        'Upload your own file, it must contain an abstract.'
    ),
    # HTML shown below the interface with usage instructions.
    article='''<div>
                            <p style="text-align: center"> All you need to do is to upload the pdf file and hit submit, then wait for compiling. After that click on Play/Pause to listen to the audio. The audio is saved in a wav format.</p>
                        </div>''',
)

# Launch the app when the script is executed (kept unguarded so the
# module-level behavior is the same as before).
iface.launch()