|
|
|
|
|
|
|
from gtts import gTTS |
|
from pdfminer.high_level import extract_text |
|
from transformers import pipeline |
|
import gradio as gr |
|
import os |
|
|
|
# Build the summarization pipeline once at import time so that every call to
# pdf_to_text reuses the same object. No model name is passed, so the library
# chooses which model backs the 'summarization' task.
summarizer = pipeline(task='summarization')
|
|
|
def find_abstract(input_text: str) -> int:
    """Return the index of the paragraph that follows the "abstract" heading.

    The text is split into paragraphs on blank lines ("\\n\\n"). A paragraph
    counts as the heading when, ignoring case and surrounding whitespace, it
    is exactly the word "abstract".

    Args:
        input_text: Plain text to search.

    Returns:
        The 0-based index (into ``input_text.split("\\n\\n")``) of the
        paragraph right after the heading. If no heading is found, or the
        heading is the last paragraph, the returned value equals the number
        of paragraphs and is therefore NOT a valid index; callers must check
        the bound before indexing.
    """
    paragraphs = input_text.split("\n\n")
    # Counting from 1 makes `position` the index of the paragraph *after*
    # the one currently being inspected.
    for position, paragraph in enumerate(paragraphs, start=1):
        # strip() tolerates stray spaces, newlines and form feeds around the
        # heading, which would otherwise defeat the equality test.
        if paragraph.strip().lower() == "abstract":
            return position
    return len(paragraphs)
|
|
|
|
|
def pdf_to_text(file_obj):
    """Summarise the abstract of a PDF and save the summary as speech.

    Steps: extract the PDF's text, pick the paragraph that follows the
    "abstract" heading (located with ``find_abstract``), summarise it with the
    module-level ``summarizer``, and synthesise the summary with gTTS.

    Args:
        file_obj: Object whose ``name`` attribute is the path of the PDF file.

    Returns:
        The path of the written audio file, always ``'test.wav'``.

    Raises:
        ValueError: If the extracted text has no paragraph after an
            "abstract" heading (heading missing, or it is the last paragraph).
    """
    text = extract_text(file_obj.name)
    paragraphs = text.split("\n\n")

    abstract_index = find_abstract(text)
    # find_abstract signals "not found" with an index one past the end;
    # fail with a readable message instead of a bare IndexError.
    if abstract_index >= len(paragraphs):
        raise ValueError(
            "No abstract section was found in the uploaded PDF."
        )

    summary = summarizer(
        paragraphs[abstract_index],
        max_length=20,
        min_length=5,
        do_sample=False,
    )

    speech = gTTS(text=summary[0]["summary_text"], lang='en', slow=False)
    # NOTE(review): gTTS is documented as writing MP3 data, so this file is
    # probably MP3 despite its .wav extension. The name is kept unchanged so
    # existing behaviour is preserved; confirm before renaming.
    output_path = 'test.wav'
    speech.save(output_path)
    return output_path
|
|
|
|
|
# Static text displayed by the web UI.
APP_TITLE = 'PDF to Audio Application'
APP_DESCRIPTION = (
    'A simple application to convert PDF files in audio speech. '
    'Upload your own file, it must contain abstract.'
)
APP_ARTICLE = '''<div>
<p style="text-align: center"> All you need to do is to upload the pdf file and hit submit, then wait for compiling. After that click on Play/Pause for listing to the audio. The audio is saved in a wav format.</p>
</div>'''

# Expose pdf_to_text through Gradio: a file upload as input, audio as output.
iface = gr.Interface(
    fn=pdf_to_text,
    inputs='file',
    outputs='audio',
    title=APP_TITLE,
    description=APP_DESCRIPTION,
    article=APP_ARTICLE,
)

# Launch the interface as soon as this module is executed.
iface.launch()