# https://huggingface.co/spaces/masakk/firstAPP
"""Gradio app that reads the abstract of an uploaded PDF aloud.

The text of the PDF is extracted, the paragraph that follows the
"Abstract" heading is summarized, and the summary is converted to speech.
"""
from gtts import gTTS
from pdfminer.high_level import extract_text
from transformers import pipeline
import gradio as gr
import os

# Built once at import time so every request reuses the same pipeline.
# (The original called an undefined name `summarizer(...)` here, which
# raised NameError before the app could start.)
summarizer = pipeline(task='summarization')


def find_abstract(input_text):
    """Return the index of the paragraph that follows the "Abstract" heading.

    Paragraphs are the pieces of *input_text* separated by blank lines
    ("\\n\\n"). The heading must be a paragraph exactly equal to "Abstract".

    Args:
        input_text: Full text to search.

    Returns:
        The 1-based position of the "Abstract" paragraph, which is also the
        0-based index of the paragraph right after it. When no such heading
        exists, the number of paragraphs is returned (an out-of-range index).
    """
    paragraphs = input_text.split("\n\n")
    for position, paragraph in enumerate(paragraphs, start=1):
        if paragraph == "Abstract":
            return position
    return len(paragraphs)


def pdf_to_text(file_obj):
    """Summarize the abstract of a PDF and save the summary as speech.

    Args:
        file_obj: Uploaded file object; only its ``name`` attribute (the
            path passed to ``extract_text``) is used.

    Returns:
        The path of the generated audio file, ``'test.wav'``.

    Raises:
        ValueError: If no paragraph follows an "Abstract" heading in the
            extracted text.
    """
    text = extract_text(file_obj.name)
    paragraphs = text.split("\n\n")
    abstract_index = find_abstract(text)
    # find_abstract returns len(paragraphs) when the heading is missing (or
    # is the very last paragraph); fail with a clear message instead of a
    # bare IndexError.
    if abstract_index >= len(paragraphs):
        raise ValueError(
            "No paragraph following an 'Abstract' heading was found in the PDF."
        )
    summaryPDF = summarizer(
        paragraphs[abstract_index],
        max_length=20,
        min_length=5,
        do_sample=False,
    )
    myobj = gTTS(text=summaryPDF[0]["summary_text"], lang='en', slow=False)
    # NOTE(review): gTTS is documented as producing MP3 data; the ".wav"
    # file name is kept unchanged for compatibility -- confirm.
    myobj.save("test.wav")
    return 'test.wav'


examples = [
    [os.path.abspath("Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")],
]

iface = gr.Interface(
    fn=pdf_to_text,
    inputs='file',
    outputs='audio',
    title='PDF to Audio Application',
    description='A simple application to convert PDF files in audio speech. Upload your own file, or click one of the examples to load them.',
    article='''

All you need to do is to upload the pdf file and hit submit, then wait for compiling. After that click on Play/Pause for listing to the audio. The audio is saved in a wav format.

''',
    examples=examples,
)
iface.launch()