Spaces:

masakk
/

firstAPP

Sleeping

firstAPP / app.py

Update app.py

0c2406e 11 months ago

1.55 kB

	# https://huggingface.co/spaces/masakk/firstAPP


	from gtts import gTTS
	from pdfminer.high_level import extract_text
	from transformers import pipeline
	import gradio as gr
	import os

	# Build the summarization pipeline once at import time so every request
	# reuses the same loaded model. The original line *called* `summarizer`
	# before the name existed, which raises NameError; it must be an assignment.
	summarizer = pipeline(task='summarization')

	def find_abstract(input_text):
	    """Return the index of the paragraph that follows the "Abstract" heading.

	    The text is split into paragraphs on blank lines ("\n\n"). The first
	    paragraph that consists solely of the word "Abstract" (ignoring
	    surrounding whitespace and letter case) is treated as the heading, and
	    the index of the paragraph right after it is returned.

	    Args:
	        input_text: Full document text with paragraphs separated by "\n\n".

	    Returns:
	        Index into ``input_text.split("\n\n")`` of the paragraph after the
	        heading, or 0 when no heading with a following paragraph exists, so
	        the result is always a valid index into that list.
	    """
	    paragraphs = input_text.split("\n\n")
	    # The final paragraph is skipped: a heading there has nothing after it,
	    # and returning its successor would point past the end of the list.
	    for position, paragraph in enumerate(paragraphs[:-1]):
	        if paragraph.strip().casefold() == "abstract":
	            return position + 1
	    return 0


	def pdf_to_text(file_obj):
	    """Summarize the abstract of an uploaded PDF and render it as speech.

	    Args:
	        file_obj: Uploaded file object; only its ``name`` attribute (the
	            path of the PDF on disk) is read.

	    Returns:
	        The path of the audio file that was written ('test.wav').
	    """
	    text = extract_text(file_obj.name)
	    paragraphs = text.split("\n\n")

	    # Guard against an out-of-range index (e.g. the document has no
	    # "Abstract" heading) instead of failing with IndexError; fall back to
	    # the first paragraph, which always exists because str.split never
	    # returns an empty list.
	    abstract_index = find_abstract(text)
	    if not 0 <= abstract_index < len(paragraphs):
	        abstract_index = 0

	    summary = summarizer(
	        paragraphs[abstract_index],
	        max_length=20,
	        min_length=5,
	        do_sample=False,
	    )

	    # NOTE(review): gTTS is documented as producing MP3 data; the '.wav'
	    # file name is kept unchanged for compatibility -- confirm this is intended.
	    speech = gTTS(text=summary[0]["summary_text"], lang='en', slow=False)
	    output_path = "test.wav"
	    speech.save(output_path)
	    return output_path

	# Sample document offered in the UI, resolved to an absolute path up front.
	examples = [[os.path.abspath("Article 11 Hidden Technical Debt in Machine Learning Systems.pdf")]]

	# Wire pdf_to_text into a Gradio interface: one file input, one audio output.
	iface = gr.Interface(
	    fn=pdf_to_text,
	    inputs='file',
	    outputs='audio',
	    title='PDF to Audio Application',
	    description='A simple application to convert PDF files in audio speech. Upload your own file, or click one of the examples to load them.',
	    examples=examples,
	    # HTML shown beneath the interface; kept verbatim.
	    article='''<div>
	<p style="text-align: center"> All you need to do is to upload the pdf file and hit submit, then wait for compiling. After that click on Play/Pause for listing to the audio. The audio is saved in a wav format.</p>
	</div>''',
	)

	# Start serving the interface.
	iface.launch()