# MDspace / app.py
# diogenemudenge's picture
# Update app.py
# cca1c1a
#https://huggingface.co/spaces/diogenemudenge/MDspace
import gradio as gr
import pdfplumber
from transformers import pipeline
from gtts import gTTS
import speech_recognition as sr
import os
def extract_abstract(text):
    """Return the part of *text* from 'Abstract' up to 'Introduction'.

    The result starts at the first occurrence of 'Abstract' (the heading
    itself is included) and stops just before the first 'Introduction'
    that follows it.

    Args:
        text: The text to search. ``None`` or an empty string is accepted.

    Returns:
        The extracted abstract. An empty string is returned when *text* is
        empty/``None`` or contains no 'Abstract' marker. When no
        'Introduction' follows the marker, everything from 'Abstract' to the
        end of *text* is returned.
    """
    if not text:
        return ''

    abstract_start = text.find('Abstract')
    if abstract_start == -1:
        return ''

    abstract_end = text.find('Introduction', abstract_start)
    if abstract_end == -1:
        # str.find reports "not found" as -1; using that as a slice end would
        # silently drop the final character, so take the rest of the text.
        return text[abstract_start:]
    return text[abstract_start:abstract_end]
# Summarization pipeline shared by all calls; created lazily on first use so
# the model is loaded once instead of on every request.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline('summarization', model='facebook/bart-large-cnn')
    return _SUMMARIZER


def summarize_abstract(abstract):
    """Summarize *abstract* with the 'facebook/bart-large-cnn' model.

    Args:
        abstract: The text to summarize.

    Returns:
        The ``'summary_text'`` of the first result produced by the pipeline.
    """
    summary = _get_summarizer()(
        abstract,
        max_length=130,
        min_length=30,
        do_sample=False,
        # Clip over-long input to the model's maximum length instead of
        # letting it overflow.
        truncation=True,
    )
    return summary[0]['summary_text']
def convert_to_speech(text):
    """Render *text* as speech with gTTS and save it to ``summary.mp3``.

    Args:
        text: The text to be spoken.

    Returns:
        The name of the MP3 file that was written (always ``'summary.mp3'``,
        relative to the current working directory).
    """
    output_path = 'summary.mp3'
    gTTS(text).save(output_path)
    return output_path
def my_app_function(pdf_file):
    """Summarize the abstract on a PDF's first page and convert it to speech.

    Args:
        pdf_file: The uploaded file as delivered by the Gradio file input; it
            is passed unchanged to ``pdfplumber.open``. ``None`` means nothing
            was uploaded.

    Returns:
        A ``(summary, audio_file)`` pair: the summary text and the path of
        the generated audio file. When there is nothing to summarize (no
        upload, a PDF without pages, or no abstract found on the first page)
        the first item is an explanatory message and the second is ``None``.
    """
    if pdf_file is None:
        return "Please upload a PDF file.", None

    with pdfplumber.open(pdf_file) as pdf:
        if not pdf.pages:
            return "The uploaded PDF has no pages.", None
        # Guard against a first page that yields no extractable text.
        text = pdf.pages[0].extract_text() or ""

    abstract = extract_abstract(text)
    if not abstract.strip():
        # Do not feed an empty string to the summarizer or the TTS engine.
        return "No abstract was found on the first page of this PDF.", None

    summary = summarize_abstract(abstract)
    audio_file = convert_to_speech(summary)
    return summary, audio_file
# Wire the processing function into a Gradio UI: a single file upload as
# input, the summary text and its spoken audio as outputs.
iface = gr.Interface(
    my_app_function,
    gr.File(),  # Updated line
    ["text", "audio"],
    title="PDF Abstract Summarizer",
    description=(
        "This app reads PDFs, summarizes the abstract, and converts the summary to speech. "
        "Please upload PDFs with abstracts."
    ),
)

# Start the web server and request a public share link.
iface.launch(share=True)