"""Summarize one page of an uploaded PDF and convert the summary to speech."""
import functools
import io
import os
import subprocess
import tempfile
from tempfile import NamedTemporaryFile

# The third-party packages imported below are installed at start-up, so this
# call has to run before those imports.
# NOTE(review): installing at import time is fragile (no version pins, the
# return code is ignored); a requirements file is the usual alternative.
subprocess.run(["pip", "install", "PyPDF2", "transformers", "bark", "gradio","soundfile","PyMuPDF","numpy"])

import fitz
import gradio as gr
import numpy as np
import PyPDF2
import soundfile as sf
from bark import SAMPLE_RATE, generate_audio, preload_models
from PyPDF2 import PdfReader
from transformers import pipeline


def readPDF(pdf_file_path):
    """Return the raw bytes of the PDF stored at *pdf_file_path*.

    Args:
        pdf_file_path: Path of the file to read.

    Returns:
        The complete file content as ``bytes``.

    Raises:
        ValueError: If the path does not end in ``.pdf`` (case-insensitive).
    """
    # Compare case-insensitively so that "PAPER.PDF" is accepted as well.
    if not pdf_file_path.lower().endswith(".pdf"):
        raise ValueError("Please upload a PDF file.")
    with open(pdf_file_path, 'rb') as file:
        return file.read()


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it for later requests."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def _extract_page_text(pdf_bytes, page):
    """Return the text of the 1-based *page* of the PDF held in *pdf_bytes*."""
    try:
        page_number = int(page)
    except (TypeError, ValueError) as err:
        raise ValueError("The page must be a whole number.") from err

    # Open straight from memory: no temporary file to flush, close or delete.
    pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        page_count = pdf_document.page_count
        # Without this check, page 0 would become index -1 and silently
        # select the last page.
        if not 1 <= page_number <= page_count:
            raise ValueError(
                f"Page {page_number} is out of range; the PDF has {page_count} page(s)."
            )
        return pdf_document[page_number - 1].get_text()
    finally:
        pdf_document.close()


def summarize_and_convert_to_audio(pdf_reader, page):
    """Summarize one page of a PDF and synthesize the summary as a WAV file.

    Args:
        pdf_reader: Raw PDF content as bytes (what ``readPDF`` returns).
        page: 1-based page number; an int or a string holding an int.

    Returns:
        Path of a temporary ``.wav`` file containing the spoken summary. The
        file is created with ``delete=False``, so it is not removed here.

    Raises:
        ValueError: If *page* is not an integer, is outside the document, or
            the page has no extractable text.
    """
    abstract_page_text = _extract_page_text(pdf_reader, page)
    if not abstract_page_text.strip():
        raise ValueError("The selected page contains no extractable text.")

    # truncation=True keeps a long page within the model's input limit.
    summary = _get_summarizer()(
        abstract_page_text, max_length=20, min_length=20, truncation=True
    )
    preload_models()
    text = summary[0]['summary_text']
    audio_array = generate_audio(text)

    # Reserve a path for the audio, then close the handle before writing so
    # that soundfile is the only writer of the file.
    with NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav_file:
        wav_file_path = temp_wav_file.name
    sf.write(wav_file_path, audio_array, SAMPLE_RATE)
    return wav_file_path


def read_and_speech(pdf_file, abstract_page):
    """Read an uploaded PDF and return the path of its spoken summary.

    Args:
        pdf_file: The upload: either an object exposing the file path as
            ``.name`` or the path itself as a string.
        abstract_page: 1-based number of the page to summarize.

    Returns:
        Path of the generated ``.wav`` file.

    Raises:
        ValueError: If no file was uploaded, the file is not a PDF, or the
            page is invalid.
    """
    if pdf_file is None:
        raise ValueError("Please upload a PDF file.")
    # Accept both a file-like wrapper (path in .name) and a plain path string.
    pdf_file_path = getattr(pdf_file, "name", pdf_file)
    reader = readPDF(pdf_file_path)
    return summarize_and_convert_to_audio(reader, abstract_page)


# Define app name, app description, and examples
app_name = "From PDF to Speech"
app_description = "Convert text from a PDF file to audio. Upload a PDF file. We accept only PDF files with abstracts."
# Build the web UI: a PDF upload plus a page-number box feed read_and_speech,
# whose returned file path is played back by the audio component.
iface = gr.Interface(
    fn=read_and_speech,
    inputs=[
        # File extensions need the leading dot to restrict the upload dialog.
        gr.File(file_types=[".pdf"], label="Upload PDF file"),
        gr.Textbox(label="Insert the page where the abstract is located"),
    ],
    outputs=gr.Audio(type="filepath"),
    title=app_name,
    description=app_description,
    examples=[
        # NOTE(review): this example path has no ".pdf" suffix, which readPDF
        # rejects -- confirm the actual file name on disk.
        ["DATA7004+90+80+Hidden+Technical+Debt+in+Machine+Learning+Systems", 1],
    ],
    allow_flagging="never",
)

iface.launch()