NoteBot / pdf_processing.py
I-AdityaGoyal's picture
Upload 8 files
1272fb9 verified
raw
history blame contribute delete
431 Bytes
import fitz # PyMuPDF
def extract_text_from_pdf(pdf_path):
    """Extract the plain text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF, passed to ``fitz.open`` (PyMuPDF).

    Returns:
        The text of all pages concatenated in page order. If opening or
        reading the document raises any exception, the error is printed
        and an empty string is returned instead.
    """
    try:
        # The context manager closes the document even when a page fails
        # to load or parse, so the underlying file handle is never leaked.
        with fitz.open(pdf_path) as pdf_document:
            # Join once instead of growing a string page by page.
            return "".join(page.get_text() for page in pdf_document)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and hand the
        # caller an empty result rather than propagating the error.
        print(f"Error extracting text from PDF: {e}")
        return ""