Spaces:
Runtime error
Runtime error
from pdfminer.high_level import extract_text, extract_pages | |
from pdfminer.layout import LTTextContainer | |
from preprocess import pre_process | |
def get_pages(filename, start_page=0, end_page=0): | |
page_number = [] | |
for i in range(start_page, end_page+1): | |
page_number.append(i-1) | |
print(page_number) | |
#filename = str(paper.title)+'.pdf' | |
pages = extract_pages(filename, page_numbers=page_number) | |
content = "" | |
for page_layout in pages: | |
for element in page_layout: | |
if isinstance(element, LTTextContainer): | |
content = content+element.get_text() | |
content = pre_process(content) | |
return content |