"""Streamlit application for table detection, structure recognition and OCR
data extraction from images, built on Microsoft Table Transformer (DETR) models."""
# Standard library
import logging
import os
import platform
import time
from typing import List

# Third-party
import pandas as pd
import pytesseract
import streamlit as st
import uvicorn
from fastapi import FastAPI, UploadFile, File
from PIL import Image
from transformers import TableTransformerForObjectDetection, DetrFeatureExtractor

# Project-local
from codes.table_recognition import TableRecognition
from codes.table_detection import TableDetection
from codes.table_preprocessing import TablePreprocessor
from codes.data_extraction import TextDataExtraction
from datatypes.config import Config, tesseract_config, model_config

# On Windows the tesseract binary is usually not on PATH; point pytesseract
# at the install location taken from the project config.
if platform.system() == 'Windows':
    pytesseract.pytesseract.tesseract_cmd = tesseract_config['tesseractpath']
def load_models():
    """Load the table-detection and table-structure-recognition models.

    Returns:
        tuple: ``(detection_model, recognition_model)`` — two
        ``TableTransformerForObjectDetection`` instances.

    Raises:
        Exception: re-raised when either model fails to load, so the
        module-level unpacking fails loudly instead of with a confusing
        ``TypeError`` on ``None``.
    """
    try:
        # Alternative: load from local checkpoints instead of the hub.
        # detection_model = TableTransformerForObjectDetection.from_pretrained(model_config['detection_model_path'])
        # recognition_model = TableTransformerForObjectDetection.from_pretrained(model_config['recognition_model_path'])

        # Load pretrained weights from the Hugging Face hub.
        detection_model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-detection")
        recognition_model = TableTransformerForObjectDetection.from_pretrained("microsoft/table-transformer-structure-recognition")
        return detection_model, recognition_model
    except Exception:
        # The original bare `except:` swallowed the error, printed a message,
        # and implicitly returned None — the caller's tuple unpack then died
        # with an unrelated TypeError. Log the traceback and re-raise.
        logging.exception('Table detection or recognition model loading failed!')
        raise
# Load both models once at import time; load_models raises on failure.
detection_model, recognition_model = load_models()

# Feature extractor for table detection (inputs resized to 800px).
detection_feature_extractor = DetrFeatureExtractor(do_resize=True, size=800, max_size=800)
# Feature extractor for table structure recognition (inputs resized to 1000px).
recognition_feature_extractor = DetrFeatureExtractor(do_resize=True, size=1000, max_size=1000)

# Pipeline objects; confidence thresholds come from the project Config.
detection_obj = TableDetection(detection_feature_extractor, detection_model, threshold=Config['table_detection_threshold'])
recognition_obj = TableRecognition(recognition_feature_extractor, recognition_model, threshold=Config['table_recognition_threshold'])
table_preprocessor = TablePreprocessor()
textdataextractor = TextDataExtraction()
# FastAPI wiring, kept commented out in case this needs to be deployed as a microservice:
# app = FastAPI() | |
# @app.get("/health") | |
# def healthcheck(): | |
# return "200" | |
# @app.post('/table-data-extraction') | |
# def table_data_extraction_from_image(file: UploadFile = File(...)): | |
# if not (file.filename.split('.')[-1]).lower() in ("jpg", "jpeg", "png"): | |
# return {'Image must be jpg or png format!'} | |
# print(f'#---------- Table extractor started {time.strftime("%Y-%m-%d %H:%M:%S")} -----------#') | |
# image = Image.open(file.file).convert('RGB') | |
# detection_result = detection_obj.table_detection_from_image(image) | |
# recognition_result = recognition_obj.table_recognition_from_detection(image, detection_result) | |
# preprocessed_tables = table_preprocessor.table_structure_sorting(recognition_result) | |
# exracted_table_data = textdataextractor.cell_data_extraction(image, preprocessed_tables) | |
# print(f'#---------- Table extractor ended {time.strftime("%Y-%m-%d %H:%M:%S")} -----------#\n') | |
# return exracted_table_data | |
def convert_to_df(extracted_object):
    """Render every extracted table as a pandas DataFrame in the page container.

    Args:
        extracted_object: extraction result exposing ``.tables``; each table
            has ``.extracted_rows``, each row ``.extracted_cells``, and each
            cell a ``.value`` (project types — shapes confirmed by the usage
            below).

    Writes through the module-global Streamlit ``container``.
    """
    logging.info(f'#---------- Table visualization started {time.strftime("%Y-%m-%d %H:%M:%S")} -----------#')

    def _show_outputdf(table_list: List[List], table_number: int):
        # Render a single table (list of row lists) into the shared container.
        op_df = pd.DataFrame(table_list)
        # Fixed user-facing typo: "tabel" -> "table".
        container.write(f'Extracted table: {table_number}')
        container.dataframe(op_df)
        container.write('\n')

    if extracted_object.tables:
        # enumerate replaces the original hand-maintained counter.
        for table_no, table in enumerate(extracted_object.tables, start=1):
            table_list = [
                [cell.value for cell in row.extracted_cells]
                for row in table.extracted_rows
            ]
            _show_outputdf(table_list=table_list, table_number=table_no)
    else:
        container.write('No tables are predicted!!!!')
def table_data_extraction_from_image1(file):
    """Run the full detect -> recognize -> sort -> OCR pipeline on one image.

    Args:
        file: an open file-like object (e.g. a Streamlit upload) readable by PIL.

    Returns:
        The extraction result object produced by ``cell_data_extraction``,
        after rendering it into the Streamlit container.
    """
    logging.info(f'#---------- Table extractor started {time.strftime("%Y-%m-%d %H:%M:%S")} -----------#')
    # The DETR models and tesseract both expect 3-channel RGB input.
    rgb_image = Image.open(file).convert('RGB')
    detections = detection_obj.table_detection_from_image(rgb_image)
    structures = recognition_obj.table_recognition_from_detection(rgb_image, detections)
    sorted_tables = table_preprocessor.table_structure_sorting(structures)
    extracted_data = textdataextractor.cell_data_extraction(rgb_image, sorted_tables)
    # Show the extracted tables as DataFrames on the page.
    convert_to_df(extracted_data)
    logging.info(f'#---------- Table extractor ended {time.strftime("%Y-%m-%d %H:%M:%S")} -----------#\n')
    return extracted_data
if __name__ == '__main__':
    st.title('Table detection and recognition')
    st.write('Table data extraction application with help of microsoft detr models.')
    # Create the output container and its heading BEFORE wiring the button:
    # Streamlit runs on_click callbacks at the start of the next rerun, so
    # `container` must be bound before the callback can write into it, and
    # the heading should render above the extracted tables, not below them.
    container = st.container()
    container.subheader('Extracted tables :snowflake:')
    image = st.sidebar.file_uploader(label='Upload image file for data extraction', type=['png','jpg','jpeg','tif'])
    if image:
        result = st.sidebar.button(label='Predict', on_click=table_data_extraction_from_image1, args=(image,))