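# NOTE(review): the scraped source began with the stray text
# "Spaces: / Sleeping / Sleeping", which appears to be web-page status
# residue rather than code; it is preserved here as a comment only.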
import re

import cv2
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import onnxruntime
import pandas as pd
from paddleocr import PaddleOCR
from PIL import Image
# Initialize the OCR engine once at import time so every request reuses it.
# NOTE(review): `cls` is normally an argument of `PaddleOCR.ocr()`, while the
# constructor flag is `use_angle_cls` -- confirm this keyword has the intended
# effect here.
ocr = PaddleOCR(lang='sl',
                enable_mkldnn=True,
                cls=False,
                show_log=False)

# Load the ONNX models once at import time.
# NOTE(review): the absolute "/content/" paths look environment-specific --
# confirm the model files exist at these locations where the app is deployed.
model_deskew = onnxruntime.InferenceSession("/content/CNN_deskew_v0.0.2.onnx")
model_denoise = onnxruntime.InferenceSession("/content/autoencoder_denoise_v0.0.2.onnx")
##### All Functions #####
def preprocess_image(image, scale=1.494):
    """Shrink an image so the rest of the pipeline works on less data.

    Parameters
    ----------
    image : array-like
        The input image; it is converted with ``np.array`` before resizing.
    scale : float, optional
        Factor by which both width and height are divided. Defaults to the
        value the pipeline has always used (1.494).

    Returns
    -------
    numpy.ndarray
        The image resized to ``(width / scale, height / scale)`` using
        ``cv2.INTER_AREA`` interpolation.

    Raises
    ------
    ValueError
        If ``scale`` is not positive, or if ``image`` does not convert to an
        array with at least two dimensions (e.g. ``None``).
    """
    if scale <= 0:
        raise ValueError("scale must be a positive number")

    image = np.array(image)
    if image.ndim < 2:
        raise ValueError("image must be an array with at least two dimensions")

    height, width = image.shape[:2]
    # cv2.resize takes (width, height); clamp so a tiny input never yields a
    # zero-sized target, which cv2 rejects.
    dim = (max(1, int(width / scale)), max(1, int(height / scale)))
    return cv2.resize(image, dim, interpolation=cv2.INTER_AREA)
# Skew angle (in degrees) for each class index produced by the deskew model.
# The order follows the lexicographic order of the angle labels as strings
# ('-1', '-10', '-11', ...), exactly as in the original lookup table.
_SKEW_ANGLES = (
    -1, -10, -11, -12, -13,
    -14, -15, -2, -3, -4,
    -5, -6, -7, -8, -9,
    0, 1, 10, 11, 12,
    13, 14, 15, 180, 2,
    270, 3, 4, 5, 6,
    7, 8, 9, 90,
)


def deskew(image, model):
    """Predict the skew of an image and rotate it back.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image; it is converted to grayscale with
        ``cv2.COLOR_BGR2GRAY`` for the prediction only.
    model : object
        Inference session exposing ``run(output_names, feed)``; it is fed a
        ``(1, 200, 200, 1)`` float32 array under the input name
        ``'conv2d_input'``.

    Returns
    -------
    tuple
        ``(deskewed_image, angle, skew_confidence)`` where ``angle`` is the
        predicted skew in degrees and ``skew_confidence`` is the model score
        of the chosen class multiplied by 100.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Two-step down-scaling kept from the original pipeline: first to 20 % of
    # the size, then to the fixed 200x200 model input.
    small_dim = (int(gray.shape[1] * 0.2), int(gray.shape[0] * 0.2))
    small = cv2.resize(gray, small_dim, interpolation=cv2.INTER_AREA)
    model_input = cv2.resize(small, (200, 200))

    # add batch and channel dimensions, then normalize
    # (division by 255 assumes 8-bit pixel values -- TODO confirm)
    model_input = model_input.astype('float32').reshape(1, 200, 200, 1)
    model_input = model_input / 255

    predictions = model.run(None, {'conv2d_input': model_input})
    # best prediction
    pred = predictions[0].argmax()
    angle = _SKEW_ANGLES[pred]
    skew_confidence = predictions[0][0][pred] * 100

    # Quarter turns use a lossless rotation that also swaps width and height.
    if angle == 90:
        deskewed_image = cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE)
        return deskewed_image, angle, skew_confidence
    if angle == 270:
        deskewed_image = cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE)
        return deskewed_image, angle, skew_confidence

    # Every other angle: rotate around the centre, keeping the canvas size and
    # filling uncovered corners by replicating the border pixels.
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, -angle, 1.0)
    deskewed_image = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                                    borderMode=cv2.BORDER_REPLICATE)
    return deskewed_image, angle, skew_confidence
def prepare_image_to_autoencoder(image):
    """Crop, resize and normalize an image for the denoising autoencoder.

    Parameters
    ----------
    image : numpy.ndarray
        Deskewed colour image.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(600, 600, 1)`` holding the grayscale crop divided
        by 255.0.
    """
    height, width = image.shape[:2]
    target_height = 600
    target_width = 600

    # Keep only a fixed fractional window of the page.
    # NOTE(review): presumably this is where the fields of interest sit on
    # the form -- confirm against sample documents.
    image = image[int(height / 3.6): int(height / 1.87),
                  int(width / 3.67): int(width / 1.575)]

    # reshape image to fixed size
    image = cv2.resize(image, (target_width, target_height))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # normalize (assumes 8-bit pixel values -- TODO confirm)
    image = image / 255.0

    # add the channel axis expected downstream
    image = image.reshape(target_height, target_width, 1)
    return image
def autoencode_ONNX(image, model):
    """Run the denoising autoencoder on a prepared image.

    Parameters
    ----------
    image : numpy.ndarray
        Array holding 600*600 values; it is cast to float32 and reshaped to
        ``(1, 600, 600, 1)`` before inference.
    model : object
        Inference session exposing ``run(output_names, feed)``; the image is
        fed under the input name ``'input_2'``.

    Returns
    -------
    numpy.ndarray
        The first model output, squeezed, scaled by 255 and converted to
        ``uint8``.
    """
    batch = image.astype(np.float32).reshape(1, 600, 600, 1)
    denoised = model.run(None, {'input_2': batch})[0]
    denoised = np.squeeze(denoised) * 255
    # Clip before the cast: float values outside [0, 255] do not convert to
    # uint8 reliably (they wrap around or are platform dependent).
    denoised = np.clip(denoised, 0, 255)
    return denoised.astype('uint8')
def extract_detected_entries_pdl(image):
    """
    Extracts text, scores, and boundary boxes from an image using OCR and returns a DataFrame.

    The image is passed to the module-level ``ocr`` engine. For every detected
    entry the text (cleaned with ``cleanString_basic``), its confidence score
    and its boundary box are collected.

    Parameters
    ----------
    image : numpy.ndarray
        The input image to be processed.

    Returns
    -------
    pandas.DataFrame
        One row per detected text entry, with the columns:
        - "Text": The detected text.
        - "Score": The confidence score for the detected text.
        - "Boundary Box": The coordinates of the boundary box for the detected text.
        The frame has zero rows when nothing is detected.
    """
    # run the OCR
    result = ocr.ocr(image)

    # Only the first element of the result is used (a single image is passed).
    # Treat a missing/None/empty result as "no detections" instead of failing
    # while iterating.
    detections = (result[0] if result else None) or []

    txt = []
    scores = []
    boxes = []
    for entry in detections:
        txt.append(cleanString_basic(entry[-1][0]))
        scores.append(entry[-1][1])
        boxes.append(entry[0])

    # Build the frame column by column. Stacking the three lists into one
    # NumPy array creates a ragged array (the boxes are nested sequences),
    # which recent NumPy versions reject with a ValueError.
    return pd.DataFrame(
        {"Text": txt, "Score": scores, "Boundary Box": boxes},
        columns=["Text", "Score", "Boundary Box"],
    )
def cleanString_basic(word):
    """Return *word* with every "$" replaced by "s"."""
    return word.replace("$", "s")
def clean_string_start(string: str):
    """Strip one unwanted leading character from *string*.

    Returns
    -------
    tuple
        ``(cleaned_string, flag)``. ``flag`` is the removed character, or
        "√" when the string did not start with an unwanted character.
        At most one character is removed per call.
    """
    chars_to_remove = ('!', "'", '[', ']', '*', '|', '.', ':', '\\', '/')
    # str.startswith accepts a tuple and is False for the empty string
    if string.startswith(chars_to_remove):
        return string[1:], string[0]
    return string, "√"
def clean_string_end(string: str):
    """Strip one unwanted trailing character from *string*.

    Returns
    -------
    tuple
        ``(cleaned_string, flag)``. ``flag`` is the removed character, or
        "√" when the string did not end with an unwanted character.
        At most one character is removed per call.
    """
    chars_to_remove = ('!', "'", '[', ']', '*', '|', '.', ':', '\\', '/')
    # str.endswith accepts a tuple and is False for the empty string
    if string.endswith(chars_to_remove):
        return string[:-1], string[-1]
    return string, "√"
def clean_dates(date: str):
    """Remove letters and the characters ``!``, ``[`` and ``|`` from a date string.

    Intended for the "datum smrti" field.

    Returns
    -------
    tuple
        ``(cleaned_string, date_flags)``. ``date_flags`` is the list of
        removed characters in order of appearance, or the string "Y" when
        nothing was removed.
    """
    # NOTE(review): the name cleaners in this file use "√" as their
    # "nothing removed" flag -- confirm that "Y" is intended here.
    pattern = r'[a-zA-Z!\[\|]'

    # finds special characters in the string
    special_char = re.findall(pattern, date)
    date_flags = special_char if special_char else "Y"

    # remove special characters in the string
    return re.sub(pattern, '', date), date_flags
##### Main Function #####
def _entry_with_confidence(df, position):
    """Return ``(text, "<confidence>%")`` for the OCR row at *position*.

    Returns two empty strings when the frame has no row at that position, so
    a document with fewer detections does not abort the whole extraction.
    """
    if position >= len(df):
        return "", ""
    row = df.iloc[position]
    # Explicit positional access: plain row[0] on a string-labelled Series
    # relies on a deprecated positional fallback.
    confidence = round(float(row.iloc[1]) * 100, 3)
    return row.iloc[0], f"{confidence}%"


def pdf_extract_gr(image):
    """Run the full extraction pipeline on one uploaded image.

    Steps: shrink the image, deskew it, crop/normalize it for the
    autoencoder, denoise it, then run OCR on the denoised image. The first
    three OCR entries are reported as first name, surname and date of death.

    Parameters
    ----------
    image : array-like
        The uploaded image.

    Returns
    -------
    tuple
        ``(df, deskewed_image, angle, skew_confidence, img, firstname,
        firstnameconfidence, surname, surnameconfidence, dodname,
        dodconfidence)`` -- the full OCR table, the deskewed image with its
        predicted angle and confidence, the denoised image, and the three
        extracted fields, each with a confidence string such as "98.765%".
        A field and its confidence are empty strings when OCR found fewer
        entries than needed.
    """
    extractimg = preprocess_image(image)

    # deskew the image
    deskewed_image, angle, skew_confidence = deskew(extractimg, model_deskew)

    # prepare the image for the autoencoder
    cleanimg = prepare_image_to_autoencoder(deskewed_image)

    # clean the image
    img = autoencode_ONNX(cleanimg, model_denoise)

    # extract the entries from the image
    df = extract_detected_entries_pdl(img)

    # the fields are taken by position in the OCR output
    firstname, firstnameconfidence = _entry_with_confidence(df, 0)
    surname, surnameconfidence = _entry_with_confidence(df, 1)
    dodname, dodconfidence = _entry_with_confidence(df, 2)

    # return all the results
    return (df, deskewed_image, angle, skew_confidence, img,
            firstname, firstnameconfidence,
            surname, surnameconfidence,
            dodname, dodconfidence)
##### Gradio Style #####
# Custom CSS for the classes attached to UI components through `elem_classes`.
css = """
.run_container {
display: flex;
flex-direction: column;
align-items: center;
gap: 10px;
}
.run_btn {
margin: auto;
width: 50%;
display: flex;
}
.upload_cell {
margin: auto;
display: flex;
}
.results_container {
display: flex;
justify-content: space-evenly;
}
.results_cell {
}
"""
##### Gradio Blocks #####
# NOTE(review): the original indentation of this section was lost; the nesting
# below is a reconstruction -- confirm it matches the intended page layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown("\n# Death Certificate Extraction\n", elem_classes="h1")
    gr.Markdown("Upload a PDF, extract data")

    with gr.Box(elem_classes="run_container"):
        # ExtractInput = gr.File(label = "Death Certificate", elem_classes="upload_cell")
        # The button caption is the component's value; the original passed the
        # text as `label`, which does not set the caption.
        ExtractButton = gr.Button(value="Extract", elem_classes="run_btn")

    with gr.Row(elem_id="hide"):
        # left column: the image to process
        with gr.Column():
            ExtractInput = gr.Image()

        # right column: extracted fields and intermediate results
        with gr.Column():
            # ExtractResult = gr.Image(label = "result")
            with gr.Row(elem_classes="results_container"):
                FirstNameBox = gr.Textbox(label="First Name", elem_classes="results_cell")
                FirstNameConfidenceBox = gr.Textbox(label="First Name Confidence", elem_classes="results_cell")
            with gr.Row(elem_classes="results_container"):
                SurnameNameBox = gr.Textbox(label="Surname", elem_classes="results_cell")
                SurnameNameConfidenceBox = gr.Textbox(label="Surname Confidence", elem_classes="results_cell")
            with gr.Row(elem_classes="results_container"):
                DODBox = gr.Textbox(label="Date of Death", elem_classes="results_cell")
                DODConfidenceBox = gr.Textbox(label="Date of Death Confidence", elem_classes="results_cell")
            with gr.Accordion("Full Results", open=False):
                ExtractDF = gr.Dataframe(label="Results")
            with gr.Accordion("Clean Image", open=False):
                CleanOutput = gr.Image()
            with gr.Accordion("Deskew", open=False):
                DeskewOutput = gr.Image()
                with gr.Column():
                    DeskewAngle = gr.Number(label="Angle")
                with gr.Column():
                    DeskewConfidence = gr.Number(label="Confidence")

    # The output order must match the tuple returned by pdf_extract_gr.
    ExtractButton.click(fn=pdf_extract_gr,
                        inputs=ExtractInput,
                        outputs=[ExtractDF, DeskewOutput, DeskewAngle,
                                 DeskewConfidence, CleanOutput, FirstNameBox,
                                 FirstNameConfidenceBox, SurnameNameBox,
                                 SurnameNameConfidenceBox, DODBox, DODConfidenceBox])

demo.launch(show_api=True, share=False, debug=True)