Spaces:
Sleeping
Sleeping
Alealejandrooo
committed on
Commit
·
d1b3545
1
Parent(s):
f7610e7
Changes to Main Script
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
# from alessandro
|
2 |
import re
|
3 |
import cv2
|
4 |
import numpy as np
|
@@ -7,43 +6,34 @@ from PIL import Image
|
|
7 |
import matplotlib.pyplot as plt
|
8 |
import pandas as pd
|
9 |
import matplotlib.pyplot as plt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
# pdf_document = load_from_file(document)
|
25 |
-
# page_1 = pdf_document.create_page(0)
|
26 |
-
# images = renderer.render_page(page_1)
|
27 |
-
# image_data = image.data
|
28 |
-
# # convert the image to numpy array
|
29 |
-
# image = np.array(images)
|
30 |
-
# # handles non-PDF formats (e.g., .tif)
|
31 |
-
# # else:
|
32 |
-
# # images = Image.open(document)
|
33 |
-
# # # convert the image to RGB
|
34 |
-
# # image = images.convert('RGB')
|
35 |
-
# # # convert the image to numpy array
|
36 |
-
# # image = np.array(image)
|
37 |
-
# # # TODO: change to dynamic scaling
|
38 |
-
# # # downscale the image
|
39 |
-
# # scale = 1.494
|
40 |
-
# # width = int(image.shape[1] / scale)
|
41 |
-
# # height = int(image.shape[0] / scale)
|
42 |
-
# # dim = (width, height)
|
43 |
-
# # image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
|
44 |
-
# # fig, ax = plt.subplots(figsize=(15, 10))
|
45 |
-
# # ax.imshow(image, cmap = 'gray')
|
46 |
-
# return image
|
47 |
|
48 |
|
49 |
def deskew(image, model):
|
@@ -52,7 +42,6 @@ def deskew(image, model):
|
|
52 |
Input: takes an image as an array
|
53 |
Output: deskewed image
|
54 |
'''
|
55 |
-
|
56 |
# map the model classes to the actual degree of skew
|
57 |
map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
|
58 |
5: '-14',6: '-15', 7: '-2', 8: '-3', 9: '-4',
|
@@ -102,7 +91,6 @@ def prepare_image_to_autoencoder(image):
|
|
102 |
Input: image (_type_): deskewed image
|
103 |
Output: resized image to be passed to the autoencoder
|
104 |
'''
|
105 |
-
|
106 |
height, width = image.shape[:2]
|
107 |
target_height = 600
|
108 |
target_width = 600
|
@@ -123,68 +111,39 @@ def autoencode_ONNX(image, model):
|
|
123 |
Input: image and autoencoder model
|
124 |
Output: image
|
125 |
'''
|
126 |
-
|
127 |
image = image.astype(np.float32).reshape(1, 600, 600, 1)
|
128 |
image = model.run(None, {'input_2': image})
|
129 |
image = image[0]
|
130 |
image = image.squeeze()
|
131 |
image = image * 255
|
132 |
image = image.astype('uint8')
|
133 |
-
# fig, ax = plt.subplots(figsize=(8, 5))
|
134 |
-
# ax.imshow(image, cmap = 'gray')
|
135 |
return image
|
136 |
|
137 |
-
|
138 |
-
def detect_entries_ONNX(denoised, model):
|
139 |
-
'''
|
140 |
-
Function: detect the Priimek, Ime and Datum boxes
|
141 |
-
Priimek: lastname
|
142 |
-
Ime: firstname
|
143 |
-
Datum smrti: date of death
|
144 |
-
Input: image
|
145 |
-
Output: boxes and confidence scores
|
146 |
-
'''
|
147 |
-
|
148 |
-
# the object detection model requires a tensor(1, h, w, 3)
|
149 |
-
autoencoded_RGB = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
|
150 |
-
# adds the 1 to the tensor
|
151 |
-
autoencoded_expanded = np.expand_dims(autoencoded_RGB, axis=0)
|
152 |
-
detections = model.run(None, {'input_tensor': autoencoded_expanded})
|
153 |
-
boxes = detections[1]
|
154 |
-
confidence = detections[4] # returns a ndarray in a list of list
|
155 |
-
boxes = np.array(boxes[0])
|
156 |
-
confidence = np.array(confidence).reshape(5, 1)
|
157 |
-
boxes_and_confidence = np.append(boxes, confidence, axis=1)
|
158 |
-
# reshapes the boxes to be sorted
|
159 |
-
boxes_and_confidence = boxes_and_confidence.reshape(5, 5)
|
160 |
-
# sorts
|
161 |
-
boxes_and_confidence = \
|
162 |
-
boxes_and_confidence[boxes_and_confidence[:, 0].argsort()]
|
163 |
-
# boxes (expressed in image %)
|
164 |
-
boxes = boxes_and_confidence[:, :-1]
|
165 |
-
# boxes (expressed in actual pixels: ymin, xmin, ymax, xmax)
|
166 |
-
boxes = boxes * 600
|
167 |
-
# confidence boxes
|
168 |
-
confidence_boxes = boxes_and_confidence[:, -1].tolist()
|
169 |
-
|
170 |
-
for box in boxes:
|
171 |
-
ymin, xmin, ymax, xmax = box.astype(int)
|
172 |
-
cv2.rectangle(autoencoded_RGB, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
|
173 |
-
plt.figure()
|
174 |
-
plt.imshow(cv2.cvtColor(autoencoded_RGB, cv2.COLOR_BGR2RGB))
|
175 |
-
plt.title("Detected Boxes")
|
176 |
-
plt.savefig("test.jpg")
|
177 |
-
img = cv2.imread("test.jpg")
|
178 |
-
return Image.fromarray(img), confidence_boxes
|
179 |
-
|
180 |
def extract_detected_entries_pdl(image):
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
txt = []
|
189 |
scores = []
|
190 |
boxes = []
|
@@ -193,7 +152,7 @@ def extract_detected_entries_pdl(image):
|
|
193 |
scores.append(r[-1][1])
|
194 |
boxes.append(r[0])
|
195 |
|
196 |
-
return pd.DataFrame(np.transpose([txt,scores, boxes]),columns = ["Text","Score", "Boundary Box"])
|
197 |
|
198 |
def cleanString_basic(word):
|
199 |
word = word.replace("$", "s")
|
@@ -233,82 +192,41 @@ def clean_dates(date: 'str'):
|
|
233 |
string = re.sub(r'[a-zA-Z!\[\|]', '', date)
|
234 |
return string, date_flags
|
235 |
|
236 |
-
def regex_string(string):
|
237 |
-
'''
|
238 |
-
Function: replaces the characters carrying a "hat" (caron) with their plain counterparts
|
239 |
-
Input: string
|
240 |
-
Output: cleaned string
|
241 |
-
'''
|
242 |
-
map = {'Č': 'C',
|
243 |
-
'č': 'c',
|
244 |
-
'Š': 'S',
|
245 |
-
'š': 's',
|
246 |
-
'Ž': 'Z',
|
247 |
-
'ž':'z'}
|
248 |
-
for x in string:
|
249 |
-
if x in map:
|
250 |
-
string = string.replace(x, map[x])
|
251 |
-
return string
|
252 |
|
253 |
-
|
254 |
|
255 |
-
def
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
def pdf_clean_gr(document):
|
262 |
-
img = convert_to_image(document)
|
263 |
-
model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
|
264 |
-
deskewed_image, angle, skew_confidence = deskew(img, model)
|
265 |
-
img = prepare_image_to_autoencoder(img)
|
266 |
-
model = onnxruntime.InferenceSession("./models/autoencoder_denoise_v0.0.2.onnx")
|
267 |
-
img = autoencode_ONNX(img, model)
|
268 |
-
return img
|
269 |
-
|
270 |
-
def pdf_resnet_gr(document):
|
271 |
-
img = convert_to_image(document)
|
272 |
-
model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/CNN_deskew_v0.0.2.onnx")
|
273 |
-
deskewed_image, angle, skew_confidence = deskew(img, model)
|
274 |
-
img = prepare_image_to_autoencoder(img)
|
275 |
-
model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/autoencoder_denoise_v0.0.2.onnx")
|
276 |
-
img = autoencode_ONNX(img, model)
|
277 |
-
model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/ResNet_od_v0.0.2.onnx")
|
278 |
-
boxes, confidence_boxes = detect_entries_ONNX(img, model)
|
279 |
-
return boxes, confidence_boxes
|
280 |
-
|
281 |
-
def pdf_extract_gr(extractimg):
|
282 |
-
# extractimg = convert_to_image(document)
|
283 |
-
extractimg = np.array(extractimg)
|
284 |
-
model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
|
285 |
-
deskewed_image, angle, skew_confidence = deskew(extractimg, model)
|
286 |
cleanimg = prepare_image_to_autoencoder(deskewed_image)
|
287 |
-
|
288 |
-
img = autoencode_ONNX(cleanimg,
|
289 |
-
#
|
290 |
-
# boxes, confidence_boxes = detect_entries_ONNX(img, model)
|
291 |
-
# confidence_entries, lastname, firstname, death_date = extract_detected_entries_pdl(img, boxes)
|
292 |
-
|
293 |
df = extract_detected_entries_pdl(img)
|
294 |
-
|
295 |
firstnamerow = df.iloc[0]
|
296 |
firstname = firstnamerow[0]
|
297 |
firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
|
298 |
firstnameconfidence = f"{firstnameconfidence}%"
|
299 |
-
|
300 |
surnamerow = df.iloc[1]
|
301 |
surname = surnamerow[0]
|
302 |
surnameconfidence = round(float(surnamerow[1]) * 100,3)
|
303 |
surnameconfidence = f"{surnameconfidence}%"
|
304 |
-
|
305 |
dodrow = df.iloc[2]
|
306 |
dodname = dodrow[0]
|
307 |
dodconfidence = round(float(dodrow[1]) * 100,3)
|
308 |
dodconfidence = f"{dodconfidence}%"
|
309 |
-
|
310 |
return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
|
311 |
|
|
|
|
|
|
|
312 |
css = """
|
313 |
.run_container {
|
314 |
display: flex;
|
@@ -316,7 +234,6 @@ css = """
|
|
316 |
align-items: center;
|
317 |
gap: 10px;
|
318 |
}
|
319 |
-
|
320 |
.run_btn {
|
321 |
margin: auto;
|
322 |
width: 50%;
|
@@ -326,19 +243,15 @@ css = """
|
|
326 |
margin: auto;
|
327 |
display: flex;
|
328 |
}
|
329 |
-
|
330 |
.results_container {
|
331 |
display: flex;
|
332 |
justify-content: space-evenly;
|
333 |
}
|
334 |
-
|
335 |
.results_cell {
|
336 |
-
|
337 |
}
|
338 |
-
|
339 |
"""
|
340 |
|
341 |
-
|
342 |
|
343 |
with gr.Blocks(css = css) as demo:
|
344 |
gr.Markdown("""
|
|
|
|
|
1 |
import re
|
2 |
import cv2
|
3 |
import numpy as np
|
|
|
6 |
import matplotlib.pyplot as plt
|
7 |
import pandas as pd
|
8 |
import matplotlib.pyplot as plt
|
9 |
+
import onnxruntime
|
10 |
+
import gradio as gr
|
11 |
+
|
12 |
+
# initialize the OCR
|
13 |
+
ocr = PaddleOCR(lang='sl',
|
14 |
+
enable_mkldnn=True,
|
15 |
+
cls=False,
|
16 |
+
show_log= False)
|
17 |
+
|
18 |
+
# initialize the models
|
19 |
+
model_deskew = onnxruntime.InferenceSession("/content/CNN_deskew_v0.0.2.onnx")
|
20 |
+
model_denoise = onnxruntime.InferenceSession("/content/autoencoder_denoise_v0.0.2.onnx")
|
21 |
+
|
22 |
+
##### All Functions #####
|
23 |
|
24 |
+
def preprocess_image(image):
|
25 |
+
'''
|
26 |
+
Function: preprocess image to make it lighter to work on
|
27 |
+
Input: resized image
|
28 |
+
Output: image
|
29 |
+
'''
|
30 |
+
image = np.array(image)
|
31 |
+
scale = 1.494
|
32 |
+
width = int(image.shape[1] / scale)
|
33 |
+
height = int(image.shape[0] / scale)
|
34 |
+
dim = (width, height)
|
35 |
+
image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
|
36 |
+
return image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
|
39 |
def deskew(image, model):
|
|
|
42 |
Input: takes an image as an array
|
43 |
Output: deskewed image
|
44 |
'''
|
|
|
45 |
# map the model classes to the actual degree of skew
|
46 |
map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
|
47 |
5: '-14',6: '-15', 7: '-2', 8: '-3', 9: '-4',
|
|
|
91 |
Input: image (_type_): deskewed image
|
92 |
Output: resized image to be passed to the autoencoder
|
93 |
'''
|
|
|
94 |
height, width = image.shape[:2]
|
95 |
target_height = 600
|
96 |
target_width = 600
|
|
|
111 |
Input: image and autoencoder model
|
112 |
Output: image
|
113 |
'''
|
|
|
114 |
image = image.astype(np.float32).reshape(1, 600, 600, 1)
|
115 |
image = model.run(None, {'input_2': image})
|
116 |
image = image[0]
|
117 |
image = image.squeeze()
|
118 |
image = image * 255
|
119 |
image = image.astype('uint8')
|
|
|
|
|
120 |
return image
|
121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
122 |
def extract_detected_entries_pdl(image):
|
123 |
+
"""
|
124 |
+
Extracts text, scores, and boundary boxes from an image using OCR and returns a DataFrame.
|
125 |
+
|
126 |
+
This function takes an input image, applies OCR to detect text in the image, and then extracts
|
127 |
+
the detected text, confidence scores, and boundary boxes for each text entry. The extracted
|
128 |
+
information is returned in a DataFrame with columns "Text", "Score", and "Boundary Box".
|
129 |
+
|
130 |
+
Parameters
|
131 |
+
----------
|
132 |
+
image : numpy.ndarray
|
133 |
+
The input image to be processed.
|
134 |
+
|
135 |
+
Returns
|
136 |
+
-------
|
137 |
+
pandas.DataFrame
|
138 |
+
A DataFrame containing the extracted text, confidence scores, and boundary boxes
|
139 |
+
for each detected text entry. The DataFrame has the following columns:
|
140 |
+
- "Text": The detected text.
|
141 |
+
- "Score": The confidence score for the detected text.
|
142 |
+
- "Boundary Box": The coordinates of the boundary box for the detected text.
|
143 |
+
"""
|
144 |
+
# run the OCR
|
145 |
+
result = ocr.ocr(image)
|
146 |
+
# creates the Pandas dataframe
|
147 |
txt = []
|
148 |
scores = []
|
149 |
boxes = []
|
|
|
152 |
scores.append(r[-1][1])
|
153 |
boxes.append(r[0])
|
154 |
|
155 |
+
return pd.DataFrame(np.transpose([txt, scores, boxes]),columns = ["Text","Score", "Boundary Box"])
|
156 |
|
157 |
def cleanString_basic(word):
|
158 |
word = word.replace("$", "s")
|
|
|
192 |
string = re.sub(r'[a-zA-Z!\[\|]', '', date)
|
193 |
return string, date_flags
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
|
196 |
+
##### Main Function #####
|
197 |
|
198 |
+
def pdf_extract_gr(image):
|
199 |
+
extractimg = preprocess_image(image)
|
200 |
+
#extractimg = np.array(image)
|
201 |
+
# deskew the image
|
202 |
+
deskewed_image, angle, skew_confidence = deskew(extractimg, model_deskew)
|
203 |
+
# prepare the image for the autoencoder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
204 |
cleanimg = prepare_image_to_autoencoder(deskewed_image)
|
205 |
+
# clean the image
|
206 |
+
img = autoencode_ONNX(cleanimg, model_denoise)
|
207 |
+
# extract the entries from the image
|
|
|
|
|
|
|
208 |
df = extract_detected_entries_pdl(img)
|
209 |
+
# first name
|
210 |
firstnamerow = df.iloc[0]
|
211 |
firstname = firstnamerow[0]
|
212 |
firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
|
213 |
firstnameconfidence = f"{firstnameconfidence}%"
|
214 |
+
# surname
|
215 |
surnamerow = df.iloc[1]
|
216 |
surname = surnamerow[0]
|
217 |
surnameconfidence = round(float(surnamerow[1]) * 100,3)
|
218 |
surnameconfidence = f"{surnameconfidence}%"
|
219 |
+
# death date confidence
|
220 |
dodrow = df.iloc[2]
|
221 |
dodname = dodrow[0]
|
222 |
dodconfidence = round(float(dodrow[1]) * 100,3)
|
223 |
dodconfidence = f"{dodconfidence}%"
|
224 |
+
# return all the results
|
225 |
return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
|
226 |
|
227 |
+
|
228 |
+
##### Gradio Style #####
|
229 |
+
|
230 |
css = """
|
231 |
.run_container {
|
232 |
display: flex;
|
|
|
234 |
align-items: center;
|
235 |
gap: 10px;
|
236 |
}
|
|
|
237 |
.run_btn {
|
238 |
margin: auto;
|
239 |
width: 50%;
|
|
|
243 |
margin: auto;
|
244 |
display: flex;
|
245 |
}
|
|
|
246 |
.results_container {
|
247 |
display: flex;
|
248 |
justify-content: space-evenly;
|
249 |
}
|
|
|
250 |
.results_cell {
|
|
|
251 |
}
|
|
|
252 |
"""
|
253 |
|
254 |
+
##### Gradio Blocks #####
|
255 |
|
256 |
with gr.Blocks(css = css) as demo:
|
257 |
gr.Markdown("""
|