Alealejandrooo committed on
Commit
d1b3545
·
1 Parent(s): f7610e7

Changes to Main Script

Browse files
Files changed (1) hide show
  1. app.py +70 -157
app.py CHANGED
@@ -1,4 +1,3 @@
1
- # from alessandro
2
  import re
3
  import cv2
4
  import numpy as np
@@ -7,43 +6,34 @@ from PIL import Image
7
  import matplotlib.pyplot as plt
8
  import pandas as pd
9
  import matplotlib.pyplot as plt
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- ocr = PaddleOCR(lang='sl')
12
-
13
- # def convert_to_image(document):
14
- # '''
15
- # Function: converts the pdf to image
16
- # Input: pdf document
17
- # Output: image
18
- # '''
19
-
20
- # # reads PDFs
21
- # # reads only first page of PDF documents
22
-
23
- # # os.path.join(document.name, 'sample.pdf')
24
- # pdf_document = load_from_file(document)
25
- # page_1 = pdf_document.create_page(0)
26
- # images = renderer.render_page(page_1)
27
- # image_data = image.data
28
- # # convert the image to numpy array
29
- # image = np.array(images)
30
- # # handles non-PDF formats (e.g., .tif)
31
- # # else:
32
- # # images = Image.open(document)
33
- # # # convert the image to RGB
34
- # # image = images.convert('RGB')
35
- # # # convert the image to numpy array
36
- # # image = np.array(image)
37
- # # # TODO: change to dynamic scaling
38
- # # # downscale the image
39
- # # scale = 1.494
40
- # # width = int(image.shape[1] / scale)
41
- # # height = int(image.shape[0] / scale)
42
- # # dim = (width, height)
43
- # # image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
44
- # # fig, ax = plt.subplots(figsize=(15, 10))
45
- # # ax.imshow(image, cmap = 'gray')
46
- # return image
47
 
48
 
49
  def deskew(image, model):
@@ -52,7 +42,6 @@ def deskew(image, model):
52
  Input: takes an image as an array
53
  Output: deskewed image
54
  '''
55
-
56
  # map the model classes to the actual degree of skew
57
  map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
58
  5: '-14',6: '-15', 7: '-2', 8: '-3', 9: '-4',
@@ -102,7 +91,6 @@ def prepare_image_to_autoencoder(image):
102
  Input: image (_type_): deskewed image
103
  Output: resized image to be passed to the autoencoder
104
  '''
105
-
106
  height, width = image.shape[:2]
107
  target_height = 600
108
  target_width = 600
@@ -123,68 +111,39 @@ def autoencode_ONNX(image, model):
123
  Input: image and autoencoder model
124
  Output: image
125
  '''
126
-
127
  image = image.astype(np.float32).reshape(1, 600, 600, 1)
128
  image = model.run(None, {'input_2': image})
129
  image = image[0]
130
  image = image.squeeze()
131
  image = image * 255
132
  image = image.astype('uint8')
133
- # fig, ax = plt.subplots(figsize=(8, 5))
134
- # ax.imshow(image, cmap = 'gray')
135
  return image
136
 
137
-
138
- def detect_entries_ONNX(denoised, model):
139
- '''
140
- Function: detects the Priimek, Ime and Datum boxes
141
- Priimek: lastname
142
- Ime: firstname
143
- Datum smrti: date of death
144
- Input: image
145
- Output: boxes and confidence scores
146
- '''
147
-
148
- # the object detection model requires a tensor(1, h, w, 3)
149
- autoencoded_RGB = cv2.cvtColor(denoised, cv2.COLOR_GRAY2RGB)
150
- # adds the 1 to the tensor
151
- autoencoded_expanded = np.expand_dims(autoencoded_RGB, axis=0)
152
- detections = model.run(None, {'input_tensor': autoencoded_expanded})
153
- boxes = detections[1]
154
- confidence = detections[4] # returns a ndarray in a list of list
155
- boxes = np.array(boxes[0])
156
- confidence = np.array(confidence).reshape(5, 1)
157
- boxes_and_confidence = np.append(boxes, confidence, axis=1)
158
- # reshapes the boxes to be sorted
159
- boxes_and_confidence = boxes_and_confidence.reshape(5, 5)
160
- # sorts
161
- boxes_and_confidence = \
162
- boxes_and_confidence[boxes_and_confidence[:, 0].argsort()]
163
- # boxes (expressed in image %)
164
- boxes = boxes_and_confidence[:, :-1]
165
- # boxes (expressed in actual pixels: ymin, xmin, ymax, xmax)
166
- boxes = boxes * 600
167
- # confidence boxes
168
- confidence_boxes = boxes_and_confidence[:, -1].tolist()
169
-
170
- for box in boxes:
171
- ymin, xmin, ymax, xmax = box.astype(int)
172
- cv2.rectangle(autoencoded_RGB, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
173
- plt.figure()
174
- plt.imshow(cv2.cvtColor(autoencoded_RGB, cv2.COLOR_BGR2RGB))
175
- plt.title("Detected Boxes")
176
- plt.savefig("test.jpg")
177
- img = cv2.imread("test.jpg")
178
- return Image.fromarray(img), confidence_boxes
179
-
180
  def extract_detected_entries_pdl(image):
181
-
182
- result = ocr.ocr(image, cls=False)
183
-
184
- # boxes = [line[0] for line in result]
185
- # txts = [line[1][0] for line in result]
186
- # scores = [line[1][1] for line in result]
187
- # im_show = draw_ocr(image, boxes, txts, scores, font_path ='/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  txt = []
189
  scores = []
190
  boxes = []
@@ -193,7 +152,7 @@ def extract_detected_entries_pdl(image):
193
  scores.append(r[-1][1])
194
  boxes.append(r[0])
195
 
196
- return pd.DataFrame(np.transpose([txt,scores, boxes]),columns = ["Text","Score", "Boundary Box"])
197
 
198
  def cleanString_basic(word):
199
  word = word.replace("$", "s")
@@ -233,82 +192,41 @@ def clean_dates(date: 'str'):
233
  string = re.sub(r'[a-zA-Z!\[\|]', '', date)
234
  return string, date_flags
235
 
236
- def regex_string(string):
237
- '''
238
- Function: replaces the characters carrying a "hat" (caron) with their plain counterparts
239
- Input: string
240
- Output: cleaned string
241
- '''
242
- map = {'Č': 'C',
243
- 'č': 'c',
244
- 'Š': 'S',
245
- 'š': 's',
246
- 'Ž': 'Z',
247
- 'ž':'z'}
248
- for x in string:
249
- if x in map:
250
- string = string.replace(x, map[x])
251
- return string
252
 
253
- import onnxruntime
254
 
255
- def pdf_deskew_gr (document):
256
- img = convert_to_image(document)
257
- model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
258
- deskewed_image, angle, skew_confidence = deskew(img, model)
259
- return deskewed_image, angle, skew_confidence
260
-
261
- def pdf_clean_gr(document):
262
- img = convert_to_image(document)
263
- model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
264
- deskewed_image, angle, skew_confidence = deskew(img, model)
265
- img = prepare_image_to_autoencoder(img)
266
- model = onnxruntime.InferenceSession("./models/autoencoder_denoise_v0.0.2.onnx")
267
- img = autoencode_ONNX(img, model)
268
- return img
269
-
270
- def pdf_resnet_gr(document):
271
- img = convert_to_image(document)
272
- model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/CNN_deskew_v0.0.2.onnx")
273
- deskewed_image, angle, skew_confidence = deskew(img, model)
274
- img = prepare_image_to_autoencoder(img)
275
- model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/autoencoder_denoise_v0.0.2.onnx")
276
- img = autoencode_ONNX(img, model)
277
- model = onnxruntime.InferenceSession("/content/drive/MyDrive/cpo/Alessandro/ai_models/Latest/ResNet_od_v0.0.2.onnx")
278
- boxes, confidence_boxes = detect_entries_ONNX(img, model)
279
- return boxes, confidence_boxes
280
-
281
- def pdf_extract_gr(extractimg):
282
- # extractimg = convert_to_image(document)
283
- extractimg = np.array(extractimg)
284
- model = onnxruntime.InferenceSession("./models/CNN_deskew_v0.0.2.onnx")
285
- deskewed_image, angle, skew_confidence = deskew(extractimg, model)
286
  cleanimg = prepare_image_to_autoencoder(deskewed_image)
287
- model = onnxruntime.InferenceSession("./models/autoencoder_denoise_v0.0.2.onnx")
288
- img = autoencode_ONNX(cleanimg, model)
289
- # model = onnxruntime.InferenceSession("./models/ResNet_od_v0.0.2.onnx")
290
- # boxes, confidence_boxes = detect_entries_ONNX(img, model)
291
- # confidence_entries, lastname, firstname, death_date = extract_detected_entries_pdl(img, boxes)
292
-
293
  df = extract_detected_entries_pdl(img)
294
-
295
  firstnamerow = df.iloc[0]
296
  firstname = firstnamerow[0]
297
  firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
298
  firstnameconfidence = f"{firstnameconfidence}%"
299
-
300
  surnamerow = df.iloc[1]
301
  surname = surnamerow[0]
302
  surnameconfidence = round(float(surnamerow[1]) * 100,3)
303
  surnameconfidence = f"{surnameconfidence}%"
304
-
305
  dodrow = df.iloc[2]
306
  dodname = dodrow[0]
307
  dodconfidence = round(float(dodrow[1]) * 100,3)
308
  dodconfidence = f"{dodconfidence}%"
309
-
310
  return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
311
 
 
 
 
312
  css = """
313
  .run_container {
314
  display: flex;
@@ -316,7 +234,6 @@ css = """
316
  align-items: center;
317
  gap: 10px;
318
  }
319
-
320
  .run_btn {
321
  margin: auto;
322
  width: 50%;
@@ -326,19 +243,15 @@ css = """
326
  margin: auto;
327
  display: flex;
328
  }
329
-
330
  .results_container {
331
  display: flex;
332
  justify-content: space-evenly;
333
  }
334
-
335
  .results_cell {
336
-
337
  }
338
-
339
  """
340
 
341
- import gradio as gr
342
 
343
  with gr.Blocks(css = css) as demo:
344
  gr.Markdown("""
 
 
1
  import re
2
  import cv2
3
  import numpy as np
 
6
  import matplotlib.pyplot as plt
7
  import pandas as pd
8
  import matplotlib.pyplot as plt
9
+ import onnxruntime
10
+ import gradio as gr
11
+
12
+ # initialize the OCR
13
+ ocr = PaddleOCR(lang='sl',
14
+ enable_mkldnn=True,
15
+ cls=False,
16
+ show_log= False)
17
+
18
+ # initialize the models
19
+ model_deskew = onnxruntime.InferenceSession("/content/CNN_deskew_v0.0.2.onnx")
20
+ model_denoise = onnxruntime.InferenceSession("/content/autoencoder_denoise_v0.0.2.onnx")
21
+
22
+ ##### All Functions #####
23
 
24
+ def preprocess_image(image):
25
+ '''
26
+ Function: downscales the image to make it lighter to work on
27
+ Input: image (anything np.array can convert)
28
+ Output: image shrunk by a fixed factor of 1.494, as a numpy array
29
+ '''
30
+ image = np.array(image)
31
+ scale = 1.494
32
+ width = int(image.shape[1] / scale)
33
+ height = int(image.shape[0] / scale)
34
+ dim = (width, height)
35
+ image = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
36
+ return image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
  def deskew(image, model):
 
42
  Input: takes an image as an array
43
  Output: deskewed image
44
  '''
 
45
  # map the model classes to the actual degree of skew
46
  map = { 0: '-1', 1: '-10', 2: '-11', 3: '-12', 4: '-13',
47
  5: '-14',6: '-15', 7: '-2', 8: '-3', 9: '-4',
 
91
  Input: image (_type_): deskewed image
92
  Output: resized image to be passed to the autoencoder
93
  '''
 
94
  height, width = image.shape[:2]
95
  target_height = 600
96
  target_width = 600
 
111
  Input: image and autoencoder model
112
  Output: image
113
  '''
 
114
  image = image.astype(np.float32).reshape(1, 600, 600, 1)
115
  image = model.run(None, {'input_2': image})
116
  image = image[0]
117
  image = image.squeeze()
118
  image = image * 255
119
  image = image.astype('uint8')
 
 
120
  return image
121
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  def extract_detected_entries_pdl(image):
123
+ """
124
+ Extracts text, scores, and boundary boxes from an image using OCR and returns a DataFrame.
125
+
126
+ This function takes an input image, applies OCR to detect text in the image, and then extracts
127
+ the detected text, confidence scores, and boundary boxes for each text entry. The extracted
128
+ information is returned in a DataFrame with columns "Text", "Score", and "Boundary Box".
129
+
130
+ Parameters
131
+ ----------
132
+ image : numpy.ndarray
133
+ The input image to be processed.
134
+
135
+ Returns
136
+ -------
137
+ pandas.DataFrame
138
+ A DataFrame containing the extracted text, confidence scores, and boundary boxes
139
+ for each detected text entry. The DataFrame has the following columns:
140
+ - "Text": The detected text.
141
+ - "Score": The confidence score for the detected text.
142
+ - "Boundary Box": The coordinates of the boundary box for the detected text.
143
+ """
144
+ # run the OCR
145
+ result = ocr.ocr(image)
146
+ # creates the Pandas dataframe
147
  txt = []
148
  scores = []
149
  boxes = []
 
152
  scores.append(r[-1][1])
153
  boxes.append(r[0])
154
 
155
+ return pd.DataFrame(np.transpose([txt, scores, boxes]),columns = ["Text","Score", "Boundary Box"])
156
 
157
  def cleanString_basic(word):
158
  word = word.replace("$", "s")
 
192
  string = re.sub(r'[a-zA-Z!\[\|]', '', date)
193
  return string, date_flags
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
+ ##### Main Function #####
197
 
198
+ def pdf_extract_gr(image):
199
+ extractimg = preprocess_image(image)
200
+ #extractimg = np.array(image)
201
+ # deskew the image
202
+ deskewed_image, angle, skew_confidence = deskew(extractimg, model_deskew)
203
+ # prepare the image for the autoencoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  cleanimg = prepare_image_to_autoencoder(deskewed_image)
205
+ # clean the image
206
+ img = autoencode_ONNX(cleanimg, model_denoise)
207
+ # extract the entries from the image
 
 
 
208
  df = extract_detected_entries_pdl(img)
209
+ # first name
210
  firstnamerow = df.iloc[0]
211
  firstname = firstnamerow[0]
212
  firstnameconfidence = round(float(firstnamerow[1]) * 100,3)
213
  firstnameconfidence = f"{firstnameconfidence}%"
214
+ # surname
215
  surnamerow = df.iloc[1]
216
  surname = surnamerow[0]
217
  surnameconfidence = round(float(surnamerow[1]) * 100,3)
218
  surnameconfidence = f"{surnameconfidence}%"
219
+ # date of death and its confidence
220
  dodrow = df.iloc[2]
221
  dodname = dodrow[0]
222
  dodconfidence = round(float(dodrow[1]) * 100,3)
223
  dodconfidence = f"{dodconfidence}%"
224
+ # return all the results
225
  return df, deskewed_image, angle, skew_confidence, img, firstname, firstnameconfidence, surname, surnameconfidence, dodname, dodconfidence
226
 
227
+
228
+ ##### Gradio Style #####
229
+
230
  css = """
231
  .run_container {
232
  display: flex;
 
234
  align-items: center;
235
  gap: 10px;
236
  }
 
237
  .run_btn {
238
  margin: auto;
239
  width: 50%;
 
243
  margin: auto;
244
  display: flex;
245
  }
 
246
  .results_container {
247
  display: flex;
248
  justify-content: space-evenly;
249
  }
 
250
  .results_cell {
 
251
  }
 
252
  """
253
 
254
+ ##### Gradio Blocks #####
255
 
256
  with gr.Blocks(css = css) as demo:
257
  gr.Markdown("""