MrFeelgoood committed on
Commit
5568e6f
1 Parent(s): a30c743

Fixed bug in ocr function

Browse files

Fixed bugs in the OCR function

Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -231,14 +231,14 @@ def pdf_ocr(file, model_t, question):
231
  # Perform OCR on the PDF if the extracted text is empty
232
  if not text:
233
  # Convert PDF pages to images
234
- images = convert_from_path(content)
235
  for i, img in enumerate(images):
236
  text += pytesseract.image_to_string(img, lang='ita')
237
 
238
  # Clear the image list to free up memory
239
  del images
240
 
241
- ks = ('mq', 'metri quadri', 'm2')
242
  quest = "Quanti metri quadri misura la superficie?"
243
  totalK = ['totale', 'complessivo', 'complessiva']
244
 
@@ -296,6 +296,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
296
  extract_button.click(fn = ocr_interface,
297
  inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
298
 
299
- gr.Examples(['Example1.pdf', 'Example2.pdf'], inputs = pdf_input)
300
 
301
  demo.launch()
 
231
  # Perform OCR on the PDF if the extracted text is empty
232
  if not text:
233
  # Convert PDF pages to images
234
+ images = convert_from_bytes(content)
235
  for i, img in enumerate(images):
236
  text += pytesseract.image_to_string(img, lang='ita')
237
 
238
  # Clear the image list to free up memory
239
  del images
240
 
241
+ ks = ('mq', 'MQ', 'Mq', 'metri quadri', 'm2')
242
  quest = "Quanti metri quadri misura la superficie?"
243
  totalK = ['totale', 'complessivo', 'complessiva']
244
 
 
296
  extract_button.click(fn = ocr_interface,
297
  inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
298
 
299
+ gr.Examples(['Example1(scanned).pdf', 'Example2.pdf', 'Example3Large.pdf'], inputs = pdf_input)
300
 
301
  demo.launch()