Spaces:
Build error
Build error
MrFeelgoood
committed on
Commit
•
5568e6f
1
Parent(s):
a30c743
Fixed bug in ocr function
Browse files
Fixed bugs in the OCR function
app.py
CHANGED
@@ -231,14 +231,14 @@ def pdf_ocr(file, model_t, question):
|
|
231 |
# Perform OCR on the PDF if the extracted text is empty
|
232 |
if not text:
|
233 |
# Convert PDF pages to images
|
234 |
-
images =
|
235 |
for i, img in enumerate(images):
|
236 |
text += pytesseract.image_to_string(img, lang='ita')
|
237 |
|
238 |
# Clear the image list to free up memory
|
239 |
del images
|
240 |
|
241 |
-
ks = ('mq', 'metri quadri', 'm2')
|
242 |
quest = "Quanti metri quadri misura la superficie?"
|
243 |
totalK = ['totale', 'complessivo', 'complessiva']
|
244 |
|
@@ -296,6 +296,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
296 |
extract_button.click(fn = ocr_interface,
|
297 |
inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
|
298 |
|
299 |
-
gr.Examples(['Example1.pdf', 'Example2.pdf'], inputs = pdf_input)
|
300 |
|
301 |
demo.launch()
|
|
|
231 |
# Perform OCR on the PDF if the extracted text is empty
|
232 |
if not text:
|
233 |
# Convert PDF pages to images
|
234 |
+
images = convert_from_bytes(content)
|
235 |
for i, img in enumerate(images):
|
236 |
text += pytesseract.image_to_string(img, lang='ita')
|
237 |
|
238 |
# Clear the image list to free up memory
|
239 |
del images
|
240 |
|
241 |
+
# Spellings of the square-metre unit (how `ks` is consumed lies outside this hunk).
# Every entry needs its own comma: adjacent string literals are silently
# concatenated, which previously merged 'Mq' and 'metri quadri' into one bogus entry.
ks = ('mq', 'MQ', 'Mq', 'metri quadri', 'm2')
|
242 |
quest = "Quanti metri quadri misura la superficie?"
|
243 |
totalK = ['totale', 'complessivo', 'complessiva']
|
244 |
|
|
|
296 |
extract_button.click(fn = ocr_interface,
|
297 |
inputs=[pdf_input, model_input, question_input], outputs=[values_output, total_output, text_output])
|
298 |
|
299 |
+
gr.Examples(['Example1(scanned).pdf', 'Example2.pdf', 'Example3Large.pdf'], inputs = pdf_input)
|
300 |
|
301 |
demo.launch()
|