Spaces:

manuth
/

khmer_English_image_to_text_

Sleeping

App Files Files Community

khmer_English_image_to_text_ / app.py

manuth

sharing public

3685a25 10 months ago

raw

history blame

2.87 kB

	import cv2
	import os
	import pytesseract
	import gradio as gr
	from gradio import Interface, Image, Text
	import numpy as np
	from PIL import Image
	from PIL import UnidentifiedImageError

	def _load_rgb_image(input_image):
	    """Return *input_image* as an 8-bit, 3-channel RGB NumPy array.

	    Args:
	        input_image: Either a PIL image or a file path readable by
	            ``cv2.imread``.

	    Raises:
	        ValueError: If OpenCV cannot decode the file.
	    """
	    if isinstance(input_image, Image.Image):
	        # convert("RGB") normalises RGBA / greyscale / palette images so the
	        # later colour conversions always see exactly three 8-bit channels.
	        return np.array(input_image.convert("RGB"))

	    # cv2.imread returns channels in BGR order (or None on failure), so it
	    # must be converted explicitly rather than treated as RGB.
	    bgr = cv2.imread(input_image)
	    if bgr is None or bgr.dtype != np.uint8:
	        raise ValueError("Could not read the image. Please check the image format.")
	    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


	def process_image(input_image):
	    """Run English + Khmer OCR on an image and outline the detected text regions.

	    The image is binarised with an inverted Otsu threshold and dilated with an
	    18x18 rectangular kernel so neighbouring glyphs merge into blobs. The
	    bounding box of every external contour is then OCR'd with Tesseract, in
	    top-to-bottom order of the boxes' upper edges.

	    Args:
	        input_image: A PIL image, or a file path readable by ``cv2.imread``.

	    Returns:
	        A ``(text, image)`` tuple. On success ``text`` holds the recognised
	        text of each region joined by newlines and ``image`` is an RGB NumPy
	        array with a green rectangle drawn around each region. On failure
	        ``text`` is an error message and ``image`` is ``None``.
	    """
	    if input_image is None:
	        # Nothing was supplied; report it clearly instead of surfacing an
	        # opaque OpenCV error.
	        return "Could not process the image. Error: No image was provided.", None

	    try:
	        rgb = _load_rgb_image(input_image)

	        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
	        _, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
	        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
	        dilation = cv2.dilate(thresh, rect_kernel, iterations=1)

	        # Find candidate text regions as bounding boxes of the external contours
	        contours, _ = cv2.findContours(dilation, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	        text_lines = []
	        for cnt in contours:
	            x, y, w, h = cv2.boundingRect(cnt)
	            text_lines.append((y, y + h, x, x + w))

	        # Sort regions by their top y-coordinate
	        text_lines.sort(key=lambda line: line[0])

	        # Extract text from each region using Tesseract. The crops come from the
	        # RGB array because pytesseract assumes RGB channel order for arrays.
	        custom_config = r'-l eng+khm --oem 3 --psm 6'
	        recognized_text = [
	            pytesseract.image_to_string(
	                rgb[y_min:y_max, x_min:x_max], config=custom_config
	            ).strip()
	            for y_min, y_max, x_min, x_max in text_lines
	        ]

	        # Combine recognized text into a single string
	        full_text = "\n".join(recognized_text)

	        # Draw bounding boxes on a copy so the OCR input stays untouched
	        result_rgb = rgb.copy()
	        for y_min, y_max, x_min, x_max in text_lines:
	            cv2.rectangle(result_rgb, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

	        return full_text, result_rgb
	    except Exception as e:
	        # UI boundary: report any failure as text rather than crashing the app.
	        return "Could not process the image. Error: " + str(e), None

	# Gradio UI: one image input, and two outputs (the recognised text and the
	# image returned by process_image).
	iface = gr.Interface(
	    fn=process_image,
	    # The input gets its own label so it is not confused with the output
	    # image, which is labelled "Processed Image".
	    inputs=[gr.Image(type="pil", label="Input Image")],
	    outputs=[
	        gr.Text(label="Detected Labels"),
	        gr.Image(type="pil", label="Processed Image"),
	    ],
	    title="Bank Statement OCR",
	    # description="Upload an image containing text to perform OCR and see the detected text and image."
	    flagging_options=["blurry", "incorrect", "other"],
	)

	# Start the web server with debug output and a public share link requested.
	iface.launch(debug=True, share=True)