# Spaces: manuth/khmer_English_image_to_text_ (status: Sleeping)
# File size: 2,869 Bytes

import cv2
import os
import pytesseract
import gradio as gr
from gradio import Interface, Image, Text
import numpy as np
from PIL import Image
from PIL import UnidentifiedImageError

def process_image(input_image):
    """Locate text regions in an image and OCR each one with Tesseract.

    The image is binarised with an inverted Otsu threshold and dilated with
    an 18x18 rectangular kernel so that neighbouring glyphs merge into
    blobs. The bounding box of every external contour is then passed to
    Tesseract using the ``eng+khm`` language models.

    Args:
        input_image: A ``PIL.Image.Image``, or a file name that
            ``cv2.imread`` can open.

    Returns:
        A ``(text, image)`` tuple. On success ``text`` is the recognised
        text of all regions joined by newlines (top-to-bottom, then
        left-to-right) and ``image`` is an RGB ``numpy`` array with the
        detected boxes drawn in green. On any failure ``text`` is an error
        message and ``image`` is ``None``.
    """
    try:
        if input_image is None:
            raise ValueError("No image was provided.")

        # Work in RGB throughout: that is the channel order PIL hands in and
        # the order in which the returned array is displayed.
        if isinstance(input_image, Image.Image):
            # convert("RGB") normalises grayscale, palette and RGBA inputs
            # to a 3-channel 8-bit image.
            rgb = np.array(input_image.convert("RGB"))
        else:
            # cv2.imread returns BGR data (or None when the file is unreadable).
            bgr = cv2.imread(input_image)
            if bgr is None:
                raise ValueError("Could not read the image. Please check the image format.")
            rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
        dilated = cv2.dilate(binary, kernel, iterations=1)

        # Each external contour of the dilated mask is treated as one text region.
        contours, _ = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        boxes = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            boxes.append((y, y + h, x, x + w))

        # Reading order: top edge first, left edge as the tie-breaker so that
        # regions on the same row do not come out in arbitrary contour order.
        boxes.sort(key=lambda box: (box[0], box[2]))

        ocr_config = r'-l eng+khm --oem 3 --psm 6'
        recognized_text = []
        for y_min, y_max, x_min, x_max in boxes:
            region = rgb[y_min:y_max, x_min:x_max]
            text = pytesseract.image_to_string(region, config=ocr_config).strip()
            # Regions in which Tesseract found nothing would only add blank lines.
            if text:
                recognized_text.append(text)

        # Draw the detected boxes on a copy so the OCR input stays untouched.
        annotated = rgb.copy()
        for y_min, y_max, x_min, x_max in boxes:
            cv2.rectangle(annotated, (x_min, y_min), (x_max, y_max), (0, 255, 0), 2)

        return "\n".join(recognized_text), annotated
    except Exception as e:
        # UI boundary: report the failure in the text output instead of raising.
        return "Could not process the image. Error: " + str(e), None

# Web UI: one uploaded image in; the recognised text and the annotated image out.
iface = gr.Interface(
    process_image,
    # The input carries its own label so it is not confused with the
    # "Processed Image" output component.
    inputs=[gr.Image(type="pil", label="Input Image")],
    outputs=[
        gr.Text(label="Detected Labels"),
        gr.Image(type="pil", label="Processed Image"),
    ],
    title="Bank Statement OCR",
    flagging_options=["blurry", "incorrect", "other"],
)

# share=True asks Gradio for a public share link in addition to the local server.
iface.launch(debug=True, share=True)