Spaces:

yashbyname
/

OCR_using_GOT_and_Tesseract

Running

File size: 1,534 Bytes

4322a12
37b7885
 
41eaee6
 
37b7885
efe753c
41eaee6
efe753c
37b7885
 
 
 
 
41eaee6
 
 
 
 
 
 
 
 
 
 
 
37b7885
 
 
41eaee6
37b7885
41eaee6
37b7885
 
 
 
 
 
 
 
 
 
 
 
 
 
efe753c
37b7885

import gradio as gr
import torch
import pytesseract
import cv2
import tempfile
from transformers import AutoTokenizer, AutoModel

# Tell pytesseract where the Tesseract executable lives.
# NOTE(review): this looks like a macOS Homebrew install location; confirm it
# matches the host this app is deployed on.
pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/bin/tesseract'  # Update this if necessary

# Hub identifier shared by the GOT-OCR tokenizer and model below.
_GOT_MODEL_ID = 'ucaslcl/GOT-OCR2_0'

# Load both once at import time so every request reuses the same objects.
# trust_remote_code=True lets transformers run the model repo's own code.
tokenizer_eng = AutoTokenizer.from_pretrained(_GOT_MODEL_ID, trust_remote_code=True)
model_eng = AutoModel.from_pretrained(_GOT_MODEL_ID, trust_remote_code=True)
model_eng = model_eng.eval()  # switch to inference mode

def perform_ocr(image, language):
    """Run the GOT OCR model on an image array and return its output.

    ``model_eng.chat`` is given a file path, so the array is first written to
    a PNG inside a temporary directory. The directory (and the PNG) is removed
    automatically once OCR has finished, even if the model call raises.

    Args:
        image: Image as a NumPy array. The Gradio input in this file uses
            ``type="numpy"``, which delivers RGB channel order; 3- and
            4-channel arrays are converted to OpenCV's BGR(A) order before
            saving so the file on disk has the correct colours. Arrays of any
            other shape (e.g. grayscale) are written unchanged.
        language: Selection from the language dropdown. Currently unused:
            the English GOT model is run regardless of this value.

    Returns:
        The result of ``model_eng.chat(..., ocr_type='ocr')`` for the image.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was supplied).
        OSError: If OpenCV could not write the temporary PNG.
    """
    import os  # local import: the module does not import os at file level

    if image is None:
        raise ValueError("No image provided for OCR.")

    # cv2.imwrite interprets colour arrays as BGR(A); swap channels so the
    # saved PNG matches what the user uploaded.
    frame = image
    if frame.ndim == 3 and frame.shape[2] == 3:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    elif frame.ndim == 3 and frame.shape[2] == 4:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGBA2BGRA)

    # A temporary directory avoids writing to a path that is still held open
    # by another handle, and guarantees cleanup on every exit path.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filename = os.path.join(temp_dir, "input.png")

        # imwrite reports failure through its return value, not an exception.
        if not cv2.imwrite(temp_filename, frame):
            raise OSError(f"Failed to write temporary image: {temp_filename}")

        # Perform OCR for English
        return model_eng.chat(tokenizer_eng, temp_filename, ocr_type='ocr')

def ocr_and_search(image, language):
    """Gradio callback: run OCR on the uploaded image and return the result.

    Args:
        image: Image value received from the Gradio image input.
        language: Value received from the language dropdown; passed through
            to ``perform_ocr`` unchanged.

    Returns:
        The value returned by ``perform_ocr`` for the given arguments.
    """
    # No extra processing is applied on top of the OCR step.
    return perform_ocr(image, language)

# Build the UI components first, then wire them to the OCR callback.
_image_input = gr.Image(type="numpy", label="Upload Image")
_language_input = gr.Dropdown(choices=["English", "Hindi"], label="Select Language")
_text_output = gr.Textbox(label="Extracted Text")

# Input order must match the (image, language) parameters of ocr_and_search.
iface = gr.Interface(
    fn=ocr_and_search,
    inputs=[_image_input, _language_input],
    outputs=_text_output,
    title="OCR Application",
    description="Upload an image to extract text using OCR.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()