srikar-v05 committed on
Commit
f563d24
·
verified ·
1 Parent(s): ef597c1

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +2 -8
  2. app.py +95 -0
README.md CHANGED
@@ -1,12 +1,6 @@
1
  ---
2
- title: Simple Image Search Using GOT OCR 2.0
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.44.0
8
- app_file: app.py
9
- pinned: false
10
  ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Simple_image_search_using_GOT_OCR_2.0
3
+ app_file: app.py
 
 
4
  sdk: gradio
5
  sdk_version: 4.44.0
 
 
6
  ---
 
 
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import html
import os
import re
import tempfile
import warnings

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import AutoModel, AutoTokenizer
9
+
10
# Silence all warnings so library notices do not clutter the application logs.
warnings.simplefilter("ignore")

# Hugging Face access token, read from the environment (None when HF_TOKEN is unset).
hf_token = os.getenv("HF_TOKEN")

# Load the GOT-OCR 2.0 tokenizer and model.
# `token=` is used instead of the deprecated `use_auth_token=` keyword.
tokenizer = AutoTokenizer.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    token=hf_token,
)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    # Place the model on the GPU when one is available, otherwise on the CPU.
    device_map='cuda' if torch.cuda.is_available() else 'cpu',
    use_safetensors=True,
    pad_token_id=tokenizer.eos_token_id,
    token=hf_token,
)
# Switch to inference mode.
model = model.eval()

# Most recent OCR text; written by perform_ocr and read by search_text.
# NOTE(review): this is module-level state, so it is shared by every user of
# the app -- consider gr.State if per-session results are required.
ocr_result = ""
28
+
29
# Run OCR on an uploaded image and remember the text for later searches
def perform_ocr(image):
    """Run the OCR model on ``image`` and return the recognised text.

    The result is also stored in the module-level ``ocr_result`` so that it
    can be searched afterwards.

    Args:
        image: The uploaded image as a numpy array, or ``None`` when no image
            has been provided.

    Returns:
        The value returned by ``model.chat(..., ocr_type='ocr')``, or a short
        notice when ``image`` is ``None`` (the stored result is then left
        untouched).
    """
    global ocr_result

    # Nothing uploaded: Image.fromarray(None) would raise, so bail out early.
    if image is None:
        return "Please upload an image before running OCR."

    # Convert the numpy array to a PIL image
    pil_image = Image.fromarray(image)

    # model.chat is given a file path, so the image is written to disk first.
    # A unique temporary file avoids collisions between concurrent requests.
    fd, image_file = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        pil_image.save(image_file)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            ocr_result = model.chat(tokenizer, image_file, ocr_type='ocr')
    finally:
        # Always clean up, even if saving or the model call fails.
        os.remove(image_file)

    return ocr_result
48
+
49
# Function to highlight search term with a different color (light blue)
def highlight_text(text, query):
    """Return ``text`` as HTML with every occurrence of ``query`` highlighted.

    Matching is literal (the query is not treated as a regular expression)
    and case-insensitive. Each occurrence is wrapped in a light-blue
    ``<span>`` and keeps the casing it has in ``text``. All text, matched or
    not, is HTML-escaped so it is displayed verbatim.

    Args:
        text: The text to search in.
        query: The term to highlight. An empty query highlights nothing.

    Returns:
        The HTML-escaped text with highlight spans around each match.
    """
    # An empty pattern matches at every position; treat it as "no highlight".
    if not query:
        return html.escape(text)

    pattern = re.compile(re.escape(query), re.IGNORECASE)

    pieces = []
    cursor = 0
    for match in pattern.finditer(text):
        # Text between the previous match and this one.
        pieces.append(html.escape(text[cursor:match.start()]))
        # Wrap the matched text itself (not the query) so the original casing
        # is preserved and backslashes in the query are never interpreted as
        # replacement-template escapes.
        pieces.append(
            "<span style='background-color: #ADD8E6; color: black;'>"
            f"{html.escape(match.group(0))}</span>"
        )
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))

    return "".join(pieces)
55
+
56
# Search within the stored OCR result; return the highlighted text and the matching lines
def search_text(query):
    """Search the stored OCR text for ``query``.

    Reads the module-level ``ocr_result``. Matching is case-insensitive and
    done per line.

    Args:
        query: The search term. When empty, no search is performed.

    Returns:
        A ``(ocr_html, matches_html)`` pair. ``ocr_html`` is the OCR text
        with the query highlighted (or the unmodified OCR text when the query
        is empty). ``matches_html`` holds the lines that contain the query,
        HTML-escaped and separated by ``<br>``, or ``"No matches found."``.
    """
    # If no query is provided, return the original OCR result
    if not query:
        return ocr_result, "No matches found."

    # Highlight the searched term in the OCR text
    highlighted_result = highlight_text(ocr_result, query)

    # Collect the lines that contain the query. They are shown in an HTML
    # component, so escape them and separate them with <br>; a plain newline
    # would collapse into a single space when rendered.
    needle = query.lower()
    matching_lines = [
        html.escape(line)
        for line in ocr_result.split('\n')
        if needle in line.lower()
    ]

    if not matching_lines:
        return highlighted_result, "No matches found."
    return highlighted_result, "<br>".join(matching_lines)
73
+
74
# Build the Gradio UI: an OCR section on top and a search section below it.
# NOTE(review): the nesting below is reconstructed from the statement order;
# confirm it against the original layout.
with gr.Blocks() as demo:
    # --- Image upload and OCR ---
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="numpy", label="Upload Image")
            # HTML component so highlighted search hits can be rendered
            ocr_output = gr.HTML(label="OCR Output")
            ocr_button = gr.Button("Run OCR")

    # --- Search inside the OCR text ---
    with gr.Row():
        with gr.Column():
            search_input = gr.Textbox(label="Search Text")
            # Separate component that lists the matching lines
            search_output = gr.HTML(label="Search Result")
            search_button = gr.Button("Search in OCR Text")

    # Wire the buttons to their handlers
    ocr_button.click(
        perform_ocr,
        inputs=image_input,
        outputs=ocr_output,
    )
    search_button.click(
        search_text,
        inputs=search_input,
        outputs=[ocr_output, search_output],
    )

# Start the app and request a public share link
demo.launch(share=True)