Spaces:
Running
Running
UniquePratham
committed on
Commit
•
aa47a7c
1
Parent(s):
1a5d3d0
Update app.py
Browse files
app.py
CHANGED
@@ -9,13 +9,19 @@ import torch
|
|
9 |
import tempfile
|
10 |
import os
|
11 |
import re
|
12 |
-
import
|
13 |
from groq import Groq
|
14 |
|
15 |
# Page configuration
|
16 |
st.set_page_config(page_title="DualTextOCRFusion", page_icon="π", layout="wide")
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Load Surya OCR Models (English + Hindi)
|
20 |
det_processor, det_model = load_det_processor(), load_det_model()
|
21 |
det_model.to(device)
|
@@ -44,25 +50,18 @@ def init_qwen_model():
|
|
44 |
|
45 |
# Text Cleaning AI - Clean spaces, handle dual languages
|
46 |
def clean_extracted_text(text):
|
47 |
-
# Remove extra spaces
|
48 |
cleaned_text = re.sub(r'\s+', ' ', text).strip()
|
49 |
cleaned_text = re.sub(r'\s([?.!,])', r'\1', cleaned_text)
|
50 |
return cleaned_text
|
51 |
|
52 |
# Polish the text using a model
|
53 |
def polish_text_with_ai(cleaned_text):
|
54 |
-
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces
|
55 |
client = Groq(api_key="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")
|
56 |
chat_completion = client.chat.completions.create(
|
57 |
-
|
58 |
-
{
|
59 |
-
|
60 |
-
"content": "You are a pedantic sentence corrector. Remove extra spaces between and within words to make the sentence meaningful in English, Hindi, or Hinglish, according to the context of the sentence, without changing any words."
|
61 |
-
},
|
62 |
-
{
|
63 |
-
"role": "user",
|
64 |
-
"content": prompt,
|
65 |
-
}
|
66 |
],
|
67 |
model="gemma2-9b-it",
|
68 |
)
|
@@ -88,11 +87,9 @@ def extract_text_qwen(image_file, model, processor):
|
|
88 |
|
89 |
# Highlight keyword search
|
90 |
def highlight_text(text, search_term):
|
91 |
-
if not search_term:
|
92 |
return text
|
93 |
-
# Use a regular expression to search for the term, case insensitive
|
94 |
pattern = re.compile(re.escape(search_term), re.IGNORECASE)
|
95 |
-
# Highlight matched terms with yellow background
|
96 |
return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
|
97 |
|
98 |
# Title and UI
|
@@ -106,89 +103,73 @@ model_choice = st.sidebar.selectbox("Select OCR Model:", ("GOT_CPU", "GOT_GPU",
|
|
106 |
|
107 |
# Upload Section
|
108 |
uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
-
# Input from clipboard
|
111 |
-
if st.sidebar.button("Paste from Clipboard"):
|
112 |
-
try:
|
113 |
-
clipboard_data = st.experimental_get_clipboard()
|
114 |
-
if clipboard_data:
|
115 |
-
# Assuming clipboard data is base64 encoded image
|
116 |
-
image_data = base64.b64decode(clipboard_data)
|
117 |
-
uploaded_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
|
118 |
-
uploaded_file.write(image_data)
|
119 |
-
uploaded_file.seek(0)
|
120 |
-
except:
|
121 |
-
st.sidebar.warning("Clipboard data is not an image.")
|
122 |
-
|
123 |
-
# Input from camera
|
124 |
-
camera_file = st.sidebar.camera_input("Capture from Camera")
|
125 |
-
if camera_file:
|
126 |
-
uploaded_file = camera_file
|
127 |
-
|
128 |
-
# Predict button
|
129 |
-
predict_button = st.sidebar.button("Predict")
|
130 |
-
|
131 |
-
# Main columns
|
132 |
-
col1, col2 = st.columns([2, 1])
|
133 |
-
|
134 |
-
# Display image preview
|
135 |
-
if uploaded_file:
|
136 |
-
image = Image.open(uploaded_file)
|
137 |
-
with col1:
|
138 |
-
col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
|
139 |
-
|
140 |
-
# Handle predictions
|
141 |
-
if predict_button and uploaded_file:
|
142 |
-
with st.spinner("Processing..."):
|
143 |
-
# Save uploaded image
|
144 |
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
|
145 |
-
temp_file.write(uploaded_file.getvalue())
|
146 |
-
temp_file_path = temp_file.name
|
147 |
-
|
148 |
-
image = Image.open(temp_file_path)
|
149 |
-
image = image.convert("RGB")
|
150 |
-
|
151 |
-
if model_choice == "GOT_CPU":
|
152 |
-
got_model, tokenizer = init_got_model()
|
153 |
-
extracted_text = extract_text_got(temp_file_path, got_model, tokenizer)
|
154 |
-
|
155 |
-
elif model_choice == "GOT_GPU":
|
156 |
-
got_gpu_model, tokenizer = init_got_gpu_model()
|
157 |
-
extracted_text = extract_text_got(temp_file_path, got_gpu_model, tokenizer)
|
158 |
-
|
159 |
-
elif model_choice == "Qwen":
|
160 |
-
qwen_model, qwen_processor = init_qwen_model()
|
161 |
-
extracted_text = extract_text_qwen(temp_file_path, qwen_model, qwen_processor)
|
162 |
-
|
163 |
-
elif model_choice == "Surya (English+Hindi)":
|
164 |
-
langs = ["en", "hi"]
|
165 |
-
predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
|
166 |
-
text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
|
167 |
-
extracted_text = ' '.join(text_list)
|
168 |
-
|
169 |
-
# Clean extracted text
|
170 |
-
cleaned_text = clean_extracted_text(extracted_text)
|
171 |
-
|
172 |
-
# Optionally, polish text with AI model for better language flow
|
173 |
-
polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
|
174 |
-
|
175 |
-
# Delete temp file
|
176 |
-
if os.path.exists(temp_file_path):
|
177 |
-
os.remove(temp_file_path)
|
178 |
-
|
179 |
-
# Display extracted text and search functionality
|
180 |
st.subheader("Extracted Text (Cleaned & Polished)")
|
181 |
st.markdown(polished_text, unsafe_allow_html=True)
|
182 |
|
183 |
# Input box for real-time search
|
184 |
-
search_query = st.text_input("Search in extracted text:", key="search_query", placeholder="Type to search...")
|
185 |
-
|
186 |
-
#
|
187 |
if search_query:
|
188 |
-
# Highlight the search term in the text
|
189 |
highlighted_text = highlight_text(polished_text, search_query)
|
190 |
st.markdown("### Highlighted Search Results:")
|
191 |
st.markdown(highlighted_text, unsafe_allow_html=True)
|
192 |
-
else:
|
193 |
-
st.markdown("### Extracted Text:")
|
194 |
-
st.markdown(polished_text)
|
|
|
9 |
import tempfile
|
10 |
import os
|
11 |
import re
|
12 |
+
import json
|
13 |
from groq import Groq
|
14 |
|
15 |
# Page configuration
|
16 |
st.set_page_config(page_title="DualTextOCRFusion", page_icon="π", layout="wide")
|
17 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
18 |
|
19 |
+
# Directories for images and results
|
20 |
+
IMAGES_DIR = "images"
|
21 |
+
RESULTS_DIR = "results"
|
22 |
+
os.makedirs(IMAGES_DIR, exist_ok=True)
|
23 |
+
os.makedirs(RESULTS_DIR, exist_ok=True)
|
24 |
+
|
25 |
# Load Surya OCR Models (English + Hindi)
|
26 |
det_processor, det_model = load_det_processor(), load_det_model()
|
27 |
det_model.to(device)
|
|
|
50 |
|
51 |
# Text Cleaning AI - Clean spaces, handle dual languages
def clean_extracted_text(text):
    """Normalize OCR output: collapse whitespace runs to single spaces,
    trim the ends, and remove any space left before ?, ., ! or ,."""
    collapsed = re.sub(r'\s+', ' ', text).strip()
    return re.sub(r'\s([?.!,])', r'\1', collapsed)
|
56 |
|
57 |
# Polish the text using a model
|
58 |
def polish_text_with_ai(cleaned_text):
|
59 |
+
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces."
|
60 |
client = Groq(api_key="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")
|
61 |
chat_completion = client.chat.completions.create(
|
62 |
+
messages=[
|
63 |
+
{"role": "system", "content": "You are a pedantic sentence corrector."},
|
64 |
+
{"role": "user", "content": prompt},
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
],
|
66 |
model="gemma2-9b-it",
|
67 |
)
|
|
|
87 |
|
88 |
# Highlight keyword search
def highlight_text(text, search_term):
    """Wrap every case-insensitive occurrence of search_term in *text*
    with a yellow-background <span>; return text unchanged when the
    search term is empty."""
    if not search_term:
        return text
    matcher = re.compile(re.escape(search_term), re.IGNORECASE)

    def wrap(match):
        return f'<span style="background-color: yellow;">{match.group()}</span>'

    return matcher.sub(wrap, text)
|
94 |
|
95 |
# Title and UI
|
|
|
103 |
|
104 |
# Upload Section
uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
clipboard_text = st.sidebar.text_area("Paste image path from clipboard:")

# Resolve the image path from whichever input was provided.
# Hoisted to top level so it is ALWAYS defined: the result_json_path
# expression below reads it unconditionally, and the original code only
# bound it inside `if uploaded_file or clipboard_text:` (NameError when
# the page renders with no input yet).
image_path = None
if uploaded_file:
    # NOTE(review): uploaded_file.name is used verbatim as a filename —
    # consider sanitizing it to prevent path traversal outside IMAGES_DIR.
    image_path = os.path.join(IMAGES_DIR, uploaded_file.name)
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
elif clipboard_text:
    image_path = clipboard_text.strip()

# Predict button
predict_button = st.sidebar.button("Predict")

# Main columns
col1, col2 = st.columns([2, 1])

# Per-image cache: if this image was already processed, its polished text
# lives in RESULTS_DIR as <basename>_result.json.
result_json_path = os.path.join(RESULTS_DIR, f"{os.path.basename(image_path)}_result.json") if image_path else None

if predict_button and image_path:
    if os.path.exists(result_json_path):
        # Cache hit: reuse the previously polished text, skip OCR entirely.
        with open(result_json_path, "r") as json_file:
            result_data = json.load(json_file)
        polished_text = result_data.get("polished_text", "")
    else:
        with st.spinner("Processing..."):
            image = Image.open(image_path).convert("RGB")

            # Dispatch to the OCR backend chosen in the sidebar.
            if model_choice == "GOT_CPU":
                got_model, tokenizer = init_got_model()
                extracted_text = extract_text_got(image_path, got_model, tokenizer)

            elif model_choice == "GOT_GPU":
                got_gpu_model, tokenizer = init_got_gpu_model()
                extracted_text = extract_text_got(image_path, got_gpu_model, tokenizer)

            elif model_choice == "Qwen":
                qwen_model, qwen_processor = init_qwen_model()
                extracted_text = extract_text_qwen(image_path, qwen_model, qwen_processor)

            elif model_choice == "Surya (English+Hindi)":
                langs = ["en", "hi"]
                predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
                # Surya returns structured predictions; pull out the text='...'
                # fields from their repr and join them into one string.
                text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
                extracted_text = ' '.join(text_list)

            cleaned_text = clean_extracted_text(extracted_text)
            # AI polishing only for the GOT models; Surya/Qwen output is used as-is.
            polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text

            # Save result to JSON so repeat predictions on this image hit the cache.
            with open(result_json_path, "w") as json_file:
                json.dump({"polished_text": polished_text}, json_file)

# Display image preview and text
if image_path:
    with col1:
        col1.image(image_path, caption='Uploaded Image', use_column_width=False, width=300)
164 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
165 |
# NOTE(review): polished_text is only bound inside the predict branch above —
# this section assumes a prediction has run; verify the rerun flow guarantees it.
st.subheader("Extracted Text (Cleaned & Polished)")
st.markdown(polished_text, unsafe_allow_html=True)

# Input box for real-time search.
# Fix: the original call read
#   on_change=lambda: st.session_state.update(search_query) disabled=not uploaded_file
# which is a SyntaxError (missing comma) and a broken callback
# (st.session_state.update expects a mapping/kwargs, and search_query is
# unbound when the lambda is defined). The widget's `key` already persists
# the value in session state, so the callback is dropped.
search_query = st.text_input(
    "Search in extracted text:",
    key="search_query",
    placeholder="Type to search...",
    disabled=not uploaded_file,
)

# Highlight the search term in the text
if search_query:
    highlighted_text = highlight_text(polished_text, search_query)
    st.markdown("### Highlighted Search Results:")
    st.markdown(highlighted_text, unsafe_allow_html=True)
|
|
|
|
|
|