UniquePratham committed on
Commit
aa47a7c
β€’
1 Parent(s): 1a5d3d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -92
app.py CHANGED
@@ -9,13 +9,19 @@ import torch
9
  import tempfile
10
  import os
11
  import re
12
- import base64
13
  from groq import Groq
14
 
15
  # Page configuration
16
  st.set_page_config(page_title="DualTextOCRFusion", page_icon="πŸ”", layout="wide")
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
 
 
 
 
 
 
19
  # Load Surya OCR Models (English + Hindi)
20
  det_processor, det_model = load_det_processor(), load_det_model()
21
  det_model.to(device)
@@ -44,25 +50,18 @@ def init_qwen_model():
44
 
45
  # Text Cleaning AI - Clean spaces, handle dual languages
46
  def clean_extracted_text(text):
47
- # Remove extra spaces
48
  cleaned_text = re.sub(r'\s+', ' ', text).strip()
49
  cleaned_text = re.sub(r'\s([?.!,])', r'\1', cleaned_text)
50
  return cleaned_text
51
 
52
  # Polish the text using a model
53
  def polish_text_with_ai(cleaned_text):
54
- prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text : {cleaned_text}"
55
  client = Groq(api_key="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")
56
  chat_completion = client.chat.completions.create(
57
- messages=[
58
- {
59
- "role": "system",
60
- "content": "You are a pedantic sentence corrector. Remove extra spaces between and within words to make the sentence meaningful in English, Hindi, or Hinglish, according to the context of the sentence, without changing any words."
61
- },
62
- {
63
- "role": "user",
64
- "content": prompt,
65
- }
66
  ],
67
  model="gemma2-9b-it",
68
  )
@@ -88,11 +87,9 @@ def extract_text_qwen(image_file, model, processor):
88
 
89
  # Highlight keyword search
90
  def highlight_text(text, search_term):
91
- if not search_term: # If no search term is provided, return the original text
92
  return text
93
- # Use a regular expression to search for the term, case insensitive
94
  pattern = re.compile(re.escape(search_term), re.IGNORECASE)
95
- # Highlight matched terms with yellow background
96
  return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
97
 
98
  # Title and UI
@@ -106,89 +103,73 @@ model_choice = st.sidebar.selectbox("Select OCR Model:", ("GOT_CPU", "GOT_GPU",
106
 
107
  # Upload Section
108
  uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
 
110
- # Input from clipboard
111
- if st.sidebar.button("Paste from Clipboard"):
112
- try:
113
- clipboard_data = st.experimental_get_clipboard()
114
- if clipboard_data:
115
- # Assuming clipboard data is base64 encoded image
116
- image_data = base64.b64decode(clipboard_data)
117
- uploaded_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
118
- uploaded_file.write(image_data)
119
- uploaded_file.seek(0)
120
- except:
121
- st.sidebar.warning("Clipboard data is not an image.")
122
-
123
- # Input from camera
124
- camera_file = st.sidebar.camera_input("Capture from Camera")
125
- if camera_file:
126
- uploaded_file = camera_file
127
-
128
- # Predict button
129
- predict_button = st.sidebar.button("Predict")
130
-
131
- # Main columns
132
- col1, col2 = st.columns([2, 1])
133
-
134
- # Display image preview
135
- if uploaded_file:
136
- image = Image.open(uploaded_file)
137
- with col1:
138
- col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
139
-
140
- # Handle predictions
141
- if predict_button and uploaded_file:
142
- with st.spinner("Processing..."):
143
- # Save uploaded image
144
- with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
145
- temp_file.write(uploaded_file.getvalue())
146
- temp_file_path = temp_file.name
147
-
148
- image = Image.open(temp_file_path)
149
- image = image.convert("RGB")
150
-
151
- if model_choice == "GOT_CPU":
152
- got_model, tokenizer = init_got_model()
153
- extracted_text = extract_text_got(temp_file_path, got_model, tokenizer)
154
-
155
- elif model_choice == "GOT_GPU":
156
- got_gpu_model, tokenizer = init_got_gpu_model()
157
- extracted_text = extract_text_got(temp_file_path, got_gpu_model, tokenizer)
158
-
159
- elif model_choice == "Qwen":
160
- qwen_model, qwen_processor = init_qwen_model()
161
- extracted_text = extract_text_qwen(temp_file_path, qwen_model, qwen_processor)
162
-
163
- elif model_choice == "Surya (English+Hindi)":
164
- langs = ["en", "hi"]
165
- predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
166
- text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
167
- extracted_text = ' '.join(text_list)
168
-
169
- # Clean extracted text
170
- cleaned_text = clean_extracted_text(extracted_text)
171
-
172
- # Optionally, polish text with AI model for better language flow
173
- polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
174
-
175
- # Delete temp file
176
- if os.path.exists(temp_file_path):
177
- os.remove(temp_file_path)
178
-
179
- # Display extracted text and search functionality
180
  st.subheader("Extracted Text (Cleaned & Polished)")
181
  st.markdown(polished_text, unsafe_allow_html=True)
182
 
183
  # Input box for real-time search
184
- search_query = st.text_input("Search in extracted text:", key="search_query", placeholder="Type to search...")
185
-
186
- # Update results dynamically based on the search term
187
  if search_query:
188
- # Highlight the search term in the text
189
  highlighted_text = highlight_text(polished_text, search_query)
190
  st.markdown("### Highlighted Search Results:")
191
  st.markdown(highlighted_text, unsafe_allow_html=True)
192
- else:
193
- st.markdown("### Extracted Text:")
194
- st.markdown(polished_text)
 
9
  import tempfile
10
  import os
11
  import re
12
+ import json
13
  from groq import Groq
14
 
15
# Page configuration
st.set_page_config(page_title="DualTextOCRFusion", page_icon="🔍", layout="wide")
# Prefer GPU when available; the OCR models below are moved to this device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Directories for images and results
IMAGES_DIR = "images"    # uploaded images are persisted here
RESULTS_DIR = "results"  # per-image OCR output cached as JSON
os.makedirs(IMAGES_DIR, exist_ok=True)   # exist_ok: Streamlit reruns this module on every interaction
os.makedirs(RESULTS_DIR, exist_ok=True)

# Load Surya OCR Models (English + Hindi)
# NOTE(review): the recognition models (rec_model/rec_processor) are loaded
# past the end of this hunk — only the detection pair is visible here.
det_processor, det_model = load_det_processor(), load_det_model()
det_model.to(device)
 
50
 
51
# Text Cleaning AI - Clean spaces, handle dual languages
def clean_extracted_text(text):
    """Collapse whitespace runs and drop stray spaces before punctuation."""
    # Replace any run of whitespace with a single space and trim the ends.
    normalized = re.sub(r'\s+', ' ', text).strip()
    # Pull ?, ., !, and , back onto the preceding word.
    return re.sub(r'\s([?.!,])', r'\1', normalized)
56
 
57
  # Polish the text using a model
58
  def polish_text_with_ai(cleaned_text):
59
+ prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces."
60
  client = Groq(api_key="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")
61
  chat_completion = client.chat.completions.create(
62
+ messages=[
63
+ {"role": "system", "content": "You are a pedantic sentence corrector."},
64
+ {"role": "user", "content": prompt},
 
 
 
 
 
 
65
  ],
66
  model="gemma2-9b-it",
67
  )
 
87
 
88
# Highlight keyword search
def highlight_text(text, search_term):
    """Wrap every case-insensitive match of search_term in a yellow <span>."""
    if not search_term:
        return text
    # Escape the term so user input is matched literally, not as a regex.
    matcher = re.compile(re.escape(search_term), re.IGNORECASE)
    return matcher.sub(
        lambda hit: f'<span style="background-color: yellow;">{hit.group()}</span>',
        text,
    )
94
 
95
  # Title and UI
 
103
 
104
# Upload Section
uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
clipboard_text = st.sidebar.text_area("Paste image path from clipboard:")

# Resolve the image to work on: an uploaded file is persisted under
# IMAGES_DIR; otherwise a filesystem path typed by the user is used as-is.
image_path = None
if uploaded_file:
    image_path = os.path.join(IMAGES_DIR, uploaded_file.name)
    with open(image_path, "wb") as f:
        f.write(uploaded_file.getvalue())
elif clipboard_text:
    image_path = clipboard_text.strip()

# Predict button
predict_button = st.sidebar.button("Predict")

# Main columns
col1, col2 = st.columns([2, 1])

# Cached result location for this image (avoids re-running OCR on reruns).
result_json_path = os.path.join(RESULTS_DIR, f"{os.path.basename(image_path)}_result.json") if image_path else None

# FIX: polished_text was previously referenced in the display section even when
# "Predict" had not been clicked, raising NameError on first render.
polished_text = None

if predict_button and image_path:
    if os.path.exists(result_json_path):
        # Reuse a previously computed result for the same image file name.
        with open(result_json_path, "r") as json_file:
            result_data = json.load(json_file)
        polished_text = result_data.get("polished_text", "")
    else:
        with st.spinner("Processing..."):
            image = Image.open(image_path).convert("RGB")

            if model_choice == "GOT_CPU":
                got_model, tokenizer = init_got_model()
                extracted_text = extract_text_got(image_path, got_model, tokenizer)
            elif model_choice == "GOT_GPU":
                got_gpu_model, tokenizer = init_got_gpu_model()
                extracted_text = extract_text_got(image_path, got_gpu_model, tokenizer)
            elif model_choice == "Qwen":
                qwen_model, qwen_processor = init_qwen_model()
                extracted_text = extract_text_qwen(image_path, qwen_model, qwen_processor)
            elif model_choice == "Surya (English+Hindi)":
                langs = ["en", "hi"]
                predictions = run_ocr([image], [langs], det_model, det_processor, rec_model, rec_processor)
                # NOTE(review): parsing repr() output of the prediction object is
                # fragile — confirm surya exposes a structured text accessor.
                text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
                extracted_text = ' '.join(text_list)

            cleaned_text = clean_extracted_text(extracted_text)
            # Only GOT output goes through the LLM polish step; other models
            # are shown after whitespace cleaning alone.
            polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text

            # Cache the result so the same image is not re-processed.
            with open(result_json_path, "w") as json_file:
                json.dump({"polished_text": polished_text}, json_file)

# Display image preview and text
if image_path:
    with col1:
        col1.image(image_path, caption='Uploaded Image', use_column_width=False, width=300)

    # Only render the text panel once a prediction (or cache hit) produced it.
    if polished_text is not None:
        st.subheader("Extracted Text (Cleaned & Polished)")
        st.markdown(polished_text, unsafe_allow_html=True)

        # Input box for real-time search.
        # FIX: the original line was a syntax error (missing comma before
        # `disabled=`) and its on_change lambda called
        # st.session_state.update(search_query) with a positional string,
        # which would raise TypeError; text_input reruns the script on edit
        # anyway, so no callback is needed.
        search_query = st.text_input(
            "Search in extracted text:",
            key="search_query",
            placeholder="Type to search...",
            disabled=not uploaded_file,
        )

        # Highlight the search term in the text
        if search_query:
            highlighted_text = highlight_text(polished_text, search_query)
            st.markdown("### Highlighted Search Results:")
            st.markdown(highlighted_text, unsafe_allow_html=True)