Spaces:
Running
Running
UniquePratham
committed on
Commit
•
137d741
1
Parent(s):
f0cbf19
Update app.py
Browse files
app.py
CHANGED
@@ -12,6 +12,7 @@ import re
|
|
12 |
import json
|
13 |
import base64
|
14 |
from groq import Groq
|
|
|
15 |
|
16 |
# Page configuration
|
17 |
st.set_page_config(page_title="DualTextOCRFusion", page_icon="π", layout="wide")
|
@@ -51,7 +52,7 @@ def clean_extracted_text(text):
|
|
51 |
|
52 |
# Polish the text using a model
|
53 |
def polish_text_with_ai(cleaned_text):
|
54 |
-
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible.
|
55 |
client = Groq(api_key="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")
|
56 |
chat_completion = client.chat.completions.create(
|
57 |
messages=[
|
@@ -86,11 +87,13 @@ def extract_text_qwen(image_file, model, processor):
|
|
86 |
except Exception as e:
|
87 |
return f"An error occurred: {str(e)}"
|
88 |
|
89 |
-
#
|
90 |
def highlight_text(text, search_term):
|
91 |
-
if not search_term:
|
92 |
return text
|
|
|
93 |
pattern = re.compile(re.escape(search_term), re.IGNORECASE)
|
|
|
94 |
return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
|
95 |
|
96 |
# Title and UI
|
@@ -193,14 +196,10 @@ if uploaded_file:
|
|
193 |
else:
|
194 |
st.session_state["highlighted_result"] = extracted_text
|
195 |
|
196 |
-
|
197 |
-
|
198 |
-
key="search_query",
|
199 |
-
placeholder="Type to search...",
|
200 |
-
on_change=update_search,
|
201 |
-
disabled=not uploaded_file
|
202 |
-
)
|
203 |
|
|
|
204 |
if "highlighted_result" in st.session_state:
|
205 |
st.markdown("### Highlighted Search Results:")
|
206 |
st.markdown(st.session_state["highlighted_result"], unsafe_allow_html=True)
|
|
|
12 |
import json
|
13 |
import base64
|
14 |
from groq import Groq
|
15 |
+
from st_keyup import st_keyup
|
16 |
|
17 |
# Page configuration
|
18 |
st.set_page_config(page_title="DualTextOCRFusion", page_icon="π", layout="wide")
|
|
|
52 |
|
53 |
# Polish the text using a model
|
54 |
def polish_text_with_ai(cleaned_text):
|
55 |
+
prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible, along with relevant details or insights that an AI can provide about the extracted text. Extracted Text : {cleaned_text}"
|
56 |
client = Groq(api_key="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")
|
57 |
chat_completion = client.chat.completions.create(
|
58 |
messages=[
|
|
|
87 |
except Exception as e:
|
88 |
return f"An error occurred: {str(e)}"
|
89 |
|
90 |
+
# Function to highlight the keyword in the text
|
91 |
def highlight_text(text, search_term):
|
92 |
+
if not search_term: # If no search term is provided, return the original text
|
93 |
return text
|
94 |
+
# Use a regular expression to search for the term, case insensitive
|
95 |
pattern = re.compile(re.escape(search_term), re.IGNORECASE)
|
96 |
+
# Highlight matched terms with yellow background
|
97 |
return pattern.sub(lambda m: f'<span style="background-color: yellow;">{m.group()}</span>', text)
|
98 |
|
99 |
# Title and UI
|
|
|
196 |
else:
|
197 |
st.session_state["highlighted_result"] = extracted_text
|
198 |
|
199 |
+
# Input search term with real-time update on key press
|
200 |
+
search_query = st_keyup("Search in extracted text:", key="search_key", on_change=update_search)
|
|
|
|
|
|
|
|
|
|
|
201 |
|
202 |
+
# Display highlighted results if they exist in session state
|
203 |
if "highlighted_result" in st.session_state:
|
204 |
st.markdown("### Highlighted Search Results:")
|
205 |
st.markdown(st.session_state["highlighted_result"], unsafe_allow_html=True)
|