Spaces:
Running
Running
import streamlit as st | |
from ocr_cpu import extract_text_got, clean_text # Import OCR and text cleaning functions | |
import json | |
# --- UI Styling ---
# Custom CSS injected into the page: light backgrounds for the main view and
# sidebar, a blue <h1>, and a blue rounded ".upload-btn" class.
_CUSTOM_CSS = """
<style>
.reportview-container {
background: #f4f4f4;
}
.sidebar .sidebar-content {
background: #e0e0e0;
}
h1 {
color: #007BFF;
}
.upload-btn {
background-color: #007BFF;
color: white;
padding: 10px;
border-radius: 5px;
text-align: center;
}
</style>
"""

# Page metadata (browser tab title, icon, centered layout).
st.set_page_config(
    page_title="DualTextOCRFusion",
    layout="centered",
    page_icon="π",
)

# unsafe_allow_html is required so the raw <style> tag is emitted as HTML
# rather than being escaped.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# --- Title ---
# Page heading followed by a one-line description of what the app accepts.
st.title("π DualTextOCRFusion")
st.write(
    "Upload an image with **Hindi**, **English**, or **Hinglish** text "
    "to extract and clean text for keyword search."
)
# --- Image Upload Section ---
# Flow: upload an image -> run OCR -> clean the text -> show it -> persist it
# to extracted_text.json -> let the user search it for a keyword.
uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    # NOTE(review): newer Streamlit releases deprecate `use_column_width` in
    # favour of `use_container_width` - confirm against the pinned version.
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Bind both results up front. Previously the "no text extracted" branch
    # left `cleaned_text` undefined, so the display/save/search code below
    # raised NameError whenever OCR returned only whitespace.
    extracted_text = cleaned_text = ""

    # Extract text from the image using GOT OCR function
    with st.spinner("Extracting text from the image..."):
        try:
            extracted_text = extract_text_got(uploaded_file)  # Use GOT OCR to extract text
            if not extracted_text.strip():
                st.warning("No text extracted from the image.")
            else:
                # Clean the extracted text to remove extra spaces
                cleaned_text = clean_text(extracted_text)
                st.success("Text extraction and cleaning successful.")
        except Exception as e:
            # Broad catch is deliberate: this is the top-level UI boundary, so
            # any OCR/cleaning failure is reported on the page instead of
            # crashing the app run.
            st.error(f"Error during text extraction: {str(e)}")
            extracted_text = cleaned_text = ""

    # Display cleaned text (an empty box when nothing was extracted)
    st.subheader("Cleaned Extracted Text")
    st.text_area("Cleaned Text", cleaned_text, height=250)

    # Save cleaned text for search; skipped when there is nothing to save.
    if cleaned_text:
        # Explicit encoding so the file is written identically on every
        # platform regardless of the locale's default.
        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
            json.dump({"text": cleaned_text}, json_file)

    # --- Keyword Search ---
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the cleaned text")

    if keyword:
        # Case-insensitive substring match against the cleaned text.
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")