"""Streamlit front end for DualTextOCRFusion.

Lets the user upload an image, runs OCR on it via ``extract_text_got``,
cleans the result with ``clean_text``, shows the cleaned text, saves it to
``extracted_text.json`` and offers a case-insensitive keyword search over it.
"""

import json

import streamlit as st

from ocr_cpu import extract_text_got, clean_text  # OCR and text cleaning functions

# --- UI Styling ---
# set_page_config is issued before any other Streamlit call in this script.
st.set_page_config(page_title="DualTextOCRFusion", layout="centered", page_icon="🔍")

# NOTE(review): the HTML/CSS payload is blank here — presumably a custom
# style block was intended; confirm whether this call is still needed.
st.markdown(
    """ """, unsafe_allow_html=True
)

# --- Title ---
st.title("🔍 DualTextOCRFusion")
st.write("Upload an image with **Hindi**, **English**, or **Hinglish** text to extract and clean text for keyword search.")

# --- Image Upload Section ---
uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)

    # Default to an empty string so every later use of ``cleaned_text`` is
    # safe even when OCR yields nothing or raises (previously the "no text"
    # path left the name unbound and the text area below hit a NameError).
    cleaned_text = ""

    # Extract text from the image using the GOT OCR function.
    with st.spinner("Extracting text from the image..."):
        try:
            extracted_text = extract_text_got(uploaded_file)
            if not extracted_text.strip():
                st.warning("No text extracted from the image.")
            else:
                # Clean the extracted text to remove extra spaces.
                cleaned_text = clean_text(extracted_text)
                st.success("Text extraction and cleaning successful.")
        except Exception as e:
            # Top-level UI boundary: report the failure to the user and
            # continue with an empty result instead of crashing the app.
            st.error(f"Error during text extraction: {e}")
            cleaned_text = ""

    # Display cleaned text (empty when extraction failed or found nothing).
    st.subheader("Cleaned Extracted Text")
    st.text_area("Cleaned Text", cleaned_text, height=250)

    # Save cleaned text for search; nothing is written when there is no text.
    if cleaned_text:
        # json.dump escapes non-ASCII by default, so the payload is pure
        # ASCII; the explicit encoding just removes platform dependence.
        with open("extracted_text.json", "w", encoding="utf-8") as json_file:
            json.dump({"text": cleaned_text}, json_file)

    # --- Keyword Search ---
    st.subheader("Search for Keywords")
    keyword = st.text_input("Enter a keyword to search in the cleaned text")

    if keyword:
        # Case-insensitive substring match against the cleaned text.
        if keyword.lower() in cleaned_text.lower():
            st.success(f"Keyword **'{keyword}'** found in the text!")
        else:
            st.error(f"Keyword **'{keyword}'** not found.")