# Page-capture residue (not program code): "Spaces: Sleeping"
import easyocr
import numpy as np
import streamlit as st
from PIL import Image
# Initialize the EasyOCR reader | |
reader = easyocr.Reader(['en', 'hi'], gpu=False) # 'en' for English, 'hi' for Hindi | |
# Function to process image and perform OCR | |
def process_image(image): | |
img = Image.open(image) | |
# Perform OCR | |
result = reader.readtext(img, detail=0, paragraph=False) # Return detailed results | |
# Join the extracted text with spaces and separate words with new lines | |
words = [word for block in result for word in block.split()] | |
return "\n".join(words) | |
# Function to highlight keywords in extracted text | |
def highlight_keywords(text, keyword): | |
highlighted_text = text.replace(keyword, f"**{keyword}**") | |
return highlighted_text | |
# Streamlit app UI | |
st.title("OCR Web App for Hindi & English Text") | |
st.write("Upload an image with Hindi and English text, extract the text, and search for keywords.") | |
# File uploader for images | |
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"]) | |
if uploaded_file is not None: | |
# Display the uploaded image | |
image = Image.open(uploaded_file) | |
st.image(image, caption="Uploaded Image", use_column_width=True) | |
# Perform OCR on the uploaded image | |
st.write("Extracting text...") | |
extracted_text = process_image(uploaded_file) | |
# Display the extracted text | |
st.subheader("Extracted Text:") | |
st.text(extracted_text) | |
# Search functionality | |
query = st.text_input("Enter a keyword to search in the extracted text:") | |
if query: | |
# Highlight the search keyword | |
st.subheader("Search Results:") | |
result = highlight_keywords(extracted_text, query) | |
st.write(result) |