UniquePratham committed on
Commit
3cb2a3f
•
1 Parent(s): beb4859

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +73 -72
app.py CHANGED
@@ -1,72 +1,73 @@
1
- import streamlit as st
2
- from ocr_cpu import extract_text_got # The updated OCR function
3
- import json
4
-
5
- # --- UI Styling ---
6
- st.set_page_config(page_title="DualTextOCRFusion",
7
- layout="centered", page_icon="🔍")
8
-
9
- st.markdown(
10
- """
11
- <style>
12
- .reportview-container {
13
- background: #f4f4f4;
14
- }
15
- .sidebar .sidebar-content {
16
- background: #e0e0e0;
17
- }
18
- h1 {
19
- color: #007BFF;
20
- }
21
- .upload-btn {
22
- background-color: #007BFF;
23
- color: white;
24
- padding: 10px;
25
- border-radius: 5px;
26
- text-align: center;
27
- }
28
- </style>
29
- """, unsafe_allow_html=True
30
- )
31
-
32
- # --- Title ---
33
- st.title("🔍 DualTextOCRFusion")
34
- st.write("Upload an image with **Hindi** and **English** text to extract and search for keywords.")
35
-
36
- # --- Image Upload Section ---
37
- uploaded_file = st.file_uploader(
38
- "Choose an image file", type=["jpg", "jpeg", "png"])
39
-
40
- if uploaded_file is not None:
41
- st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
42
-
43
- # Extract text from the image using the selected OCR function (GOT)
44
- with st.spinner("Extracting text using the model..."):
45
- try:
46
- extracted_text = extract_text_got(
47
- uploaded_file) # Pass uploaded_file directly
48
- if not extracted_text.strip():
49
- st.warning("No text extracted from the image.")
50
- except Exception as e:
51
- st.error(f"Error during text extraction: {str(e)}")
52
- extracted_text = ""
53
-
54
- # Display extracted text
55
- st.subheader("Extracted Text")
56
- st.text_area("Text", extracted_text, height=250)
57
-
58
- # Save extracted text for search
59
- if extracted_text:
60
- with open("extracted_text.json", "w") as json_file:
61
- json.dump({"text": extracted_text}, json_file)
62
-
63
- # --- Keyword Search ---
64
- st.subheader("Search for Keywords")
65
- keyword = st.text_input(
66
- "Enter a keyword to search in the extracted text")
67
-
68
- if keyword:
69
- if keyword.lower() in extracted_text.lower():
70
- st.success(f"Keyword **'{keyword}'** found in the text!")
71
- else:
72
- st.error(f"Keyword **'{keyword}'** not found.")
 
 
1
+ import streamlit as st
2
+ from ocr_cpu import extract_text_got, clean_text # Import OCR and text cleaning functions
3
+ import json
4
+
5
+ # --- UI Styling ---
6
+ st.set_page_config(page_title="DualTextOCRFusion", layout="centered", page_icon="🔍")
7
+
8
+ st.markdown(
9
+ """
10
+ <style>
11
+ .reportview-container {
12
+ background: #f4f4f4;
13
+ }
14
+ .sidebar .sidebar-content {
15
+ background: #e0e0e0;
16
+ }
17
+ h1 {
18
+ color: #007BFF;
19
+ }
20
+ .upload-btn {
21
+ background-color: #007BFF;
22
+ color: white;
23
+ padding: 10px;
24
+ border-radius: 5px;
25
+ text-align: center;
26
+ }
27
+ </style>
28
+ """,
29
+ unsafe_allow_html=True
30
+ )
31
+
32
+ # --- Title ---
33
+ st.title("🔍 DualTextOCRFusion")
34
+ st.write("Upload an image with **Hindi**, **English**, or **Hinglish** text to extract and clean text for keyword search.")
35
+
36
+ # --- Image Upload Section ---
37
+ uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "jpeg", "png"])
38
+
39
+ if uploaded_file is not None:
40
+ st.image(uploaded_file, caption='Uploaded Image', use_column_width=True)
41
+
42
+ # Extract text from the image using GOT OCR function
43
+ with st.spinner("Extracting text from the image..."):
44
+ try:
45
+ extracted_text = extract_text_got(uploaded_file) # Use GOT OCR to extract text
46
+ if not extracted_text.strip():
47
+ st.warning("No text extracted from the image.")
48
+ else:
49
+ # Clean the extracted text to remove extra spaces
50
+ cleaned_text = clean_text(extracted_text)
51
+ st.success("Text extraction and cleaning successful.")
52
+ except Exception as e:
53
+ st.error(f"Error during text extraction: {str(e)}")
54
+ extracted_text = cleaned_text = ""
55
+
56
+ # Display cleaned text
57
+ st.subheader("Cleaned Extracted Text")
58
+ st.text_area("Cleaned Text", cleaned_text, height=250)
59
+
60
+ # Save cleaned text for search
61
+ if cleaned_text:
62
+ with open("extracted_text.json", "w") as json_file:
63
+ json.dump({"text": cleaned_text}, json_file)
64
+
65
+ # --- Keyword Search ---
66
+ st.subheader("Search for Keywords")
67
+ keyword = st.text_input("Enter a keyword to search in the cleaned text")
68
+
69
+ if keyword:
70
+ if keyword.lower() in cleaned_text.lower():
71
+ st.success(f"Keyword **'{keyword}'** found in the text!")
72
+ else:
73
+ st.error(f"Keyword **'{keyword}'** not found.")