UniquePratham committed on
Commit
bac8e56
•
1 Parent(s): 58c599f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +84 -53
app.py CHANGED
@@ -10,18 +10,13 @@ import tempfile
10
  import os
11
  import re
12
  import json
 
13
  from groq import Groq
14
 
15
  # Page configuration
16
  st.set_page_config(page_title="DualTextOCRFusion", page_icon="πŸ”", layout="wide")
17
  device = "cuda" if torch.cuda.is_available() else "cpu"
18
 
19
- # Directories for images and results
20
- IMAGES_DIR = "images"
21
- RESULTS_DIR = "results"
22
- os.makedirs(IMAGES_DIR, exist_ok=True)
23
- os.makedirs(RESULTS_DIR, exist_ok=True)
24
-
25
  # Load Surya OCR Models (English + Hindi)
26
  det_processor, det_model = load_det_processor(), load_det_model()
27
  det_model.to(device)
@@ -56,12 +51,18 @@ def clean_extracted_text(text):
56
 
57
  # Polish the text using a model
58
  def polish_text_with_ai(cleaned_text):
59
- prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces."
60
  client = Groq(api_key="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")
61
  chat_completion = client.chat.completions.create(
62
  messages=[
63
- {"role": "system", "content": "You are a pedantic sentence corrector."},
64
- {"role": "user", "content": prompt},
 
 
 
 
 
 
65
  ],
66
  model="gemma2-9b-it",
67
  )
@@ -103,35 +104,56 @@ model_choice = st.sidebar.selectbox("Select OCR Model:", ("GOT_CPU", "GOT_GPU",
103
 
104
  # Upload Section
105
  uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
106
- clipboard_text = st.sidebar.text_area("Paste image path from clipboard:")
107
-
108
- if uploaded_file or clipboard_text:
109
- image_path = None
110
- if uploaded_file:
111
- image_path = os.path.join(IMAGES_DIR, uploaded_file.name)
112
- with open(image_path, "wb") as f:
113
- f.write(uploaded_file.getvalue())
114
- elif clipboard_text:
115
- image_path = clipboard_text.strip()
116
-
117
- # Predict button
118
- predict_button = st.sidebar.button("Predict")
119
-
120
- # Main columns
121
- col1, col2 = st.columns([2, 1])
122
-
123
- # Check if result JSON already exists
124
- result_json_path = os.path.join(RESULTS_DIR, f"{os.path.basename(image_path)}_result.json") if image_path else None
125
-
126
- if predict_button and image_path:
127
- if os.path.exists(result_json_path):
128
- with open(result_json_path, "r") as json_file:
129
- result_data = json.load(json_file)
130
- polished_text = result_data.get("polished_text", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  else:
132
  with st.spinner("Processing..."):
133
- image = Image.open(image_path).convert("RGB")
134
-
135
  if model_choice == "GOT_CPU":
136
  got_model, tokenizer = init_got_model()
137
  extracted_text = extract_text_got(image_path, got_model, tokenizer)
@@ -150,26 +172,35 @@ if uploaded_file or clipboard_text:
150
  text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
151
  extracted_text = ' '.join(text_list)
152
 
 
153
  cleaned_text = clean_extracted_text(extracted_text)
154
  polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
155
 
156
- # Save result to JSON
157
- with open(result_json_path, "w") as json_file:
158
- json.dump({"polished_text": polished_text}, json_file)
159
-
160
- # Display image preview and text
161
- if image_path:
162
- with col1:
163
- col1.image(image_path, caption='Uploaded Image', use_column_width=False, width=300)
164
 
 
165
  st.subheader("Extracted Text (Cleaned & Polished)")
166
- st.markdown(polished_text, unsafe_allow_html=True)
167
-
168
- # Input box for real-time search
169
- search_query = st.text_input("Search in extracted text:", key="search_query", placeholder="Type to search...", on_change=lambda: st.session_state.update(search_query), disabled=not uploaded_file)
170
-
171
- # Highlight the search term in the text
172
- if search_query:
173
- highlighted_text = highlight_text(polished_text, search_query)
 
 
 
 
 
 
 
 
 
 
 
174
  st.markdown("### Highlighted Search Results:")
175
- st.markdown(highlighted_text, unsafe_allow_html=True)
 
10
  import os
11
  import re
12
  import json
13
+ import base64
14
  from groq import Groq
15
 
16
  # Page configuration
17
  st.set_page_config(page_title="DualTextOCRFusion", page_icon="πŸ”", layout="wide")
18
  device = "cuda" if torch.cuda.is_available() else "cpu"
19
 
 
 
 
 
 
 
20
  # Load Surya OCR Models (English + Hindi)
21
  det_processor, det_model = load_det_processor(), load_det_model()
22
  det_model.to(device)
 
51
 
52
  # Polish the text using a model
53
  def polish_text_with_ai(cleaned_text):
54
+ prompt = f"Remove unwanted spaces between and inside words to join incomplete words, creating a meaningful sentence in either Hindi, English, or Hinglish without altering any words from the given extracted text. Then, return the corrected text with adjusted spaces, keeping it as close to the original as possible."
55
  client = Groq(api_key="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")
56
  chat_completion = client.chat.completions.create(
57
  messages=[
58
+ {
59
+ "role": "system",
60
+ "content": "You are a pedantic sentence corrector. Remove extra spaces between and within words to make the sentence meaningful in English, Hindi, or Hinglish, according to the context of the sentence, without changing any words."
61
+ },
62
+ {
63
+ "role": "user",
64
+ "content": prompt,
65
+ }
66
  ],
67
  model="gemma2-9b-it",
68
  )
 
104
 
105
  # Upload Section
106
  uploaded_file = st.sidebar.file_uploader("Choose an image...", type=["png", "jpg", "jpeg"])
107
+
108
+ # Input from clipboard
109
+ if st.sidebar.button("Paste from Clipboard"):
110
+ try:
111
+ clipboard_data = st.experimental_get_clipboard()
112
+ if clipboard_data:
113
+ image_data = base64.b64decode(clipboard_data)
114
+ uploaded_file = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
115
+ uploaded_file.write(image_data)
116
+ uploaded_file.seek(0)
117
+ except Exception as e:
118
+ st.sidebar.warning(f"Clipboard data is not an image or an error occurred: {str(e)}")
119
+
120
+ # Input from camera
121
+ camera_file = st.sidebar.camera_input("Capture from Camera")
122
+ if camera_file:
123
+ uploaded_file = camera_file
124
+
125
+ # Predict button
126
+ predict_button = st.sidebar.button("Predict")
127
+
128
+ # Main columns
129
+ col1, col2 = st.columns([2, 1])
130
+
131
+ # Display image preview
132
+ if uploaded_file:
133
+ image = Image.open(uploaded_file)
134
+ with col1:
135
+ col1.image(image, caption='Uploaded Image', use_column_width=False, width=300)
136
+
137
+ # Save uploaded image to 'images' folder
138
+ images_dir = 'images'
139
+ os.makedirs(images_dir, exist_ok=True)
140
+ image_path = os.path.join(images_dir, uploaded_file.name)
141
+ with open(image_path, 'wb') as f:
142
+ f.write(uploaded_file.getvalue())
143
+
144
+ # Check if the result already exists
145
+ results_dir = 'results'
146
+ os.makedirs(results_dir, exist_ok=True)
147
+ result_path = os.path.join(results_dir, f"{uploaded_file.name}_result.json")
148
+
149
+ # Handle predictions
150
+ if predict_button:
151
+ if os.path.exists(result_path):
152
+ with open(result_path, 'r') as f:
153
+ result_data = json.load(f)
154
+ extracted_text = result_data["polished_text"]
155
  else:
156
  with st.spinner("Processing..."):
 
 
157
  if model_choice == "GOT_CPU":
158
  got_model, tokenizer = init_got_model()
159
  extracted_text = extract_text_got(image_path, got_model, tokenizer)
 
172
  text_list = re.findall(r"text='(.*?)'", str(predictions[0]))
173
  extracted_text = ' '.join(text_list)
174
 
175
+ # Clean and polish extracted text
176
  cleaned_text = clean_extracted_text(extracted_text)
177
  polished_text = polish_text_with_ai(cleaned_text) if model_choice in ["GOT_CPU", "GOT_GPU"] else cleaned_text
178
 
179
+ # Save results to JSON file
180
+ result_data = {"polished_text": polished_text}
181
+ with open(result_path, 'w') as f:
182
+ json.dump(result_data, f)
 
 
 
 
183
 
184
+ # Display extracted text
185
  st.subheader("Extracted Text (Cleaned & Polished)")
186
+ st.markdown(extracted_text, unsafe_allow_html=True)
187
+
188
+ # Search functionality
189
+ def update_search():
190
+ if search_query:
191
+ highlighted_text = highlight_text(extracted_text, search_query)
192
+ st.session_state["highlighted_result"] = highlighted_text
193
+ else:
194
+ st.session_state["highlighted_result"] = extracted_text
195
+
196
+ search_query = st.text_input(
197
+ "Search in extracted text:",
198
+ key="search_query",
199
+ placeholder="Type to search...",
200
+ on_change=update_search,
201
+ disabled=not uploaded_file
202
+ )
203
+
204
+ if "highlighted_result" in st.session_state:
205
  st.markdown("### Highlighted Search Results:")
206
+ st.markdown(st.session_state["highlighted_result"], unsafe_allow_html=True)