yashbyname committed on
Commit efe753c · verified · 1 Parent(s): add4d26

Update app.py

Files changed (1)
  1. app.py +66 -49
app.py CHANGED
@@ -1,50 +1,67 @@
  import gradio as gr
- import torch
- import pytesseract
- from transformers import AutoTokenizer, AutoModelForCausalLM
- import cv2  # Ensure you have OpenCV installed
-
- # Load models and tokenizers
- tokenizer_eng = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
- tokenizer_hin = AutoTokenizer.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
- model_eng = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
- model_hin = AutoModelForCausalLM.from_pretrained("ucaslcl/GOT-OCR2_0", trust_remote_code=True)
-
- def perform_ocr(image):
-     """Perform OCR on the image for both English and Hindi."""
-     # Set device to CPU or GPU
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-
-     model_eng.to(device)
-     model_hin.to(device)
-
-     # Convert the input image to an OpenCV format
-     img_cv = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)  # Convert to BGR for OpenCV
-     img_path = "temp_image.jpg"  # Temporary path to save the image
-     cv2.imwrite(img_path, img_cv)  # Save the image temporarily
-
-     # Use pytesseract for English OCR
-     english_text = pytesseract.image_to_string(img_cv)
-
-     # Use pytesseract for Hindi OCR
-     tesseract_config = '--psm 6'
-     hindi_text = pytesseract.image_to_string(img_cv, lang='hin', config=tesseract_config)
-
-     return english_text, hindi_text
-
- def ocr_and_search(image):
-     """Process the image and extract text in both languages."""
-     english_text, hindi_text = perform_ocr(image)
-     return english_text, hindi_text
-
- # Gradio interface
- iface = gr.Interface(
-     fn=ocr_and_search,
-     inputs=gr.inputs.Image(type="numpy"),  # Use numpy array for OpenCV
-     outputs=["text", "text"],
-     title="OCR for English and Hindi",
-     description="Upload an image to extract text in English and Hindi."
- )
-
- # Launch the interface
- iface.launch()
+ import cv2
+ from pytesseract import pytesseract
+ from transformers import AutoModel, AutoTokenizer
  import gradio as gr
+
+ # Model and Tesseract Configuration
+ # Load GOT2 model for English text and configure Tesseract for Hindi text
+ tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+ model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map='cpu').eval()
+
+ # Define Tesseract path and configuration for Hindi
+ pytesseract.tesseract_cmd = '/usr/bin/tesseract'
+ tesseract_config = '--oem 3 --psm 6 -l hin'
+
+ # Perform OCR function
+ def perform_ocr(img, language):
+     img_path = "/tmp/uploaded_image.png"
+     img.save(img_path)
+
+     res_eng = ""
+     res_hin = ""
+
+     if language in ["English", "Both"]:
+         res_eng = model_eng.chat(tokenizer_eng, img_path, ocr_type='ocr')
+
+     if language in ["Hindi", "Both"]:
+         img_cv = cv2.imread(img_path)
+         res_hin = pytesseract.image_to_string(img_cv, config=tesseract_config)
+
+     return res_eng, res_hin
+
+ # Keyword Search Functionality
+ def ocr_and_search(image, language, keyword):
+     english_text, hindi_text = perform_ocr(image, language)
+
+     extracted_english = f"Extracted English Text:\n{english_text}" if english_text else "No English text extracted."
+     extracted_hindi = f"Extracted Hindi Text:\n{hindi_text}" if hindi_text else "No Hindi text extracted."
+
+     # Search for the keyword in the extracted text
+     search_results = []
+     if keyword:
+         if language in ["English", "Both"] and keyword.lower() in english_text.lower():
+             search_results.append(f"Keyword '{keyword}' found in English text.")
+
+         if language in ["Hindi", "Both"] and keyword.lower() in hindi_text.lower():
+             search_results.append(f"Keyword '{keyword}' found in Hindi text.")
+
+     search_output = "\n".join(search_results) if search_results else "No matches found."
+
+     return extracted_english, extracted_hindi, search_output
+
+ # Gradio Interface Setup
+ with gr.Blocks() as app:
+     gr.Markdown("### OCR Application")
+     image_input = gr.Image(type="pil", label="Upload Image")
+     language_selection = gr.Radio(choices=["English", "Hindi", "Both"], label="Select Language")
+     keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")
+     output_english = gr.Textbox(label="Extracted English Text", interactive=False)
+     output_hindi = gr.Textbox(label="Extracted Hindi Text", interactive=False)
+     output_search = gr.Textbox(label="Search Results", interactive=False)
+
+     submit_button = gr.Button("Submit")
+     submit_button.click(fn=ocr_and_search, inputs=[image_input, language_selection, keyword_input], outputs=[output_english, output_hindi, output_search])
+
+ # Application Launch
+ if __name__ == "__main__":
+     app.launch()