yashbyname committed on
Commit
c2e789f
·
verified ·
1 Parent(s): 37b7885

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -5
app.py CHANGED
@@ -3,6 +3,7 @@ import torch
3
  import pytesseract
4
  from transformers import AutoTokenizer, AutoModel
5
 
 
6
  pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/bin/tesseract'
7
 
8
  # Load the tokenizer and model
@@ -10,19 +11,27 @@ tokenizer_eng = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote
10
  model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()
11
 
12
  def perform_ocr(image, language):
13
- # Perform OCR for English
14
  img_cv = image # Assuming image is already in the correct format
15
- res_eng = model_eng.chat(tokenizer_eng, img_cv, ocr_type='ocr')
16
 
17
- return res_eng # Return results for English
 
 
 
 
 
 
 
 
 
18
 
19
  def ocr_and_search(image, language):
20
  # Call the perform_ocr function
21
- english_text = perform_ocr(image, language)
22
  # You may also want to implement any searching functionality here
23
  # ...
24
 
25
- return english_text # Return the OCR result for English
26
 
27
  # Create Gradio interface
28
  iface = gr.Interface(
 
3
  import pytesseract
4
  from transformers import AutoTokenizer, AutoModel
5
 
6
+ # Set Tesseract executable path
7
  pytesseract.pytesseract.tesseract_cmd = r'/opt/homebrew/bin/tesseract'
8
 
9
  # Load the tokenizer and model
 
11
  model_eng = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True).eval()
12
 
13
  def perform_ocr(image, language):
14
+ # Convert the Gradio image input to the format suitable for pytesseract
15
  img_cv = image # Assuming image is already in the correct format
 
16
 
17
+ if language == "English":
18
+ # Perform OCR using the model for English
19
+ res_eng = model_eng.chat(tokenizer_eng, img_cv, ocr_type='ocr')
20
+ return res_eng # Return results for English
21
+ elif language == "Hindi":
22
+ # Perform OCR using pytesseract for Hindi
23
+ res_hin = pytesseract.image_to_string(img_cv, lang='hin', config='--psm 6')
24
+ return res_hin # Return results for Hindi
25
+ else:
26
+ return "Unsupported language selected."
27
 
28
  def ocr_and_search(image, language):
29
  # Call the perform_ocr function
30
+ extracted_text = perform_ocr(image, language)
31
  # You may also want to implement any searching functionality here
32
  # ...
33
 
34
+ return extracted_text # Return the OCR result for the selected language
35
 
36
  # Create Gradio interface
37
  iface = gr.Interface(