mkaramb committed on
Commit
e4387b3
·
verified ·
1 Parent(s): faf19c7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -24
app.py CHANGED
@@ -1,40 +1,44 @@
1
  import pandas as pd
2
- from google.api_core.client_options import ClientOptions
3
- from google.cloud import documentai_v1 as documentai
4
- from google.cloud.documentai_v1.types import RawDocument
5
- from google.cloud import translate_v2 as translate
6
- import zipfile
7
  import os
 
 
8
  import io
9
  import gradio as gr
10
- import json
11
  from google.oauth2 import service_account
 
 
 
 
12
 
13
- # Upload credential json file from default compute service account
14
- #os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
15
 
 
16
  credentials_raw = os.environ.get("google_authentication")
17
  if not credentials_raw:
18
  raise EnvironmentError("Google Cloud credentials not found in environment.")
19
  credentials_json = json.loads(credentials_raw)
20
  credentials = service_account.Credentials.from_service_account_info(credentials_json)
 
21
 
22
  # Global DataFrame declaration
23
  results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
24
 
25
- # Set your Google Cloud Document AI processor details here
26
  project_id = "herbaria-ai"
27
  location = "us"
28
  processor_id = "4307b078717a399a"
29
 
30
  def translate_text(text, target_language="en"):
31
- translate_client = translate.Client(credentials=credenentials)
 
32
  result = translate_client.translate(text, target_language=target_language)
33
  return result["translatedText"]
34
 
35
  def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
 
36
  opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com", credentials=credentials)
37
-
38
  client = documentai.DocumentProcessorServiceClient(client_options=opts)
39
 
40
  with open(file_path, "rb") as file_stream:
@@ -46,9 +50,11 @@ def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
46
 
47
  extracted_text = result.document.text
48
  translated_text = translate_text(extracted_text)
 
49
  return extracted_text, translated_text
50
 
51
  def unzip_and_find_jpgs(file_path):
 
52
  extract_path = "extracted_files"
53
  os.makedirs(extract_path, exist_ok=True)
54
  jpg_files = []
@@ -61,13 +67,14 @@ def unzip_and_find_jpgs(file_path):
61
  if file.lower().endswith('.jpg'):
62
  full_path = os.path.join(root, file)
63
  jpg_files.append(full_path)
 
64
  return jpg_files
65
 
66
  def process_images(uploaded_file):
67
  global results_df
68
- results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
69
-
70
- file_path = uploaded_file.name # Gradio provides the file path through the .name attribute
71
 
72
  try:
73
  image_files = unzip_and_find_jpgs(file_path)
@@ -84,8 +91,10 @@ def process_images(uploaded_file):
84
  }])
85
  results_df = pd.concat([results_df, new_row], ignore_index=True)
86
  except Exception as e:
 
87
  return f"An error occurred: {str(e)}"
88
 
 
89
  return results_df.to_html()
90
 
91
  interface = gr.Interface(
@@ -98,13 +107,3 @@ interface = gr.Interface(
98
 
99
  if __name__ == "__main__":
100
  interface.launch(debug=True)
101
-
102
-
103
-
104
-
105
- # def greet(name):
106
- # return "Hello " + name + "!!"
107
-
108
- #iface = gr.Interface(fn=greet, inputs="text", outputs="text")
109
- #iface.launch()
110
-
 
1
  import pandas as pd
 
 
 
 
 
2
  import os
3
+ import json
4
+ import zipfile
5
  import io
6
  import gradio as gr
7
+ import logging
8
  from google.oauth2 import service_account
9
+ from google.api_core.client_options import ClientOptions
10
+ from google.cloud import documentai_v1 as documentai
11
+ from google.cloud.documentai_v1.types import RawDocument
12
+ from google.cloud import translate_v2 as translate
13
 
14
+ # Setup logging
15
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
16
 
17
+ # Load credentials from environment variable
18
  credentials_raw = os.environ.get("google_authentication")
19
  if not credentials_raw:
20
  raise EnvironmentError("Google Cloud credentials not found in environment.")
21
  credentials_json = json.loads(credentials_raw)
22
  credentials = service_account.Credentials.from_service_account_info(credentials_json)
23
+ logging.info("Loaded Google Cloud credentials successfully.")
24
 
25
  # Global DataFrame declaration
26
  results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
27
 
28
+ # Google Cloud Document AI processor details
29
  project_id = "herbaria-ai"
30
  location = "us"
31
  processor_id = "4307b078717a399a"
32
 
33
  def translate_text(text, target_language="en"):
34
+ logging.info(f"Translating text to {target_language}.")
35
+ translate_client = translate.Client(credentials=credentials)
36
  result = translate_client.translate(text, target_language=target_language)
37
  return result["translatedText"]
38
 
39
  def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
40
+ logging.info(f"Processing document {file_path}.")
41
  opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com", credentials=credentials)
 
42
  client = documentai.DocumentProcessorServiceClient(client_options=opts)
43
 
44
  with open(file_path, "rb") as file_stream:
 
50
 
51
  extracted_text = result.document.text
52
  translated_text = translate_text(extracted_text)
53
+ logging.info(f"Document processed and translated for {file_path}.")
54
  return extracted_text, translated_text
55
 
56
  def unzip_and_find_jpgs(file_path):
57
+ logging.info(f"Unzipping file {file_path}.")
58
  extract_path = "extracted_files"
59
  os.makedirs(extract_path, exist_ok=True)
60
  jpg_files = []
 
67
  if file.lower().endswith('.jpg'):
68
  full_path = os.path.join(root, file)
69
  jpg_files.append(full_path)
70
+ logging.info(f"Found {len(jpg_files)} JPG files in {file_path}.")
71
  return jpg_files
72
 
73
  def process_images(uploaded_file):
74
  global results_df
75
+ results_df = results_df.iloc[0:0] # Clear the DataFrame
76
+ file_path = uploaded_file.name # Gradio provides the file path
77
+ logging.info(f"Received file {file_path} for processing.")
78
 
79
  try:
80
  image_files = unzip_and_find_jpgs(file_path)
 
91
  }])
92
  results_df = pd.concat([results_df, new_row], ignore_index=True)
93
  except Exception as e:
94
+ logging.error(f"An error occurred: {str(e)}")
95
  return f"An error occurred: {str(e)}"
96
 
97
+ logging.info("Processing complete. Generating HTML output.")
98
  return results_df.to_html()
99
 
100
  interface = gr.Interface(
 
107
 
108
  if __name__ == "__main__":
109
  interface.launch(debug=True)