mkaramb committed on
Commit
9ac2440
·
verified ·
1 Parent(s): 70266cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -33
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  # Upload credential json file from default compute service account
3
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
4
 
 
5
  from google.api_core.client_options import ClientOptions
6
  from google.cloud import documentai_v1 as documentai
7
  from google.cloud.documentai_v1.types import RawDocument
@@ -10,8 +11,6 @@ import zipfile
10
  import os
11
  import io
12
  import gradio as gr
13
- import pandas as pd
14
- import tempfile
15
 
16
  # Global DataFrame declaration
17
  results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
@@ -19,7 +18,7 @@ results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Tex
19
  # Set your Google Cloud Document AI processor details here
20
  project_id = "herbaria-ai"
21
  location = "us"
22
- processor_id = "de954414712822b3"
23
 
24
  def translate_text(text, target_language="en"):
25
  translate_client = translate.Client()
@@ -58,18 +57,15 @@ def unzip_and_find_jpgs(file_path):
58
 
59
  def process_images(uploaded_file):
60
  global results_df
61
- if uploaded_file is None:
62
- results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"]) # Clear DataFrame
63
- return "", "" # Return empty outputs if no file is uploaded
64
- else:
65
- # Reinitialize the DataFrame every time a new file is uploaded
66
- results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
67
-
68
- file_path = uploaded_file.name
69
  try:
70
  image_files = unzip_and_find_jpgs(file_path)
 
71
  if not image_files:
72
- return "No JPG files found in the zip.", ""
73
 
74
  for file_path in image_files:
75
  extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
@@ -80,26 +76,17 @@ def process_images(uploaded_file):
80
  }])
81
  results_df = pd.concat([results_df, new_row], ignore_index=True)
82
  except Exception as e:
83
- return f"An error occurred: {str(e)}", ""
84
-
85
- html_output = results_df.to_html()
86
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
87
- results_df.to_csv(temp_file.name, index=False)
88
- temp_file.close() # File is closed but not deleted
89
- return html_output, temp_file.name
90
-
91
- with gr.Blocks() as interface:
92
- with gr.Row():
93
- gr.Markdown("# Document AI Translation")
94
- gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
95
- with gr.Row():
96
- file_input = gr.File(label="Upload ZIP File")
97
- with gr.Row():
98
- html_output = gr.HTML()
99
- with gr.Row():
100
- file_output = gr.File()
101
-
102
- file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
103
 
104
  if __name__ == "__main__":
105
- interface.launch(debug=True)
 
2
  # Upload credential json file from default compute service account
3
  os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
4
 
5
+ import pandas as pd
6
  from google.api_core.client_options import ClientOptions
7
  from google.cloud import documentai_v1 as documentai
8
  from google.cloud.documentai_v1.types import RawDocument
 
11
  import os
12
  import io
13
  import gradio as gr
 
 
14
 
15
  # Global DataFrame declaration
16
  results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
 
18
  # Set your Google Cloud Document AI processor details here
19
  project_id = "herbaria-ai"
20
  location = "us"
21
+ processor_id = "4307b078717a399a"
22
 
23
  def translate_text(text, target_language="en"):
24
  translate_client = translate.Client()
 
57
 
58
  def process_images(uploaded_file):
59
  global results_df
60
+ results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
61
+
62
+ file_path = uploaded_file.name # Gradio provides the file path through the .name attribute
63
+
 
 
 
 
64
  try:
65
  image_files = unzip_and_find_jpgs(file_path)
66
+
67
  if not image_files:
68
+ return "No JPG files found in the zip."
69
 
70
  for file_path in image_files:
71
  extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
 
76
  }])
77
  results_df = pd.concat([results_df, new_row], ignore_index=True)
78
  except Exception as e:
79
+ return f"An error occurred: {str(e)}"
80
+
81
+ return results_df.to_html()
82
+
83
+ interface = gr.Interface(
84
+ fn=process_images,
85
+ inputs="file",
86
+ outputs="html",
87
+ title="Document AI Translation",
88
+ description="Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image."
89
+ )
 
 
 
 
 
 
 
 
 
90
 
91
  if __name__ == "__main__":
92
+ interface.launch(debug=True)