Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
# Upload credential json file from default compute service account
|
3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
4 |
|
|
|
5 |
from google.api_core.client_options import ClientOptions
|
6 |
from google.cloud import documentai_v1 as documentai
|
7 |
from google.cloud.documentai_v1.types import RawDocument
|
@@ -10,8 +11,6 @@ import zipfile
|
|
10 |
import os
|
11 |
import io
|
12 |
import gradio as gr
|
13 |
-
import pandas as pd
|
14 |
-
import tempfile
|
15 |
|
16 |
# Global DataFrame declaration
|
17 |
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
@@ -19,7 +18,7 @@ results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Tex
|
|
19 |
# Set your Google Cloud Document AI processor details here
|
20 |
project_id = "herbaria-ai"
|
21 |
location = "us"
|
22 |
-
processor_id = "
|
23 |
|
24 |
def translate_text(text, target_language="en"):
|
25 |
translate_client = translate.Client()
|
@@ -58,18 +57,15 @@ def unzip_and_find_jpgs(file_path):
|
|
58 |
|
59 |
def process_images(uploaded_file):
|
60 |
global results_df
|
61 |
-
if
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
# Reinitialize the DataFrame every time a new file is uploaded
|
66 |
-
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
67 |
-
|
68 |
-
file_path = uploaded_file.name
|
69 |
try:
|
70 |
image_files = unzip_and_find_jpgs(file_path)
|
|
|
71 |
if not image_files:
|
72 |
-
return "No JPG files found in the zip."
|
73 |
|
74 |
for file_path in image_files:
|
75 |
extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
|
@@ -80,26 +76,17 @@ def process_images(uploaded_file):
|
|
80 |
}])
|
81 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
82 |
except Exception as e:
|
83 |
-
return f"An error occurred: {str(e)}"
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
|
95 |
-
with gr.Row():
|
96 |
-
file_input = gr.File(label="Upload ZIP File")
|
97 |
-
with gr.Row():
|
98 |
-
html_output = gr.HTML()
|
99 |
-
with gr.Row():
|
100 |
-
file_output = gr.File()
|
101 |
-
|
102 |
-
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
103 |
|
104 |
if __name__ == "__main__":
|
105 |
-
interface.launch(debug=True)
|
|
|
2 |
# Upload credential json file from default compute service account
|
3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
4 |
|
5 |
+
import pandas as pd
|
6 |
from google.api_core.client_options import ClientOptions
|
7 |
from google.cloud import documentai_v1 as documentai
|
8 |
from google.cloud.documentai_v1.types import RawDocument
|
|
|
11 |
import os
|
12 |
import io
|
13 |
import gradio as gr
|
|
|
|
|
14 |
|
15 |
# Global DataFrame declaration
|
16 |
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
|
|
18 |
# Set your Google Cloud Document AI processor details here
|
19 |
project_id = "herbaria-ai"
|
20 |
location = "us"
|
21 |
+
processor_id = "4307b078717a399a"
|
22 |
|
23 |
def translate_text(text, target_language="en"):
|
24 |
translate_client = translate.Client()
|
|
|
57 |
|
58 |
def process_images(uploaded_file):
|
59 |
global results_df
|
60 |
+
results_df = results_df.iloc[0:0] # Drop rows left over from a previous call so each upload starts with an empty DataFrame
|
61 |
+
|
62 |
+
file_path = uploaded_file.name # Gradio provides the file path through the .name attribute
|
63 |
+
|
|
|
|
|
|
|
|
|
64 |
try:
|
65 |
image_files = unzip_and_find_jpgs(file_path)
|
66 |
+
|
67 |
if not image_files:
|
68 |
+
return "No JPG files found in the zip."
|
69 |
|
70 |
for file_path in image_files:
|
71 |
extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
|
|
|
76 |
}])
|
77 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
78 |
except Exception as e:
|
79 |
+
return f"An error occurred: {str(e)}"
|
80 |
+
|
81 |
+
return results_df.to_html()
|
82 |
+
|
83 |
+
interface = gr.Interface(
|
84 |
+
fn=process_images,
|
85 |
+
inputs="file",
|
86 |
+
outputs="html",
|
87 |
+
title="Document AI Translation",
|
88 |
+
description="Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image."
|
89 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
|
91 |
if __name__ == "__main__":
|
92 |
+
interface.launch(debug=True)
|