Update app.py
Browse files
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
-
import os
|
2 |
import pandas as pd
|
3 |
from google.api_core.client_options import ClientOptions
|
4 |
from google.cloud import documentai_v1 as documentai
|
5 |
from google.cloud.documentai_v1.types import RawDocument
|
6 |
import zipfile
|
|
|
7 |
import gradio as gr
|
8 |
import tempfile
|
9 |
import textwrap
|
@@ -32,11 +32,6 @@ project_id = "herbaria-ai"
|
|
32 |
location = "us"
|
33 |
processor_id = "de954414712822b3"
|
34 |
|
35 |
-
# Set your Google Cloud Document AI processor details here
|
36 |
-
project_id = "herbaria-ai"
|
37 |
-
location = "us"
|
38 |
-
processor_id = "de954414712822b3"
|
39 |
-
|
40 |
# helper function for processing Gemini responses (which are in markdown)
|
41 |
def to_markdown(text):
|
42 |
text = text.replace('•', ' *')
|
@@ -181,15 +176,14 @@ def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
|
|
181 |
request = documentai.ProcessRequest(name=name, raw_document=raw_document)
|
182 |
result = client.process_document(request=request)
|
183 |
|
184 |
-
extracted_text = result.document.text
|
185 |
-
|
186 |
-
return extracted_text, translated_text
|
187 |
|
188 |
# file upload
|
189 |
def unzip_and_find_jpgs(file_path):
|
190 |
extract_path = "extracted_files"
|
191 |
if os.path.exists(extract_path):
|
192 |
-
#
|
193 |
for root, dirs, files in os.walk(extract_path, topdown=False):
|
194 |
for name in files:
|
195 |
os.remove(os.path.join(root, name))
|
@@ -270,8 +264,8 @@ def process_images(uploaded_file):
|
|
270 |
|
271 |
with gr.Blocks() as interface:
|
272 |
with gr.Row():
|
273 |
-
gr.Markdown("#
|
274 |
-
gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will
|
275 |
with gr.Row():
|
276 |
file_input = gr.File(label="Upload ZIP File")
|
277 |
with gr.Row():
|
@@ -282,4 +276,4 @@ with gr.Blocks() as interface:
|
|
282 |
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
283 |
|
284 |
if __name__ == "__main__":
|
285 |
-
interface.launch()
|
|
|
|
|
1 |
import pandas as pd
|
2 |
from google.api_core.client_options import ClientOptions
|
3 |
from google.cloud import documentai_v1 as documentai
|
4 |
from google.cloud.documentai_v1.types import RawDocument
|
5 |
import zipfile
|
6 |
+
import os
|
7 |
import gradio as gr
|
8 |
import tempfile
|
9 |
import textwrap
|
|
|
32 |
location = "us"
|
33 |
processor_id = "de954414712822b3"
|
34 |
|
|
|
|
|
|
|
|
|
|
|
35 |
# helper function for processing Gemini responses (which are in markdown)
|
36 |
def to_markdown(text):
|
37 |
text = text.replace('•', ' *')
|
|
|
176 |
request = documentai.ProcessRequest(name=name, raw_document=raw_document)
|
177 |
result = client.process_document(request=request)
|
178 |
|
179 |
+
extracted_text = result.document.text.replace('\n', ' ')
|
180 |
+
return extracted_text
|
|
|
181 |
|
182 |
# file upload
|
183 |
def unzip_and_find_jpgs(file_path):
|
184 |
extract_path = "extracted_files"
|
185 |
if os.path.exists(extract_path):
|
186 |
+
# clear out files left behind by a previous extraction
|
187 |
for root, dirs, files in os.walk(extract_path, topdown=False):
|
188 |
for name in files:
|
189 |
os.remove(os.path.join(root, name))
|
|
|
264 |
|
265 |
with gr.Blocks() as interface:
|
266 |
with gr.Row():
|
267 |
+
gr.Markdown("# Herbaria Batch Metadata Extraction")
|
268 |
+
gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will translate and extract the text from each image.")
|
269 |
with gr.Row():
|
270 |
file_input = gr.File(label="Upload ZIP File")
|
271 |
with gr.Row():
|
|
|
276 |
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
277 |
|
278 |
if __name__ == "__main__":
|
279 |
+
interface.launch(debug=True)
|