mkaramb committed on
Commit
290f08e
·
verified ·
1 Parent(s): 3801b0c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -13
app.py CHANGED
@@ -1,9 +1,9 @@
1
- import os
2
  import pandas as pd
3
  from google.api_core.client_options import ClientOptions
4
  from google.cloud import documentai_v1 as documentai
5
  from google.cloud.documentai_v1.types import RawDocument
6
  import zipfile
 
7
  import gradio as gr
8
  import tempfile
9
  import textwrap
@@ -32,11 +32,6 @@ project_id = "herbaria-ai"
32
  location = "us"
33
  processor_id = "de954414712822b3"
34
 
35
- # Set your Google Cloud Document AI processor details here
36
- project_id = "herbaria-ai"
37
- location = "us"
38
- processor_id = "de954414712822b3"
39
-
40
  # helper function for processing gemini responses (which are in markdown)
41
  def to_markdown(text):
42
  text = text.replace('•', ' *')
@@ -181,15 +176,14 @@ def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
181
  request = documentai.ProcessRequest(name=name, raw_document=raw_document)
182
  result = client.process_document(request=request)
183
 
184
- extracted_text = result.document.text
185
- translated_text = translate_text(extracted_text)
186
- return extracted_text, translated_text
187
 
188
  # file upload
189
  def unzip_and_find_jpgs(file_path):
190
  extract_path = "extracted_files"
191
  if os.path.exists(extract_path):
192
- # Remove the directory and its contents to start fresh
193
  for root, dirs, files in os.walk(extract_path, topdown=False):
194
  for name in files:
195
  os.remove(os.path.join(root, name))
@@ -270,8 +264,8 @@ def process_images(uploaded_file):
270
 
271
  with gr.Blocks() as interface:
272
  with gr.Row():
273
- gr.Markdown("# Document AI Translation")
274
- gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
275
  with gr.Row():
276
  file_input = gr.File(label="Upload ZIP File")
277
  with gr.Row():
@@ -282,4 +276,4 @@ with gr.Blocks() as interface:
282
  file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
283
 
284
  if __name__ == "__main__":
285
- interface.launch()
 
 
1
  import pandas as pd
2
  from google.api_core.client_options import ClientOptions
3
  from google.cloud import documentai_v1 as documentai
4
  from google.cloud.documentai_v1.types import RawDocument
5
  import zipfile
6
+ import os
7
  import gradio as gr
8
  import tempfile
9
  import textwrap
 
32
  location = "us"
33
  processor_id = "de954414712822b3"
34
 
 
 
 
 
 
35
  # helper function for processing gemini responses (which are in markdown)
36
  def to_markdown(text):
37
  text = text.replace('•', ' *')
 
176
  request = documentai.ProcessRequest(name=name, raw_document=raw_document)
177
  result = client.process_document(request=request)
178
 
179
+ extracted_text = result.document.text.replace('\n', ' ')
180
+ return extracted_text
 
181
 
182
  # file upload
183
  def unzip_and_find_jpgs(file_path):
184
  extract_path = "extracted_files"
185
  if os.path.exists(extract_path):
186
+ # clear dir
187
  for root, dirs, files in os.walk(extract_path, topdown=False):
188
  for name in files:
189
  os.remove(os.path.join(root, name))
 
264
 
265
  with gr.Blocks() as interface:
266
  with gr.Row():
267
+ gr.Markdown("# Herbaria Batch Metadata Extraction")
268
+ gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will translate and extract the text from each image.")
269
  with gr.Row():
270
  file_input = gr.File(label="Upload ZIP File")
271
  with gr.Row():
 
276
  file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
277
 
278
  if __name__ == "__main__":
279
+ interface.launch(debug=True)