Spaces:

spark-ds549
/

Chinese-Label-Transcription

Sleeping

App Files Community

mkaramb committed on May 7, 2024

Commit

290f08e

verified ·

1 Parent(s): 3801b0c

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -13

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
-import os
 import pandas as pd
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
 import zipfile
 import gradio as gr
 import tempfile
 import textwrap
@@ -32,11 +32,6 @@ project_id = "herbaria-ai"
 location = "us"
 processor_id = "de954414712822b3"
-# Set your Google Cloud Document AI processor details here
-project_id = "herbaria-ai"
-location = "us"
-processor_id = "de954414712822b3"
 # helper function for processing gemini responses (which are in markdown)
 def to_markdown(text):
     text = text.replace('•', '  *')
@@ -181,15 +176,14 @@ def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
     request = documentai.ProcessRequest(name=name, raw_document=raw_document)
     result = client.process_document(request=request)
-    extracted_text = result.document.text
-    translated_text = translate_text(extracted_text)
-    return extracted_text, translated_text
 # file upload
 def unzip_and_find_jpgs(file_path):
     extract_path = "extracted_files"
     if os.path.exists(extract_path):
-        # Remove the directory and its contents to start fresh
         for root, dirs, files in os.walk(extract_path, topdown=False):
             for name in files:
                 os.remove(os.path.join(root, name))
@@ -270,8 +264,8 @@ def process_images(uploaded_file):
 with gr.Blocks() as interface:
     with gr.Row():
-        gr.Markdown("# Document AI Translation")
-        gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
     with gr.Row():
         file_input = gr.File(label="Upload ZIP File")
     with gr.Row():
@@ -282,4 +276,4 @@ with gr.Blocks() as interface:
     file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
 if __name__ == "__main__":
-    interface.launch()

 import pandas as pd
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
 import zipfile
+import os
 import gradio as gr
 import tempfile
 import textwrap
 location = "us"
 processor_id = "de954414712822b3"
 # helper function for processing gemini responses (which are in markdown)
 def to_markdown(text):
     text = text.replace('•', '  *')
     request = documentai.ProcessRequest(name=name, raw_document=raw_document)
     result = client.process_document(request=request)
+    extracted_text = result.document.text.replace('\n', ' ')
+    return extracted_text
 # file upload
 def unzip_and_find_jpgs(file_path):
     extract_path = "extracted_files"
     if os.path.exists(extract_path):
+        # clear dir
         for root, dirs, files in os.walk(extract_path, topdown=False):
             for name in files:
                 os.remove(os.path.join(root, name))
 with gr.Blocks() as interface:
     with gr.Row():
+        gr.Markdown("# Herbaria Batch Metadata Extraction")
+        gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will translate and extract the text from each image.")
     with gr.Row():
         file_input = gr.File(label="Upload ZIP File")
     with gr.Row():
     file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
 if __name__ == "__main__":
+    interface.launch(debug=True)