Spaces:

spark-ds549
/

Chinese-Label-Transcription

Sleeping

App Files Community

mkaramb committed on Apr 18, 2024

Commit

e4387b3

verified ·

1 Parent(s): faf19c7

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -24

app.py CHANGED Viewed

@@ -1,40 +1,44 @@
 import pandas as pd
-from google.api_core.client_options import ClientOptions
-from google.cloud import documentai_v1 as documentai
-from google.cloud.documentai_v1.types import RawDocument
-from google.cloud import translate_v2 as translate
-import zipfile
 import os
 import io
 import gradio as gr
-import json
 from google.oauth2 import service_account
-# Upload credential json file from default compute service account
-#os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
 credentials_raw = os.environ.get("google_authentication")
 if not credentials_raw:
     raise EnvironmentError("Google Cloud credentials not found in environment.")
 credentials_json = json.loads(credentials_raw)
 credentials = service_account.Credentials.from_service_account_info(credentials_json)
 # Global DataFrame declaration
 results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
-# Set your Google Cloud Document AI processor details here
 project_id = "herbaria-ai"
 location = "us"
 processor_id = "4307b078717a399a"
 def translate_text(text, target_language="en"):
-    translate_client = translate.Client(credentials=credenentials)
     result = translate_client.translate(text, target_language=target_language)
     return result["translatedText"]
 def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
     opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com", credentials=credentials)
     client = documentai.DocumentProcessorServiceClient(client_options=opts)
     with open(file_path, "rb") as file_stream:
@@ -46,9 +50,11 @@ def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
     extracted_text = result.document.text
     translated_text = translate_text(extracted_text)
     return extracted_text, translated_text
 def unzip_and_find_jpgs(file_path):
     extract_path = "extracted_files"
     os.makedirs(extract_path, exist_ok=True)
     jpg_files = []
@@ -61,13 +67,14 @@ def unzip_and_find_jpgs(file_path):
                 if file.lower().endswith('.jpg'):
                     full_path = os.path.join(root, file)
                     jpg_files.append(full_path)
     return jpg_files
 def process_images(uploaded_file):
     global results_df
-    results_df = results_df.iloc[0:0]  # Clear the DataFrame if re-running this cell
-    file_path = uploaded_file.name  # Gradio provides the file path through the .name attribute
     try:
         image_files = unzip_and_find_jpgs(file_path)
@@ -84,8 +91,10 @@ def process_images(uploaded_file):
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
     return results_df.to_html()
 interface = gr.Interface(
@@ -98,13 +107,3 @@ interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch(debug=True)
-# def greet(name):
-   # return "Hello " + name + "!!"
-#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
-#iface.launch()

 import pandas as pd
 import os
+import json
+import zipfile
 import io
 import gradio as gr
+import logging
 from google.oauth2 import service_account
+from google.api_core.client_options import ClientOptions
+from google.cloud import documentai_v1 as documentai
+from google.cloud.documentai_v1.types import RawDocument
+from google.cloud import translate_v2 as translate
+# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# Load credentials from environment variable
 credentials_raw = os.environ.get("google_authentication")
 if not credentials_raw:
     raise EnvironmentError("Google Cloud credentials not found in environment.")
 credentials_json = json.loads(credentials_raw)
 credentials = service_account.Credentials.from_service_account_info(credentials_json)
+logging.info("Loaded Google Cloud credentials successfully.")
 # Global DataFrame declaration
 results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
+# Google Cloud Document AI processor details
 project_id = "herbaria-ai"
 location = "us"
 processor_id = "4307b078717a399a"
 def translate_text(text, target_language="en"):
+    logging.info(f"Translating text to {target_language}.")
+    translate_client = translate.Client(credentials=credentials)
     result = translate_client.translate(text, target_language=target_language)
     return result["translatedText"]
 def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
+    logging.info(f"Processing document {file_path}.")
     opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com", credentials=credentials)
     client = documentai.DocumentProcessorServiceClient(client_options=opts)
     with open(file_path, "rb") as file_stream:
     extracted_text = result.document.text
     translated_text = translate_text(extracted_text)
+    logging.info(f"Document processed and translated for {file_path}.")
     return extracted_text, translated_text
 def unzip_and_find_jpgs(file_path):
+    logging.info(f"Unzipping file {file_path}.")
     extract_path = "extracted_files"
     os.makedirs(extract_path, exist_ok=True)
     jpg_files = []
                 if file.lower().endswith('.jpg'):
                     full_path = os.path.join(root, file)
                     jpg_files.append(full_path)
+    logging.info(f"Found {len(jpg_files)} JPG files in {file_path}.")
     return jpg_files
 def process_images(uploaded_file):
     global results_df
+    results_df = results_df.iloc[0:0]  # Clear the DataFrame
+    file_path = uploaded_file.name  # Gradio provides the file path
+    logging.info(f"Received file {file_path} for processing.")
     try:
         image_files = unzip_and_find_jpgs(file_path)
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
     except Exception as e:
+        logging.error(f"An error occurred: {str(e)}")
         return f"An error occurred: {str(e)}"
+    logging.info("Processing complete. Generating HTML output.")
     return results_df.to_html()
 interface = gr.Interface(
 if __name__ == "__main__":
     interface.launch(debug=True)