|
import io
import mimetypes
import os
import zipfile

import gradio as gr
import pandas as pd
from google.cloud import documentai_v1 as documentai
from google.cloud import translate_v2 as translate
from google.cloud.documentai_v1.types import RawDocument
|
|
|
|
|
|
|
# Document AI processor coordinates. `location` is used both for the regional
# API endpoint and for the processor resource path. Each value may be
# overridden through an environment variable so the app can target another
# project or processor without a source edit; unset or empty variables fall
# back to the original hard-coded values.
project_id = os.environ.get("DOCAI_PROJECT_ID") or "herbaria-ai"
location = os.environ.get("DOCAI_LOCATION") or "us"
processor_id = os.environ.get("DOCAI_PROCESSOR_ID") or "4307b078717a399a"
|
|
|
def translate_text(text, target_language="en"):
    """Translate ``text`` with the Google Cloud Translation (v2) client.

    Args:
        text: Plain text to translate.
        target_language: Code of the language to translate into
            (defaults to ``"en"``).

    Returns:
        The translated string, or ``""`` when ``text`` is empty, in which
        case no API call is made.
    """
    # Nothing to translate (e.g. OCR found no text): skip the network call.
    if not text:
        return ""

    translate_client = translate.Client()
    # The input is OCR output, not markup. Declaring it as plain text keeps
    # the service from treating it as HTML, which would return
    # entity-escaped output (e.g. "&#39;" instead of "'").
    result = translate_client.translate(
        text,
        target_language=target_language,
        format_="text",
    )
    return result["translatedText"]
|
|
|
def process_image(file):
    """Run OCR and translation on a file uploaded through the Gradio UI.

    Args:
        file: The upload handed over by Gradio: either a filesystem path
            string or a file-like object whose ``name`` attribute is the
            path of the uploaded file. ``None`` means nothing was uploaded.

    Returns:
        An ``(extracted_text, translated_text)`` tuple. Both entries are
        ``""`` when no file was provided.
    """
    if file is None:
        return "", ""

    # The upload already exists on disk, so there is nothing to save here;
    # the object Gradio passes in has no ``save`` method, and calling one
    # raised AttributeError.
    file_path = file if isinstance(file, str) else file.name

    # Pick the MIME type from the file extension so PNG/TIFF/PDF uploads are
    # labelled correctly; fall back to the previous fixed value when the
    # extension is unknown.
    guessed_type, _ = mimetypes.guess_type(file_path)
    return batch_process_documents(file_path, guessed_type or "image/jpeg")
|
|
|
def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
    """Send one local file to the Document AI processor and translate its text.

    Despite the name, this issues a single ``process_document`` request for
    one file; the processor is identified by the module-level ``project_id``,
    ``location`` and ``processor_id``.

    Args:
        file_path: Path of the file to read and submit.
        file_mime_type: MIME type reported to Document AI for the file.

    Returns:
        An ``(extracted_text, translated_text)`` tuple: the document text
        returned by the processor and the result of ``translate_text`` on it.
    """
    # The endpoint is region-specific. Options are passed as a plain dict
    # because ``ClientOptions`` is not exported by the ``documentai`` module.
    client = documentai.DocumentProcessorServiceClient(
        client_options={"api_endpoint": f"{location}-documentai.googleapis.com"}
    )

    # Read the payload and release the file handle before the network call.
    with open(file_path, "rb") as file_stream:
        raw_document = RawDocument(
            content=file_stream.read(),
            mime_type=file_mime_type,
        )

    name = client.processor_path(project_id, location, processor_id)
    request = documentai.ProcessRequest(name=name, raw_document=raw_document)
    result = client.process_document(request=request)

    extracted_text = result.document.text
    translated_text = translate_text(extracted_text)
    return extracted_text, translated_text
|
|
|
# Gradio UI: one file upload in, two text boxes out (the extracted text and
# its translation, in the order returned by process_image). Components come
# from the top-level ``gr`` namespace; the older ``gr.inputs`` / ``gr.outputs``
# namespaces are deprecated and missing from current Gradio releases.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.File(label="Upload Image File"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Translated Text"),
    ],
)

# Start the web server as soon as the module is executed.
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|