# app.py -- uploaded by mkaramb.
# Commit 56e077e (verified): "Update app.py". File size: 2 kB.
import gradio as gr
import os
import pandas as pd
from google.cloud import documentai_v1 as documentai
from google.cloud.documentai_v1.types import RawDocument
from google.cloud import translate_v2 as translate
import zipfile
import io
# Google Cloud credentials are assumed to be provided through the
# GOOGLE_APPLICATION_CREDENTIALS environment variable; nothing is loaded here.
#
# Document AI processor details. Each value may be overridden with an
# environment variable so the app can target another processor without a code
# change; the defaults are the values this app was originally written against.
project_id = os.environ.get("DOCAI_PROJECT_ID", "herbaria-ai")
location = os.environ.get("DOCAI_LOCATION", "us")
processor_id = os.environ.get("DOCAI_PROCESSOR_ID", "4307b078717a399a")
def translate_text(text, target_language="en"):
    """Translate *text* using the Google Cloud Translation (v2) client.

    Args:
        text: The text to translate.
        target_language: Language code to translate into (default ``"en"``).

    Returns:
        The translated string, or ``""`` when *text* is empty or contains
        only whitespace.
    """
    # OCR can legitimately yield no text; skip the API call when there is
    # nothing to translate.
    if not text or not text.strip():
        return ""
    translate_client = translate.Client()
    # format_="text" makes the API treat the input as plain text, so the
    # result does not come back with HTML entities (e.g. "&#39;" for "'").
    result = translate_client.translate(
        text,
        target_language=target_language,
        format_="text",
    )
    return result["translatedText"]
def process_image(file):
    """Run OCR and translation on a file uploaded through the Gradio UI.

    Args:
        file: The upload as handed over by Gradio: either a filesystem path
            or an object exposing that path as ``.name``. ``None`` when the
            user submitted without choosing a file.

    Returns:
        A ``(extracted_text, translated_text)`` tuple; two empty strings when
        no file was provided.
    """
    import mimetypes  # local import: only this function needs it

    if file is None:
        return "", ""
    # The upload is already on disk, and the object Gradio passes has no
    # save() method, so only the path needs to be resolved here.
    file_path = getattr(file, "name", file)
    # Derive the MIME type from the file name instead of assuming JPEG, so
    # PNG/TIFF/PDF uploads are labelled correctly; keep JPEG as the fallback.
    mime_type, _ = mimetypes.guess_type(str(file_path))
    if mime_type is None:
        mime_type = "image/jpeg"
    extracted_text, translated_text = batch_process_documents(file_path, mime_type)
    return extracted_text, translated_text
def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
    """Send one local file to the Document AI processor and translate its text.

    Args:
        file_path: Path of the file to read and submit.
        file_mime_type: MIME type declared for the file content.

    Returns:
        A ``(extracted_text, translated_text)`` tuple, where the second item
        is the result of passing the first through ``translate_text``.
    """
    # The endpoint is derived from the module-level processor location.
    endpoint = f"{location}-documentai.googleapis.com"
    client = documentai.DocumentProcessorServiceClient(
        client_options=documentai.ClientOptions(api_endpoint=endpoint)
    )

    with open(file_path, "rb") as file_stream:
        payload = file_stream.read()
    raw_document = RawDocument(content=payload, mime_type=file_mime_type)

    response = client.process_document(
        request=documentai.ProcessRequest(
            name=client.processor_path(project_id, location, processor_id),
            raw_document=raw_document,
        )
    )

    extracted_text = response.document.text
    return extracted_text, translate_text(extracted_text)
# Gradio UI: one file upload in, two text boxes out (OCR text and its
# translation). Components are taken from the top-level ``gr`` namespace
# because the ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated and
# no longer present in current Gradio releases.
iface = gr.Interface(
    fn=process_image,
    inputs=gr.File(label="Upload Image File"),
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Translated Text"),
    ],
)
iface.launch()
# def greet(name):
# return "Hello " + name + "!!"
#iface = gr.Interface(fn=greet, inputs="text", outputs="text")
#iface.launch()