import os

# NOTE(review): hard-coding a service-account key filename in source is fragile
# and risky; prefer configuring GOOGLE_APPLICATION_CREDENTIALS in the deployment
# environment. Kept as-is to preserve behavior.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"

import io
import shutil
import tempfile
import zipfile

import gradio as gr
import pandas as pd
from google.api_core.client_options import ClientOptions
from google.cloud import documentai_v1 as documentai
from google.cloud import translate_v2 as translate
from google.cloud.documentai_v1.types import RawDocument

# Accumulates one row per processed image: filename, OCR text, translation.
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])

# Google Cloud Document AI processor details.
project_id = "herbaria-ai"
location = "us"
processor_id = "de954414712822b3"


def translate_text(text, target_language="en"):
    """Translate *text* into *target_language* using Cloud Translation v2."""
    translate_client = translate.Client()
    result = translate_client.translate(text, target_language=target_language)
    return result["translatedText"]


def batch_process_documents(file_path: str, file_mime_type: str) -> tuple:
    """OCR one document with Document AI, then translate the extracted text.

    Args:
        file_path: Path of the document (image) on disk.
        file_mime_type: MIME type passed to Document AI (e.g. "image/jpeg").

    Returns:
        (extracted_text, translated_text) tuple of strings.
    """
    opts = ClientOptions(api_endpoint=f"{location}-documentai.googleapis.com")
    client = documentai.DocumentProcessorServiceClient(client_options=opts)

    with open(file_path, "rb") as file_stream:
        raw_document = RawDocument(content=file_stream.read(), mime_type=file_mime_type)

    name = client.processor_path(project_id, location, processor_id)
    request = documentai.ProcessRequest(name=name, raw_document=raw_document)
    result = client.process_document(request=request)

    extracted_text = result.document.text
    translated_text = translate_text(extracted_text)
    return extracted_text, translated_text


def unzip_and_find_jpgs(file_path):
    """Extract the zip at *file_path* and return paths of contained JPEG images.

    Fixes:
    - The extraction directory is wiped before extracting, so images left over
      from a previous upload are no longer reprocessed into later results.
    - Both .jpg and .jpeg extensions are matched (the UI advertises "JPEG/JPG").
    """
    extract_path = "extracted_files"
    # Clear leftovers from earlier uploads; previously files accumulated here
    # across calls and stale images reappeared in new results.
    shutil.rmtree(extract_path, ignore_errors=True)
    os.makedirs(extract_path, exist_ok=True)

    jpg_files = []
    with zipfile.ZipFile(file_path, "r") as zip_ref:
        zip_ref.extractall(extract_path)
        for root, dirs, files in os.walk(extract_path):
            # Skip macOS resource-fork folders bundled into zips created on macOS.
            if "__MACOSX" in root:
                continue
            for file in files:
                if file.lower().endswith((".jpg", ".jpeg")):
                    jpg_files.append(os.path.join(root, file))
    return jpg_files


def process_images(uploaded_file):
    """Gradio callback: OCR + translate every image in an uploaded zip.

    Args:
        uploaded_file: Gradio file object (or None when the upload is cleared).

    Returns:
        (html_table, csv_path) on success; ("", "") when no file is given;
        (error_message, "") on failure.
    """
    global results_df
    # Reset accumulated results on every call, including when the upload is
    # cleared (both branches of the original reinitialized identically).
    results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
    if uploaded_file is None:
        return "", ""

    try:
        image_files = unzip_and_find_jpgs(uploaded_file.name)
        if not image_files:
            return "No JPG files found in the zip.", ""

        # Collect plain dicts and build the DataFrame once at the end —
        # avoids the quadratic pd.concat-inside-a-loop pattern.
        rows = []
        for image_path in image_files:
            extracted_text, translated_text = batch_process_documents(image_path, "image/jpeg")
            rows.append(
                {
                    "Filename": os.path.basename(image_path),
                    "Extracted Text": extracted_text,
                    "Translated Text": translated_text,
                }
            )
        results_df = pd.DataFrame(rows, columns=["Filename", "Extracted Text", "Translated Text"])
    except Exception as e:
        return f"An error occurred: {str(e)}", ""

    html_output = results_df.to_html()
    # delete=False: Gradio serves the CSV after this handler returns, so the
    # file must outlive the NamedTemporaryFile handle.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    results_df.to_csv(temp_file.name, index=False)
    temp_file.close()
    return html_output, temp_file.name


with gr.Blocks() as interface:
    with gr.Row():
        gr.Markdown("# Document AI Translation")
        gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
    with gr.Row():
        file_input = gr.File(label="Upload ZIP File")
    with gr.Row():
        html_output = gr.HTML()
    with gr.Row():
        file_output = gr.File()

    file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])

if __name__ == "__main__":
    interface.launch(debug=True)