import pathlib

import gradio as gr
import torch
from transformers import AutoFeatureExtractor, DetrForObjectDetection

from constants import MODELS_REPO, MODELS_NAMES
from style import css, description, title
from visualization import visualize_attention_map, visualize_prediction


def make_prediction(img, feature_extractor, model):
    """Run the model on a PIL image and return the post-processed detections
    plus the decoder and encoder attention maps."""
    inputs = feature_extractor(img, return_tensors="pt")
    # output_attentions=True is required; without it the attention tensors
    # accessed below are absent from the model output.
    outputs = model(**inputs, output_attentions=True)
    # PIL reports size as (width, height); post_process expects (height, width).
    img_size = torch.tensor([tuple(reversed(img.size))])
    # Note: newer transformers releases deprecate `post_process` in favor of
    # `post_process_object_detection`.
    processed_outputs = feature_extractor.post_process(outputs, img_size)
    return (
        processed_outputs[0],
        outputs["decoder_attentions"],
        outputs["encoder_attentions"],
        # outputs["cross_attentions"],
    )


def detect_objects(model_name, image_input, threshold, display_mask=False, img_input_mask=None):
    feature_extractor = AutoFeatureExtractor.from_pretrained(MODELS_REPO[model_name])

    if "DETR" in model_name:
        model = DetrForObjectDetection.from_pretrained(MODELS_REPO[model_name])
        model_details = "DETR details"
    else:
        # Without this guard, `model` would be unbound below and raise a NameError.
        raise ValueError(f"Unsupported model: {model_name}")

    (
        processed_outputs,
        decoder_attention_map,
        encoder_attention_map,
        # cross_attention_map,
    ) = make_prediction(image_input, feature_extractor, model)

    viz_img = visualize_prediction(
        pil_img=image_input,
        output_dict=processed_outputs,
        threshold=threshold,
        id2label=model.config.id2label,
        display_mask=display_mask,
        mask=img_input_mask,
    )
    decoder_attention_map_img = visualize_attention_map(image_input, decoder_attention_map)
    encoder_attention_map_img = visualize_attention_map(image_input, encoder_attention_map)
    # cross_attention_map_img = visualize_attention_map(image_input, cross_attention_map)

    return (
        viz_img,
        decoder_attention_map_img,
        encoder_attention_map_img,
        # cross_attention_map_img,
        model_details,
    )


def set_example_image(example: list):
    # A Dataset click passes the selected sample: [image path, mask path].
    return gr.Image.update(value=example[0]), gr.Image.update(value=example[1])


with gr.Blocks(css=css) as app:
    gr.Markdown(title)
    with gr.Tabs():
        with gr.TabItem("Image upload and detections visualization"):
            with gr.Row():
                with gr.Column():
                    with gr.Row():
                        img_input = gr.Image(type="pil")
                        img_input_mask = gr.Image(type="pil", visible=False)
                    with gr.Row():
                        # Pair each H&E test image with its mask, derived by
                        # filename convention (*_HE.png -> *_mask.png).
                        example_images = gr.Dataset(
                            components=[img_input, img_input_mask],
                            samples=[
                                [path.as_posix(), path.as_posix().replace("_HE", "_mask")]
                                for path in sorted(
                                    pathlib.Path("cd45rb_test_imgs").rglob("*_HE.png")
                                )
                            ],
                            samples_per_page=2,
                        )
                with gr.Column():
                    with gr.Row():
                        options = gr.Dropdown(
                            value=MODELS_NAMES[0],
                            choices=MODELS_NAMES,
                            label="Select an object detection model",
                            show_label=True,
                        )
                    with gr.Row():
                        slider_input = gr.Slider(
                            minimum=0.2, maximum=1, value=0.7, label="Prediction threshold"
                        )
                    with gr.Row():
                        # `value` (not `default`) sets the initial state in Gradio Blocks.
                        display_mask = gr.Checkbox(label="Display masks", value=False)
                    with gr.Row():
                        detect_button = gr.Button("Detect leukocytes")
            with gr.Row():
                with gr.Column():
                    gr.Markdown("The selected image with bounding boxes detected by the model")
                    img_output_from_upload = gr.Image(shape=(800, 800))
        with gr.TabItem("Attentions visualization"):
            gr.Markdown("Encoder attentions")
            with gr.Row():
                encoder_att_map_output = gr.Image(shape=(850, 850))
            gr.Markdown("Decoder attentions")
            with gr.Row():
                decoder_att_map_output = gr.Image(shape=(850, 850))
            # gr.Markdown("Cross attentions")
            # with gr.Row():
            #     cross_att_map_output = gr.Image(shape=(850, 850))
        with gr.TabItem("Model details"):
            with gr.Row():
                model_details = gr.Markdown(" ")
        with gr.TabItem("Dataset details"):
            with gr.Row():
                gr.Markdown(description)
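    # Wire the UI events to the callbacks defined above. queue=True on the
    # detection click routes the long-running model call through Gradio's
    # queue so the request does not time out while inference runs.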
    detect_button.click(
        detect_objects,
        inputs=[options, img_input, slider_input, display_mask, img_input_mask],
        outputs=[
            img_output_from_upload,
            decoder_att_map_output,
            encoder_att_map_output,
            # cross_att_map_output,
            model_details,
        ],
        queue=True,
    )

    example_images.click(
        fn=set_example_image,
        inputs=[example_images],
        outputs=[img_input, img_input_mask],
        show_progress=True,
    )

app.launch(enable_queue=True)
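# Usage sketch (assumes this file is saved as app.py, which is not stated in
# the source): run `python app.py`, then open the local URL Gradio prints
# (http://127.0.0.1:7860 by default).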