vicellst-att

Runtime error

File size: 4,879 Bytes

import pathlib
from constants import MODELS_REPO, MODELS_NAMES

import gradio as gr
import torch


from transformers import (AutoFeatureExtractor, DetrForObjectDetection,
                          YolosForObjectDetection)
from visualization import visualize_attention_map, visualize_prediction
from style import css, description, title


def make_prediction(img, feature_extractor, model):
    """Run the detection model on one image and collect its attention outputs.

    Args:
        img: Input image exposing a ``size`` attribute. The size is reversed
            before being handed to ``post_process``; for a PIL image that
            turns ``(width, height)`` into ``(height, width)``.
        feature_extractor: Callable preprocessor invoked as
            ``feature_extractor(img, return_tensors="pt")`` that also
            provides ``post_process(outputs, target_sizes)``.
        model: Detection model called with the preprocessed inputs as
            keyword arguments.

    Returns:
        A 4-tuple ``(detections, decoder_attentions, encoder_attentions,
        cross_attentions)`` where ``detections`` is the first entry of the
        post-processed outputs. When the model output exposes a single
        ``"attentions"`` entry, that same value fills all three attention
        slots.

    Raises:
        KeyError: If the model output exposes neither ``"attentions"`` nor
            the ``"decoder_attentions"`` / ``"encoder_attentions"`` /
            ``"cross_attentions"`` entries (presumably the model must be
            configured to return attentions -- confirm against the model
            configs in use).
    """
    # Inference only: disabling autograd avoids building a gradient graph on
    # every request and keeps the returned tensors detached from one.
    with torch.no_grad():
        inputs = feature_extractor(img, return_tensors="pt")
        outputs = model(**inputs)
        img_size = torch.tensor([tuple(reversed(img.size))])
        processed_outputs = feature_extractor.post_process(outputs, img_size)

    detections = processed_outputs[0]

    # Outputs with a single "attentions" entry (the YOLOS case in this app)
    # have no separate encoder/decoder/cross maps, so reuse it for all three.
    if "attentions" in outputs:
        attentions = outputs["attentions"]
        return detections, attentions, attentions, attentions

    return (
        detections,
        outputs["decoder_attentions"],
        outputs["encoder_attentions"],
        outputs["cross_attentions"],
    )


def detect_objects(model_name, image_input, threshold):
    """Detect objects in an image and render detection and attention views.

    Args:
        model_name: Key into ``MODELS_REPO``; must contain ``"DETR"`` or
            ``"YOLOS"`` so the matching model class can be chosen.
        image_input: Image passed on to ``make_prediction`` and to the
            visualization helpers.
        threshold: Value forwarded to ``visualize_prediction`` together with
            the model's ``id2label`` mapping.

    Returns:
        A 5-tuple ``(detections_image, decoder_attention_image,
        encoder_attention_image, cross_attention_image, model_details)``.

    Raises:
        KeyError: If ``model_name`` is not a key of ``MODELS_REPO``.
        ValueError: If ``model_name`` names neither a DETR nor a YOLOS model.
    """
    repo_id = MODELS_REPO[model_name]

    # Every branch must set both the model class and the details text;
    # previously the YOLOS branch left `model_details` unbound and the
    # function crashed when building its return value.
    if "DETR" in model_name:
        model_cls = DetrForObjectDetection
        model_details = "DETR details"
    elif "YOLOS" in model_name:
        model_cls = YolosForObjectDetection
        model_details = "YOLOS details"
    else:
        raise ValueError(
            f"Unsupported model {model_name!r}: expected a DETR or YOLOS model"
        )

    feature_extractor = AutoFeatureExtractor.from_pretrained(repo_id)
    model = model_cls.from_pretrained(repo_id)

    (
        processed_outputs,
        decoder_attention_map,
        encoder_attention_map,
        cross_attention_map,
    ) = make_prediction(image_input, feature_extractor, model)

    viz_img = visualize_prediction(
        image_input, processed_outputs, threshold, model.config.id2label
    )
    decoder_attention_map_img = visualize_attention_map(
        image_input, decoder_attention_map
    )
    encoder_attention_map_img = visualize_attention_map(
        image_input, encoder_attention_map
    )
    cross_attention_map_img = visualize_attention_map(image_input, cross_attention_map)

    return (
        viz_img,
        decoder_attention_map_img,
        encoder_attention_map_img,
        cross_attention_map_img,
        model_details,
    )


def set_example_image(example: list) -> dict:
    """Build an image-component update that shows the first entry of ``example``."""
    first_entry = example[0]
    image_update = gr.Image.update(value=first_entry)
    return image_update


# Build the Gradio UI. Components are created inside nested layout contexts,
# so the statement order below is kept exactly as written.
with gr.Blocks(css=css) as app:
    # Page header: title and description come from the `style` module.
    gr.Markdown(title)
    gr.Markdown(description)

    with gr.Tabs():
        # Tab 1: image input, model/threshold controls, examples, and the
        # image with the detected bounding boxes.
        with gr.TabItem("Image upload and detections visualization"):
            with gr.Row():
                with gr.Column():
                    # type="pil": make_prediction reads `img.size` from it.
                    img_input = gr.Image(type="pil")
                with gr.Column():
                    # Model selector; defaults to the first known model name.
                    options = gr.Dropdown(
                        value=MODELS_NAMES[0],
                        choices=MODELS_NAMES,
                        label="Select an object detection model",
                        show_label=True,
                    )
                    # Threshold forwarded to detect_objects.
                    slider_input = gr.Slider(
                        minimum=0.2, maximum=1, value=0.7, label="Prediction threshold"
                    )
                    detect_button = gr.Button("Detect leukocytes")
            with gr.Row():
                # Example gallery: every PNG found recursively under
                # "cd45rb_test_imgs", in sorted path order.
                example_images = gr.Dataset(
                    components=[img_input],
                    samples=[
                        [path.as_posix()]
                        for path in sorted(
                            pathlib.Path("cd45rb_test_imgs").rglob("*.png")
                        )
                    ],
                )
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """The selected image with detected bounding boxes by the model"""
                    )
                    img_output_from_upload = gr.Image(shape=(850, 850))
        # Tab 2: one labelled image per attention map.
        with gr.TabItem("Attention maps visualization"):
            with gr.Row():
                gr.Markdown("""Encoder attentions""")
                encoder_att_map_output = gr.Image(shape=(850, 850))
            with gr.Row():
                gr.Markdown("""Decoder attentions""")
                decoder_att_map_output = gr.Image(shape=(850, 850))
            with gr.Row():
                gr.Markdown("""Cross attentions""")
                cross_att_map_output = gr.Image(shape=(850, 850))
        # Tab 3: Markdown component that receives the model details text
        # returned by detect_objects.
        with gr.TabItem("Model details"):
            with gr.Row():
                model_details = gr.Markdown(""" """)

    # Run detection on click. The outputs list follows the order of the tuple
    # returned by detect_objects: detections image, decoder, encoder and cross
    # attention images, then the model details text.
    detect_button.click(
        detect_objects,
        inputs=[options, img_input, slider_input],
        outputs=[
            img_output_from_upload,
            decoder_att_map_output,
            encoder_att_map_output,
            cross_att_map_output,
            model_details,
        ],
        queue=True,
    )
    # Clicking an example loads it into the image input via set_example_image.
    example_images.click(
        fn=set_example_image, inputs=[example_images], outputs=[img_input]
    )

    # NOTE(review): launch is called inside the Blocks context and at import
    # time; `enable_queue` may not be accepted by newer Gradio releases --
    # confirm against the pinned Gradio version.
    app.launch(enable_queue=True)