pawlo2013's picture
let us fix it
6bae70d
raw
history blame
5.46 kB
import os
import gradio as gr
from PIL import Image
import torch
from transformers import ViTForImageClassification, ViTImageProcessor
from datasets import load_dataset
import matplotlib.pyplot as plt
import numpy as np
import cv2
# --- Image preprocessing ------------------------------------------------------
# The processor configuration is fetched from this Hub checkpoint.
model_name_or_path = "google/vit-base-patch16-224-in21k"
processor = ViTImageProcessor.from_pretrained(model_name_or_path)

# --- Label names --------------------------------------------------------------
# In the code visible here, the train split is only used to read the class
# names stored on its "label" feature (adjust dataset_path accordingly).
dataset_path = "pawlo2013/chest_xray"
train_dataset = load_dataset(dataset_path, split="train")
class_names = train_dataset.features["label"].names

# --- Classifier ---------------------------------------------------------------
# Weights are read from the local ./models directory and the label maps are
# rebuilt from class_names. `.eval()` returns the module itself, so chaining it
# leaves `model` bound to the same object, already in evaluation mode.
model = ViTForImageClassification.from_pretrained(
    "./models",
    num_labels=len(class_names),
    id2label=dict(zip(map(str, range(len(class_names))), class_names)),
    label2id=dict(zip(class_names, range(len(class_names)))),
).eval()
# Classification entry point called by the Gradio interface
def classify_and_visualize(img, device="cpu", discard_ratio=0.9, head_fusion="mean"):
    """Classify an image and build the matching attention heatmap.

    Args:
        img: PIL image to classify. It is converted to RGB before being
            handed to the module-level ``processor``.
        device: Device the pixel tensor is moved to. The module-level
            ``model`` is not moved by this function, so it must already be on
            the same device.
        discard_ratio: Forwarded unchanged to
            ``show_final_layer_attention_maps``.
        head_fusion: Forwarded unchanged to
            ``show_final_layer_attention_maps``.

    Returns:
        A dict with two keys: ``"probabilities"`` maps every entry of
        ``class_names`` to its softmax probability, and ``"heatmap"`` holds
        the value returned by ``show_final_layer_attention_maps``.

    Raises:
        gr.Error: If ``img`` is ``None`` (no image was supplied).
    """
    if img is None:
        # Gradio passes None when the form is submitted without an image;
        # surface a readable message instead of an AttributeError.
        raise gr.Error("Please upload an X-ray image first.")

    rgb_image = img.convert("RGB")
    pixel_values = processor(images=rgb_image, return_tensors="pt")["pixel_values"]
    pixel_values = pixel_values.to(device)

    with torch.no_grad():
        # Attentions are requested because the heatmap is computed from them.
        outputs = model(pixel_values, output_attentions=True)
        probabilities = torch.softmax(outputs.logits, dim=1)[0].tolist()

    result = dict(zip(class_names, probabilities))

    # Generate attention heatmap
    heatmap_img = show_final_layer_attention_maps(
        outputs, pixel_values, device, discard_ratio, head_fusion
    )

    return {"probabilities": result, "heatmap": heatmap_img}
def format_output(output):
    """Unpack a result dict into a ``(probabilities, heatmap)`` tuple.

    The tuple order matches the two output components declared on the
    Gradio interface (label first, heatmap image second).
    """
    probabilities = output["probabilities"]
    heatmap = output["heatmap"]
    return probabilities, heatmap
# Function to load examples from a folder
def load_examples_from_folder(folder_path):
    """Load every PNG/JPEG image found directly inside ``folder_path``.

    Files are matched by extension (case-insensitively) and returned in
    sorted file-name order so the result is the same on every run.

    Args:
        folder_path: Directory to scan; sub-directories are not descended into.

    Returns:
        A list of PIL images, one per matching file. The list is empty when
        the folder contains no matching files.

    Raises:
        OSError: If ``folder_path`` cannot be listed (for example, it does
            not exist) or a matching file cannot be opened as an image.
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    examples = []
    # os.listdir() returns names in arbitrary order; sort for a stable result.
    for file_name in sorted(os.listdir(folder_path)):
        # Compare in lower case so "SCAN.PNG" / "scan.JPG" are picked up too.
        if not file_name.lower().endswith(image_suffixes):
            continue
        image = Image.open(os.path.join(folder_path, file_name))
        # Image.open() is lazy and keeps the file handle open. load() reads the
        # pixel data now, after which Pillow closes the file it opened (for
        # single-frame images), so no handle is kept per example.
        image.load()
        examples.append(image)
    return examples
# Attention rollout heatmap (the app description credits jacobgil/vit-explain)
_HEAD_FUSION_MODES = ("mean", "max", "min")


def _fuse_heads(attention, head_fusion):
    """Collapse the head axis (dim 1) of one layer's attention tensor."""
    if head_fusion == "mean":
        return attention.mean(dim=1)
    if head_fusion == "max":
        return attention.max(dim=1)[0]
    if head_fusion == "min":
        return attention.min(dim=1)[0]
    raise ValueError(
        f"Unsupported head_fusion {head_fusion!r}; expected one of {_HEAD_FUSION_MODES}"
    )


def _min_max_scale(values):
    """Rescale a NumPy array to [0, 1]; a constant array maps to all zeros."""
    lowest = np.min(values)
    value_range = np.max(values) - lowest
    if value_range == 0:
        # Avoid 0/0 -> NaN, which would become garbage after the uint8 cast.
        return np.zeros_like(values)
    return (values - lowest) / value_range


def _attention_rollout(attention_layers, device, discard_ratio, head_fusion):
    """Multiply the per-layer attention matrices into one rollout matrix."""
    rollout = torch.eye(attention_layers[0].size(-1), device=device)

    for attention in attention_layers:
        fused = _fuse_heads(attention.to(device), head_fusion)

        # Zero the smallest `discard_ratio` share of the fused weights.
        # `flat` is a view, so the write below also changes `fused`.
        flat = fused.view(fused.size(0), -1)
        _, lowest = flat.topk(
            int(flat.size(-1) * discard_ratio), dim=-1, largest=False
        )
        # Flat index 0 (the [0, 0] entry) is never zeroed.
        lowest = lowest[lowest != 0]
        # Only batch element 0 is modified: this assumes a batch of one, which
        # is what classify_and_visualize passes in.
        flat[0, lowest] = 0

        # Add the identity, then make every row sum to 1.
        identity = torch.eye(fused.size(-1), device=device)
        averaged = (fused + identity) / 2
        # keepdim=True is required: without it the row sums broadcast along
        # the last axis and each *column* is divided instead of each row.
        averaged = averaged / averaged.sum(dim=-1, keepdim=True)

        rollout = torch.matmul(averaged, rollout)

    return rollout


def show_final_layer_attention_maps(
    outputs,
    processed_input,
    device,
    discard_ratio=0.6,
    head_fusion="max",
    only_last_layer=False,
):
    """Overlay an attention-rollout heatmap on the model input image.

    Despite the name, all layers in ``outputs.attentions`` are rolled out
    unless ``only_last_layer`` is set.

    Args:
        outputs: Model output exposing ``attentions``, an indexable sequence
            of per-layer tensors indexed as (batch, head, token, token).
        processed_input: Pixel tensor given to the model. Its leading batch
            axis is squeezed away, so a batch of one is expected.
        device: Device on which the rollout is computed.
        discard_ratio: Fraction of the lowest fused attention weights that is
            zeroed in every layer before the rollout.
        head_fusion: How heads are combined: "mean", "max" or "min".
        only_last_layer: When true, only the last entry of
            ``outputs.attentions`` is used.

    Returns:
        A PIL image: the jet-coloured mask (weight 0.4) blended with the
        min-max scaled input image (weight 0.6), as 8-bit values.

    Raises:
        ValueError: If ``head_fusion`` is not one of the supported modes.
    """
    # Validate first so a typo fails with a clear message before any work.
    if head_fusion not in _HEAD_FUSION_MODES:
        raise ValueError(
            f"Unsupported head_fusion {head_fusion!r}; "
            f"expected one of {_HEAD_FUSION_MODES}"
        )

    with torch.no_grad():
        if only_last_layer:
            attention_layers = [outputs.attentions[-1]]
        else:
            attention_layers = outputs.attentions

        rollout = _attention_rollout(
            attention_layers, device, discard_ratio, head_fusion
        )

        # Row 0 / columns 1.. : rollout from the first token to all the others,
        # laid out as a square grid.
        first_token_row = rollout[0, 0, 1:]
        grid_size = int(first_token_row.size(-1) ** 0.5)
        mask = first_token_row.reshape(grid_size, grid_size).cpu().numpy()

        # Resize to the input's own spatial size; cv2 expects (width, height).
        image = processed_input.squeeze(0)
        height, width = image.shape[-2:]
        mask = _min_max_scale(cv2.resize(mask, (width, height)))
        heatmap = plt.cm.jet(mask)[:, :, :3]  # drop the alpha channel

        # Channels-last copy of the input, scaled to [0, 1] for display.
        showed_img = _min_max_scale(image.permute(1, 2, 0).detach().cpu().numpy())
        superimposed_img = heatmap * 0.4 + showed_img * 0.6

        return Image.fromarray((superimposed_img * 255).astype(np.uint8))
# Example images offered in the UI are read from the local examples folder.
examples_folder = "./examples"
examples = load_examples_from_folder(examples_folder)

# Gradio interface: one PIL image in; class probabilities and the attention
# heatmap out (in the order produced by format_output).
iface = gr.Interface(
    fn=lambda img: format_output(classify_and_visualize(img)),
    inputs=gr.Image(type="pil", label="Upload X-Ray Image"),
    outputs=[
        gr.Label(),
        gr.Image(label="Attention Heatmap"),
    ],
    examples=examples,
    cache_examples=False,
    # Interface documents string modes ("never" / "auto" / "manual") for this
    # option; "never" is the documented way to disable flagging.
    allow_flagging="never",
    concurrency_limit=1,
    title="Pneumonia X-Ray 3-Class Classification with Vision Transformer (ViT) using data augmentation",
    description=(
        "Upload an X-ray image to classify it as normal, viral or bacterial pneumonia. "
        "Checkout the model in more details "
        "[here](https://huggingface.co/pawlo2013/vit-pneumonia-x-ray_3_class). "
        "The examples presented are taken from the test set of "
        # The sentence-ending period sits outside the link so the URL is valid.
        "[Kermany et al. (2018) dataset](https://data.mendeley.com/datasets/rscbjbr9sj/2). "
        "The attention heatmap over all layers of the transformer done by the "
        "attention rollout technique by the implementation of "
        "[jacobgil](https://github.com/jacobgil/vit-explain)."
    ),
)

# Launch the app
if __name__ == "__main__":
    iface.launch(debug=True)