Spaces:

BhumikaMak
/

NeuralVista

Sleeping

App Files Files Community

BhumikaMak committed on Dec 20, 2024

Commit

87360eb

1 Parent(s): 93fea1b

Debug: parsing detections

Browse files

Files changed (4) hide show

app.py +54 -39
test.png +0 -0
test.py +58 -0
yolov8.py +16 -1

app.py CHANGED Viewed

@@ -1,40 +1,55 @@
 import numpy as np
-import cv2
-from PIL import Image
-import torchvision.transforms as transforms
-import gradio as gr
-from yolov5 import xai_yolov5
-from yolov8 import xai_yolov8n
-def process_image(image, yolo_versions=["yolov5"]):
-    image = np.array(image)
-    image = cv2.resize(image, (640, 640))
-    result_images = []
-    for yolo_version in yolo_versions:
-        if yolo_version == "yolov5":
-            result_images.append(xai_yolov5(image))
-        elif yolo_version == "yolov8n":
-            result_images.append(xai_yolov8n(image))
-        else:
-            result_images.append((Image.fromarray(image), f"{yolo_version} not yet implemented."))
-    return result_images
-interface = gr.Interface(
-    fn=process_image,
-    inputs=[
-        gr.Image(type="pil", label="Upload an Image"),
-        gr.CheckboxGroup(
-            choices=["yolov5", "yolov8n", "yolov10"],
-            value=["yolov5"],  # Set default selection to YOLOv5
-            label="Select Model(s)",
-        )
-    ],
-    outputs=gr.Gallery(label="Results", elem_id="gallery", rows=2, height=500),
-    title="Explainable AI for YOLO Models",
-    description="Upload an image to visualize YOLO object detection with Grad-CAM."
-)
-if __name__ == "__main__":
-    interface.launch()

+import torch
+from transformers import BertTokenizer, BertForMaskedLM
+import matplotlib.pyplot as plt
 import numpy as np
+from sklearn.manifold import TSNE
+# Visualize how each token of a sample sentence distributes its attention in a
+# single BERT attention head, by projecting the per-token attention rows to 2D
+# with t-SNE and saving/showing the labelled scatter plot.
+# Load a pre-trained model and tokenizer
+model_name = 'bert-base-uncased'
+tokenizer = BertTokenizer.from_pretrained(model_name)
+model = BertForMaskedLM.from_pretrained(model_name)
+# Example input text
+text = "The quick brown fox jumps over the lazy dog"
+# Tokenize the input text
+inputs = tokenizer(text, return_tensors="pt")
+input_ids = inputs['input_ids']
+# Get attention weights by running the model
+with torch.no_grad():
+    outputs = model(input_ids, output_attentions=True)
+# outputs.attentions is indexed below as [layer][batch][head], leaving a
+# [seq_len, seq_len] matrix for the selected head
+attention_weights = outputs.attentions
+# Select a specific layer and attention head
+layer_idx = 0  # First layer
+head_idx = 0   # First attention head
+# Get the attention matrix for this layer and head (batch item 0)
+attention_matrix = attention_weights[layer_idx][0][head_idx].cpu().numpy()
+# Use t-SNE to reduce each row (one token's attention distribution) to 2D.
+# t-SNE requires perplexity < number of samples; the default of 30 exceeds the
+# handful of tokens in this sentence, so cap it just below the token count.
+perplexity = min(5, attention_matrix.shape[0] - 1)
+tsne = TSNE(n_components=2, random_state=42, perplexity=perplexity)
+reduced_attention = tsne.fit_transform(attention_matrix)
+# Plotting the reduced attention embeddings
+fig, ax = plt.subplots(figsize=(10, 10))
+# Plot the reduced attention in 2D
+ax.scatter(reduced_attention[:, 0], reduced_attention[:, 1])
+# Annotate the tokens in the scatter plot
+tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
+for i, token in enumerate(tokens):
+    ax.annotate(token, (reduced_attention[i, 0], reduced_attention[i, 1]), fontsize=12, ha='right')
+# Title, axis labels and grid
+plt.title(f"t-SNE Visualization of Attention - Layer {layer_idx+1}, Head {head_idx+1}")
+plt.xlabel("t-SNE Dimension 1")
+plt.ylabel("t-SNE Dimension 2")
+plt.grid(True)
+# Save before showing: after show() returns the figure may already be closed,
+# in which case savefig would write an empty image.
+plt.savefig('test.png')
+plt.show()

test.png ADDED Viewed

test.py ADDED Viewed

	@@ -0,0 +1,58 @@

+import torch
+from transformers import BertTokenizer, BertForMaskedLM
+import matplotlib.pyplot as plt
+from sklearn.manifold import TSNE
+import numpy as np
+from mpl_toolkits.mplot3d import Axes3D
+# Project one BERT attention head's per-token attention rows into 3D with
+# t-SNE and display them as a labelled scatter plot.
+model_name = 'bert-base-uncased'
+# Pretrained weights and the matching tokenizer
+model = BertForMaskedLM.from_pretrained(model_name)
+tokenizer = BertTokenizer.from_pretrained(model_name)
+# Sample sentence, encoded as PyTorch tensors
+text = "The quick brown fox jumps over the lazy dog"
+inputs = tokenizer(text, return_tensors="pt")
+input_ids = inputs['input_ids']
+# Which attention map to inspect (both indices are zero-based)
+layer_idx = 0
+head_idx = 0
+# Forward pass without gradient tracking, asking the model to return attentions
+with torch.no_grad():
+    outputs = model(input_ids, output_attentions=True)
+attention_weights = outputs.attentions
+# Chosen layer, the single sequence in the batch, chosen head
+attention_matrix = attention_weights[layer_idx][0, head_idx].cpu().numpy()
+# Every row (one token's attention distribution) becomes one 3D point; the
+# perplexity is set low to suit the small number of tokens
+tsne = TSNE(n_components=3, random_state=42, perplexity=5)
+reduced_attention = tsne.fit_transform(attention_matrix)
+tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
+# 3D axes holding the scatter plot
+fig = plt.figure(figsize=(12, 10))
+ax = fig.add_subplot(111, projection='3d')
+xs, ys, zs = reduced_attention.T
+ax.scatter(xs, ys, zs)
+# Label each point with the token it represents
+for token, (x, y, z) in zip(tokens, reduced_attention):
+    ax.text(x, y, z, token, fontsize=12, ha='center')
+# Title and axis labels
+ax.set_title(f"3D t-SNE Visualization of Attention - Layer {layer_idx+1}, Head {head_idx+1}")
+ax.set_xlabel("t-SNE Dimension 1")
+ax.set_ylabel("t-SNE Dimension 2")
+ax.set_zlabel("t-SNE Dimension 3")
+plt.show()

yolov8.py CHANGED Viewed

@@ -56,19 +56,34 @@ def xai_yolov8n(image):
     # Check if GPU is available and use it
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model.to(device)
     target_layers = [model.model.model[-2]]  # Grad-CAM target layer
     results = model([image])
     if isinstance(results, list):
         results = results[0]  # Extracting the first result (if list)
     boxes, colors, names = parse_detections([results])  # Ensure results are passed as a list
     detections_img = draw_detections(boxes, colors, names, image.copy())
     img_float = np.float32(image) / 255
     transform = transforms.ToTensor()
     tensor = transform(img_float).unsqueeze(0).to(device)  # Ensure tensor is on the right device
     cam_image, renormalized_cam_image = generate_cam_image(model, target_layers, tensor, image, boxes)
     final_image = np.hstack((image, cam_image, renormalized_cam_image))
     # Return final image and a caption
     caption = "Results using YOLOv8n"
-    return Image.fromarray(final_image), caption

     # Check if GPU is available and use it
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model.to(device)
     target_layers = [model.model.model[-2]]  # Grad-CAM target layer
+    # Process the image through the model
     results = model([image])
+    # If results are a list, extract the first element (detected results)
     if isinstance(results, list):
         results = results[0]  # Extracting the first result (if list)
+    # Ensure that outputs are in tensor form
+    logits = results.pred[0]  # Get the prediction tensor from the results
+    # Parse the detections
     boxes, colors, names = parse_detections([results])  # Ensure results are passed as a list
     detections_img = draw_detections(boxes, colors, names, image.copy())
+    # Prepare image for Grad-CAM
     img_float = np.float32(image) / 255
     transform = transforms.ToTensor()
     tensor = transform(img_float).unsqueeze(0).to(device)  # Ensure tensor is on the right device
+    # Generate CAM images
     cam_image, renormalized_cam_image = generate_cam_image(model, target_layers, tensor, image, boxes)
+    # Combine original image, CAM image, and renormalized CAM image
     final_image = np.hstack((image, cam_image, renormalized_cam_image))
     # Return final image and a caption
     caption = "Results using YOLOv8n"
+    return Image.fromarray(final_image), caption