# build_kdtree.py
"""Build a KD-tree index over the images in the dataset folder.

Each dataset image is resized to a fixed tile size, reduced to a weighted
6-dimensional feature vector, and indexed in a scikit-learn ``KDTree``.  The
tree, the resized images and the feature matrix are pickled to
``KD_TREE_PATH``.
"""
import math
import os
import pickle

import cv2
import numpy as np
from sklearn.neighbors import KDTree

# ----------------- Constants -----------------
DATASET_FOLDER = "Dataset"           # Folder containing your dataset images
KD_TILE_SIZE = (50, 50)              # Fixed size to which each dataset image will be resized
KD_TREE_PATH = "kdtree_dataset.pkl"  # Output pickle file


# ----------------- Feature Extraction -----------------
def compute_features(image):
    """Compute the colour/edge/texture/gradient features of one image.

    Args:
        image: RGB image array.  ``build_kdtree`` passes the result of
            ``cv2.imread`` after resizing and a BGR -> RGB conversion.

    Returns:
        A tuple ``(avg_lab, avg_edge, avg_texture, avg_grad)``:

        - ``avg_lab``: per-channel mean of the Gaussian-blurred image in Lab
          colour space (array of 3 values).
        - ``avg_edge``: mean of the Canny edge map divided by 255, i.e. the
          fraction of pixels marked as edges.
        - ``avg_texture``: standard deviation of the grayscale image divided
          by 255.
        - ``avg_grad``: mean Sobel gradient magnitude divided by 255.
    """
    # Gaussian blur to reduce noise before computing the Lab colour.
    blurred = cv2.GaussianBlur(image, (5, 5), 0)
    img_lab = cv2.cvtColor(blurred, cv2.COLOR_RGB2LAB)
    # NOTE(review): per OpenCV's colour-conversion docs, 8-bit Lab output is
    # stored on a 0..255 scale, so these three components are numerically
    # much larger than the /255-scaled features below and will dominate
    # Euclidean distances.  Left unchanged so existing consumers of the
    # pickle keep seeing the same feature space -- confirm this is intended.
    avg_lab = np.mean(img_lab, axis=(0, 1))

    # Edge, texture and gradient measures use the un-blurred grayscale image.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Edge density: the Canny map holds 0 or 255, so mean / 255 is a fraction.
    edges = cv2.Canny(gray, 100, 200)
    avg_edge = np.mean(edges) / 255.0

    # Texture: grayscale standard deviation, scaled by 1/255.
    avg_texture = np.std(gray) / 255.0

    # Gradient magnitude from 3x3 Sobel derivatives; computed in float64 so
    # negative derivatives are not clipped.
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    grad_mag = np.sqrt(grad_x**2 + grad_y**2)
    avg_grad = np.mean(grad_mag) / 255.0

    return avg_lab, avg_edge, avg_texture, avg_grad


def build_kdtree():
    """Build a KDTree from the dataset images and pickle it.

    Every ``.png``/``.jpg``/``.jpeg`` file in ``DATASET_FOLDER`` is loaded,
    resized to ``KD_TILE_SIZE`` and converted to RGB.  Its features are
    computed with ``compute_features`` and weighted (1.0 for the Lab
    channels, 0.5 for the edge, texture and gradient terms).  Files that
    ``cv2.imread`` cannot decode are skipped.

    The pickle written to ``KD_TREE_PATH`` is a dict with the keys
    ``'tree'`` (the KDTree), ``'images'`` (the resized RGB images, in the
    same order as the tree's rows) and ``'features'`` (the weighted feature
    matrix).  Files are processed in sorted name order so that this row
    order is reproducible between runs.

    Returns:
        None.  If no image could be loaded, a message is printed and no
        file is written.
    """
    # The KDTree uses an unweighted Euclidean metric, so each weight is
    # folded into the data by multiplying the component by its square root:
    # sqrt(1.0) = 1 for the Lab channels, sqrt(0.5) for the other features.
    scale = np.array([1.0, 1.0, 1.0,
                      math.sqrt(0.5), math.sqrt(0.5), math.sqrt(0.5)])

    feature_list = []
    images_list = []

    # os.listdir() returns entries in arbitrary order; sort them so the
    # index <-> image mapping is deterministic.
    image_paths = [
        os.path.join(DATASET_FOLDER, img)
        for img in sorted(os.listdir(DATASET_FOLDER))
        if img.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # Unreadable or non-image file: skip it.
            continue

        # Resize image to KD_TILE_SIZE and convert BGR -> RGB.
        img = cv2.resize(img, KD_TILE_SIZE)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Compute features for the image.
        avg_lab, avg_edge, avg_texture, avg_grad = compute_features(img)

        # Concatenate the features into a 6-dimensional vector.
        raw_feature = np.concatenate([avg_lab, [avg_edge, avg_texture, avg_grad]])

        # Apply weighting: multiply each element by the square root of its weight.
        weighted_feature = raw_feature * scale

        feature_list.append(weighted_feature)
        images_list.append(img)

    if not feature_list:
        print("No images found in dataset folder!")
        return

    features = np.array(feature_list)

    # Build the KDTree using the weighted features.
    tree = KDTree(features)
    tree_data = {
        'tree': tree,
        'images': images_list,
        'features': features,  # optional: may be used for debugging
    }

    # Save the KDTree and dataset images to a pickle file.
    # NOTE: pickle files must only be loaded from trusted sources.
    with open(KD_TREE_PATH, "wb") as f:
        pickle.dump(tree_data, f)

    print(f"KDTree built and saved to {KD_TREE_PATH}. Total images: {len(images_list)}")


if __name__ == "__main__":
    build_kdtree()