# build_kdtree.py

import os
import cv2
import numpy as np
import pickle
import math
from sklearn.neighbors import KDTree

# ----------------- Constants -----------------
# Directory that build_kdtree() lists to find the dataset images.
DATASET_FOLDER = "Dataset"
# Size passed to cv2.resize for every dataset image before feature extraction.
KD_TILE_SIZE = (50, 50)
# Path of the pickle file that build_kdtree() writes (tree, images, features).
KD_TREE_PATH = "kdtree_dataset.pkl"

# ----------------- Feature Extraction -----------------
def compute_features(image):
    """
    Summarize an image as one colour descriptor and three structure scalars.

    The image is handled with RGB conversion codes (COLOR_RGB2LAB and
    COLOR_RGB2GRAY), so callers are expected to pass RGB channel order.

    Returns a 4-tuple (avg_lab, avg_edge, avg_texture, avg_grad):
      - avg_lab:     per-channel mean of the Lab conversion of a 5x5
                     Gaussian-blurred copy of the image; not rescaled here.
      - avg_edge:    mean of the Canny edge map (thresholds 100/200) / 255.
      - avg_texture: standard deviation of the grayscale image / 255.
      - avg_grad:    mean 3x3 Sobel gradient magnitude / 255 (not clamped).
    """
    # Colour: smooth first so pixel noise does not disturb the Lab mean.
    smoothed = cv2.GaussianBlur(image, (5, 5), 0)
    avg_lab = cv2.cvtColor(smoothed, cv2.COLOR_RGB2LAB).mean(axis=(0, 1))

    # The structure measures all work on the unblurred grayscale image.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Gradient magnitude from horizontal and vertical Sobel responses,
    # computed in float64 so negative responses are not truncated.
    dx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    dy = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(dx * dx + dy * dy)
    avg_grad = magnitude.mean() / 255.0

    # Texture: spread of the gray levels.
    avg_texture = gray.std() / 255.0

    # Edge density: mean of the Canny edge map, divided by 255.
    edge_map = cv2.Canny(gray, 100, 200)
    avg_edge = edge_map.mean() / 255.0

    return avg_lab, avg_edge, avg_texture, avg_grad

def build_kdtree():
    """
    Build a KDTree over the dataset images and pickle it to KD_TREE_PATH.

    Every readable .png/.jpg/.jpeg file directly inside DATASET_FOLDER is
    loaded, resized to KD_TILE_SIZE, converted BGR -> RGB and described by a
    6-dimensional vector from compute_features():
    (Lab channel means x3, edge density, texture, gradient).
    Each component is multiplied by the square root of its weight (1.0 for
    the Lab channels, 0.5 for the other three), so a squared Euclidean
    distance between stored vectors is the weighted sum of squared
    per-feature differences.

    Files are processed in sorted name order so that the row order of the
    tree (and of the stored image list) is reproducible between runs.

    The pickle holds a dict with the keys:
      'tree'     - the sklearn KDTree built on the weighted features
      'images'   - the resized RGB tiles, in the same order as the tree rows
      'features' - the weighted feature matrix (optional, for debugging)

    Returns None. If the dataset folder is missing, or no image could be
    loaded, a message is printed and no file is written.
    """
    # Weights: for the Lab channels, weight = 1.0 (so sqrt(1.0)=1),
    # for the other features, weight = 0.5 (so multiply by sqrt(0.5)).
    scale = np.array([1.0, 1.0, 1.0, math.sqrt(0.5), math.sqrt(0.5), math.sqrt(0.5)])

    # os.listdir returns names in arbitrary order; sort for a stable index.
    try:
        file_names = sorted(os.listdir(DATASET_FOLDER))
    except (FileNotFoundError, NotADirectoryError):
        print(f"Dataset folder '{DATASET_FOLDER}' not found!")
        return

    image_paths = [
        os.path.join(DATASET_FOLDER, name)
        for name in file_names
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    ]

    feature_list = []
    images_list = []

    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None;
            # report it instead of dropping the file silently.
            print(f"Warning: could not read image, skipping: {img_path}")
            continue
        # Resize image to KD_TILE_SIZE and convert BGR -> RGB
        img = cv2.resize(img, KD_TILE_SIZE)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Compute features and pack them into a 6-dimensional vector
        avg_lab, avg_edge, avg_texture, avg_grad = compute_features(img)
        raw_feature = np.concatenate([avg_lab, [avg_edge, avg_texture, avg_grad]])
        # Apply weighting: multiply each element by the square-root of its weight
        feature_list.append(raw_feature * scale)
        images_list.append(img)

    if not feature_list:
        print("No images found in dataset folder!")
        return

    features = np.array(feature_list)
    # Build the KDTree using the weighted features
    tree = KDTree(features)

    tree_data = {
        'tree': tree,
        'images': images_list,
        'features': features,  # optional: may be used for debugging
    }

    # Save the KDTree and dataset images to a pickle file
    with open(KD_TREE_PATH, "wb") as f:
        pickle.dump(tree_data, f)

    print(f"KDTree built and saved to {KD_TREE_PATH}. Total images: {len(images_list)}")

# Script entry point: build and save the KDTree only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    build_kdtree()