Spaces:
Sleeping
Sleeping
File size: 3,907 Bytes
0404f22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
# build_kdtree.py
import os
import cv2
import numpy as np
import pickle
import math
from sklearn.neighbors import KDTree
# ----------------- Constants -----------------
# Directory scanned by build_kdtree() for dataset images (.png/.jpg/.jpeg).
# It is a relative path, so it is resolved against the current working directory.
DATASET_FOLDER = "Dataset"
# Size handed to cv2.resize for every dataset image before feature extraction;
# cv2.resize interprets this tuple as (width, height).
KD_TILE_SIZE = (50, 50)
# Path of the pickle file that build_kdtree() writes (tree, tiles and features).
KD_TREE_PATH = "kdtree_dataset.pkl"
# ----------------- Feature Extraction -----------------
def compute_features(image):
    """
    Summarise an RGB image as four appearance statistics.

    The image is expected in RGB channel order (the conversions below use the
    RGB2LAB / RGB2GRAY codes). NOTE(review): an 8-bit image, as produced by
    cv2.imread, is assumed -- confirm against other callers.

    Returns a tuple (avg_lab, avg_edge, avg_texture, avg_grad):
    - avg_lab: per-channel mean of the Lab conversion of a 5x5 Gaussian-blurred
      copy of the image (array of 3 values)
    - avg_edge: mean of the Canny edge map (thresholds 100/200) divided by 255
    - avg_texture: standard deviation of the grayscale image divided by 255
    - avg_grad: mean Sobel (3x3) gradient magnitude divided by 255
    """
    # Colour: smooth first so noise does not skew the Lab average.
    smoothed_lab = cv2.cvtColor(
        cv2.GaussianBlur(image, (5, 5), 0),
        cv2.COLOR_RGB2LAB,
    )
    mean_lab = smoothed_lab.mean(axis=(0, 1))

    # The remaining statistics are computed on the unblurred grayscale image.
    gray_img = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Edge density: mean of the Canny edge map, divided by 255.
    edge_density = cv2.Canny(gray_img, 100, 200).mean() / 255.0

    # Texture: spread of the gray levels, divided by 255.
    texture = gray_img.std() / 255.0

    # Gradient: Sobel derivatives in x and y, computed in float64 so negative
    # responses are kept, then combined into a per-pixel magnitude.
    dx = cv2.Sobel(gray_img, cv2.CV_64F, 1, 0, ksize=3)
    dy = cv2.Sobel(gray_img, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(np.square(dx) + np.square(dy))
    gradient = magnitude.mean() / 255.0

    return mean_lab, edge_density, texture, gradient
def build_kdtree():
    """
    Build a KDTree over the dataset images and save it to KD_TREE_PATH.

    Every .png/.jpg/.jpeg file in DATASET_FOLDER is loaded, resized to
    KD_TILE_SIZE, converted from BGR to RGB and described by a 6-dimensional
    vector: the three Lab means followed by edge density, texture and gradient
    (see compute_features). Each component is multiplied by the square root of
    its weight (1.0 for the Lab channels, 0.5 for the other three), so that a
    squared Euclidean distance between stored vectors weights each squared
    difference by that weight.

    Files are processed in sorted path order so that row i of the tree always
    refers to the same image across runs; os.listdir alone gives no ordering
    guarantee. Files that OpenCV cannot decode are reported and skipped.

    The pickle holds a dict with keys 'tree' (the KDTree), 'images' (the
    resized RGB tiles, in the same order as the tree rows) and 'features'
    (the weighted feature matrix).

    Returns None. If no image could be loaded, a message is printed and no
    file is written.
    """
    # Weights: for the Lab channels, weight = 1.0 (so sqrt(1.0) = 1),
    # for the other features, weight = 0.5 (so multiply by sqrt(0.5)).
    scale = np.array([1.0, 1.0, 1.0, math.sqrt(0.5), math.sqrt(0.5), math.sqrt(0.5)])

    # Full paths of the candidate images, sorted for a reproducible index order.
    image_paths = sorted(
        os.path.join(DATASET_FOLDER, name)
        for name in os.listdir(DATASET_FOLDER)
        if name.lower().endswith(('.png', '.jpg', '.jpeg'))
    )

    feature_list = []
    images_list = []
    for img_path in image_paths:
        img = cv2.imread(img_path)
        if img is None:
            # imread signals an unreadable/corrupt file by returning None.
            print(f"Skipping unreadable image: {img_path}")
            continue

        # Resize image to KD_TILE_SIZE and convert BGR -> RGB
        img = cv2.resize(img, KD_TILE_SIZE)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Concatenate the features into a 6-dimensional vector and apply the
        # per-feature weighting.
        avg_lab, avg_edge, avg_texture, avg_grad = compute_features(img)
        raw_feature = np.concatenate([avg_lab, [avg_edge, avg_texture, avg_grad]])
        feature_list.append(raw_feature * scale)
        images_list.append(img)

    if not feature_list:
        print("No images found in dataset folder!")
        return

    features = np.array(feature_list)

    # Build the KDTree using the weighted features
    tree = KDTree(features)
    tree_data = {
        'tree': tree,
        'images': images_list,
        'features': features,  # optional: may be used for debugging
    }

    # Save the KDTree and dataset images to a pickle file
    with open(KD_TREE_PATH, "wb") as f:
        pickle.dump(tree_data, f)

    print(f"KDTree built and saved to {KD_TREE_PATH}. Total images: {len(images_list)}")
# Script entry point: build and save the KD-tree only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    build_kdtree()
|