Spaces:

CVPR
/

visual-clustering

Build error

File size: 7,244 Bytes

import math
import cv2
from sklearn import datasets
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from skimage import measure, color, io
from tensorflow.keras.preprocessing import image
from scipy import ndimage
import skimage.io as io
import skimage.transform as trans
import numpy as np
import tensorflow as tf
import gradio as gr
from huggingface_hub.keras_mixin import from_pretrained_keras
from itertools import cycle, islice




#Function that predicts on only 1 sample 
def predict_sample(image):
  prediction = model.predict(image[tf.newaxis, ...])
  prediction[prediction > 0.5 ] = 1
  prediction[prediction !=1] = 0
  result = prediction[0]*255
  return result




def create_input_image(data, visualize=False):
  #Initialize input matrix
  input = np.ones((256,256))

  #Fill matrix with data point values
  for i in range(0,len(data)):
    if math.floor(data[i][0]) < 256 and math.floor(data[i][1]) < 256:
      input[math.floor(data[i][0])][math.floor(data[i][1])] = 0
    elif math.floor(data[i][0]) >= 256:
      input[255][math.floor(data[i][1])] = 0
    elif math.floor(data[i][1]) >= 256:
      input[math.floor(data[i][0])][255] = 0
  
  #Visualize
  if visualize == True:
    plt.imshow(input.T, cmap='gray')
    plt.gca().invert_yaxis()

  return input

model = from_pretrained_keras("tareknaous/unet-visual-clustering")


def get_instances(prediction, data, max_filter_size=1):
  #Adjust format (clusters to be 255 and rest is 0)
  prediction[prediction == 255] = 3
  prediction[prediction == 0] = 4
  prediction[prediction == 3] = 0
  prediction[prediction == 4] = 255

  #Convert to 8-bit image
  prediction = image.img_to_array(prediction, dtype='uint8')
  
  #Get 1 color channel
  cells=prediction[:,:,0]
  #Threshold
  ret1, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY)
  #Filter to remove noise
  kernel = np.ones((3,3),np.uint8)
  opening = cv2.morphologyEx(thresh,cv2.MORPH_OPEN,kernel, iterations = 2)

  #Get the background
  background = cv2.dilate(opening,kernel,iterations=5)
  dist_transform = cv2.distanceTransform(opening,cv2.DIST_L2,5)
  ret2, foreground = cv2.threshold(dist_transform,0.04*dist_transform.max(),255,0)
  foreground = np.uint8(foreground)
  unknown = cv2.subtract(background,foreground)

  #Connected Component Analysis
  ret3, markers = cv2.connectedComponents(foreground)
  markers = markers+10
  markers[unknown==255] = 0

  #Watershed
  img = cv2.merge((prediction,prediction,prediction))
  markers = cv2.watershed(img,markers)
  img[markers == -1] = [0,255,255]  

  #Maximum filtering
  markers = ndimage.maximum_filter(markers, size=max_filter_size)
  # plt.imshow(markers.T, cmap='gray')
  # plt.gca().invert_yaxis()

  #Get an RGB colored image
  img2 = color.label2rgb(markers, bg_label=1)
  # plt.imshow(img2)
  # plt.gca().invert_yaxis()

  #Get regions
  regions = measure.regionprops(markers, intensity_image=cells)

  #Get Cluster IDs
  cluster_ids = np.zeros(len(data))

  for i in range(0,len(cluster_ids)):
    row = math.floor(data[i][0])
    column = math.floor(data[i][1])
    if row < 256 and column < 256:
      cluster_ids[i] = markers[row][column] - 10
    elif row >= 256:
      # cluster_ids[i] = markers[255][column]
      cluster_ids[i] = 0
    elif column >= 256:
      # cluster_ids[i] = markers[row][255] 
      cluster_ids[i] = 0

  cluster_ids = cluster_ids.astype('int8')
  cluster_ids[cluster_ids == -11] = 0
    
  return cluster_ids
  
  



def visual_clustering(cluster_type, num_clusters, num_samples, noise, random_state, median_kernel_size, max_kernel_size):

  NUM_CLUSTERS = num_clusters
  CLUSTER_STD = 4 * np.ones(NUM_CLUSTERS)

  if cluster_type == "blobs":
    data = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, random_state=random_state,center_box=(0, 256), cluster_std=CLUSTER_STD)
  
  elif cluster_type == "varied blobs":
    cluster_std = 1.5 * np.ones(NUM_CLUSTERS)
    data = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, cluster_std=cluster_std, random_state=random_state)

  elif cluster_type == "aniso":
    X, y = datasets.make_blobs(n_samples=num_samples, centers=NUM_CLUSTERS, random_state=random_state, center_box=(-30, 30))
    transformation = [[0.8, -0.6], [-0.4, 0.8]]
    X_aniso = np.dot(X, transformation)
    data = (X_aniso, y)

  elif cluster_type == "noisy moons":
    data = datasets.make_moons(n_samples=num_samples, noise=noise)

  elif cluster_type == "noisy circles":
    data = datasets.make_circles(n_samples=num_samples, factor=.01, noise=noise)

  max_x = max(data[0][:, 0])
  min_x = min(data[0][:, 0])
  new_max = 256
  new_min = 0

  data[0][:, 0] = (((data[0][:, 0] - min_x)*(new_max-new_min))/(max_x-min_x))+ new_min

  max_y = max(data[0][:, 1])
  min_y = min(data[0][:, 1])
  new_max_y = 256
  new_min_y = 0

  data[0][:, 1] = (((data[0][:, 1] - min_y)*(new_max_y-new_min_y))/(max_y-min_y))+ new_min_y

  fig1 = plt.figure()
  plt.scatter(data[0][:, 0], data[0][:, 1], s=1, c='black')
  plt.close()

  input = create_input_image(data[0])
  filtered = ndimage.median_filter(input, size=median_kernel_size)
  result = predict_sample(filtered)
  y_km = get_instances(result, data[0], max_filter_size=max_kernel_size)

  colors = np.array(list(islice(cycle(["#000000", '#377eb8', '#ff7f00', '#4daf4a',
                                             '#f781bf', '#a65628', '#984ea3',
                                             '#999999', '#e41a1c', '#dede00' ,'#491010']),
                                      int(max(y_km) + 1))))
  #add black color for outliers (if any)
  colors = np.append(colors, ["#000000"])
  
  fig2 = plt.figure()
  plt.scatter(data[0][:, 0], data[0][:, 1], s=10, color=colors[y_km.astype('int8')])
  plt.close()

  return fig1, fig2
  
title = "Clustering Plotted Data by Image Segmentation"
description = '''
Gradio Demo for Visual Clustering on synthetic datasets.

* **Number of Clusters**:  Set the number of clusters to generate in the dataset (Fixed to only 2 in noisy circles and moons)
* **Number of Samples**: Number of data points in the dataset
* **Noise**: Controls level of noise in noisy circles and moons 
* **Random State**: Allows you to change the location of the generated clusters
* **Denoising Filter Kernel Size**: Size of the denoising filter
* **Max Filter Kernel Size**: Size of the max filter 
'''

iface = gr.Interface(
    
  fn=visual_clustering, 

  inputs=[
          gr.inputs.Dropdown(["blobs", "varied blobs",  "aniso", "noisy moons", "noisy circles" ]),
          gr.inputs.Slider(1, 10, step=1, label='Number of Clusters'),
          gr.inputs.Slider(10000, 1000000, step=10000, label='Number of Samples'),
          gr.inputs.Slider(0.03, 0.1, step=0.01, label='Noise'),
          gr.inputs.Slider(1, 100, step=1, label='Random State'),
          gr.inputs.Slider(1, 100, step=1, label='Denoising Filter Kernel Size'),
          gr.inputs.Slider(1,100, step=1, label='Max Filter Kernel Size')
          ],

  outputs=[
           gr.outputs.Image(type='plot', label='Dataset'),
           gr.outputs.Image(type='plot', label='Clustering Result')
           ],
           
  title=title,
  description=description,
           )
iface.launch(debug=True)