File size: 7,244 Bytes
e6770c4
 
471a329
e6770c4
 
 
 
 
 
90b19f5
 
 
 
d2ae5cd
90b19f5
58a97e3
e6770c4
819d283
 
 
e6770c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e69a8dd
e6770c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90b19f5
 
 
58a97e3
90b19f5
 
e69a8dd
90b19f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e69a8dd
90b19f5
 
e69a8dd
90b19f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58a97e3
e69a8dd
97c0880
158315c
 
 
ed46de3
 
 
 
 
97c0880
90b19f5
 
 
 
 
 
 
 
 
e69a8dd
90b19f5
 
 
 
 
 
 
 
e69a8dd
 
 
 
90b19f5
2e7c2d1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
import math
import cv2
from sklearn import datasets
import numpy as np
from matplotlib import pyplot as plt
from scipy import ndimage
from skimage import measure, color, io
from tensorflow.keras.preprocessing import image
from scipy import ndimage
import skimage.io as io
import skimage.transform as trans
import numpy as np
import tensorflow as tf
import gradio as gr
from huggingface_hub.keras_mixin import from_pretrained_keras
from itertools import cycle, islice




# Run the segmentation model on a single sample and binarize its output.
def predict_sample(image):
  """Predict a 0/255 mask for one input image.

  The image gets a leading batch axis, is passed to the module-level
  ``model``, and the model output is thresholded at 0.5.

  Args:
    image: Array-like sample; indexed as ``image[tf.newaxis, ...]``.

  Returns:
    The first batch entry of the model output, holding 255 where the raw
    output exceeded 0.5 and 0 everywhere else.
  """
  batch = image[tf.newaxis, ...]
  scores = model.predict(batch)

  # Decide the mask once, then overwrite both sides of the threshold in place.
  positive = scores > 0.5
  scores[positive] = 1
  scores[~positive] = 0

  return scores[0] * 255




def create_input_image(data, visualize=False):
  """Rasterize 2-D points into a 256x256 image.

  Each point is floored to integer (row, column) coordinates and the
  matching pixel is set to 0 on a canvas of ones. Coordinates that fall
  outside the canvas are clamped to the nearest edge on each axis
  independently, so a point that is out of range on both axes lands in the
  corner pixel instead of raising IndexError, and negative coordinates no
  longer wrap around to the opposite edge.

  Args:
    data: Sequence or array of points; the first two entries of every point
      are used as (row, column). May be empty.
    visualize: When True, draw the transposed image with matplotlib and
      flip the y axis.

  Returns:
    A (256, 256) float array with 0 at occupied pixels and 1 elsewhere.
  """
  size = 256
  canvas = np.ones((size, size))

  points = np.asarray(data, dtype=float)
  if points.size:
    # Floor first, then clamp, so every index is valid on both axes.
    cells = np.clip(np.floor(points[:, :2]), 0, size - 1).astype(int)
    canvas[cells[:, 0], cells[:, 1]] = 0

  if visualize:
    plt.imshow(canvas.T, cmap='gray')
    plt.gca().invert_yaxis()

  return canvas

# Load the pretrained Keras model through huggingface_hub when the module is
# executed; predict_sample() reads this module-level object at call time.
model = from_pretrained_keras("tareknaous/unet-visual-clustering")


# Offset added to connected-component labels before the watershed, so that
# label 0 stays free to mean "unknown region".
_MARKER_OFFSET = 10


def _segment_markers(prediction, max_filter_size):
  """Turn a 0/255 prediction mask into a max-filtered watershed label image."""
  # Adjust format (clusters to be 255 and rest is 0) on a copy, so the
  # caller's array is not modified.
  mask = np.array(prediction)
  was_255 = mask == 255
  was_0 = mask == 0
  mask[was_255] = 0
  mask[was_0] = 255

  # Convert to 8-bit image and take one colour channel
  mask = image.img_to_array(mask, dtype='uint8')
  cells = mask[:, :, 0]

  # Threshold, then open to remove noise
  _, thresh = cv2.threshold(cells, 0, 255, cv2.THRESH_BINARY)
  kernel = np.ones((3, 3), np.uint8)
  opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

  # Background, foreground and the undecided band between them
  background = cv2.dilate(opening, kernel, iterations=5)
  dist_transform = cv2.distanceTransform(opening, cv2.DIST_L2, 5)
  _, foreground = cv2.threshold(dist_transform, 0.04 * dist_transform.max(), 255, 0)
  foreground = np.uint8(foreground)
  unknown = cv2.subtract(background, foreground)

  # Connected Component Analysis
  _, markers = cv2.connectedComponents(foreground)
  markers = markers + _MARKER_OFFSET
  markers[unknown == 255] = 0

  # Watershed
  img = cv2.merge((mask, mask, mask))
  markers = cv2.watershed(img, markers)

  # Maximum filtering
  return ndimage.maximum_filter(markers, size=max_filter_size)


def _lookup_cluster_ids(markers, data):
  """Read the cluster id under every point; points outside the image get 0."""
  points = np.asarray(data, dtype=float)
  cluster_ids = np.zeros(len(points), dtype=np.int32)
  if points.size == 0:
    return cluster_ids

  rows = np.floor(points[:, 0])
  cols = np.floor(points[:, 1])
  height, width = markers.shape[:2]

  # Test bounds on the floats so NaN/inf and negatives are rejected before
  # they are cast to indices.
  inside = (rows >= 0) & (rows < height) & (cols >= 0) & (cols < width)
  row_idx = rows[inside].astype(np.intp)
  col_idx = cols[inside].astype(np.intp)
  cluster_ids[inside] = markers[row_idx, col_idx] - _MARKER_OFFSET

  # Marker value -1 (shifted to -1 - offset) is mapped to cluster 0.
  cluster_ids[cluster_ids == -1 - _MARKER_OFFSET] = 0
  return cluster_ids


def get_instances(prediction, data, max_filter_size=1):
  """Assign a cluster id to every data point from a predicted mask.

  The mask is inverted, cleaned with a morphological opening, split into
  instances with a distance-transform seeded watershed, max-filtered, and
  finally sampled at the floored coordinates of each point.

  Differences from the previous implementation, none of which change the
  ids returned for in-range points:
    * ``prediction`` is copied instead of being modified in place.
    * The unused label2rgb / regionprops / boundary-colouring steps are gone.
    * Ids are returned as int32, so labels above 127 no longer wrap.
    * The per-point lookup is vectorized; negative coordinates count as
      out of range (id 0) instead of wrapping around.

  Args:
    prediction: Mask with values 0 and 255, as produced by predict_sample.
    data: Sequence or array of points; the first two entries of every point
      are used as (row, column).
    max_filter_size: Window size handed to ndimage.maximum_filter.

  Returns:
    1-D int32 array with one cluster id per point; 0 for points outside
    the image.
  """
  markers = _segment_markers(prediction, max_filter_size)
  return _lookup_cluster_ids(markers, data)
  
  



def _rescale_axis(values, new_min=0, new_max=256):
  """Linearly map values onto [new_min, new_max]; constant input maps to the midpoint."""
  low = values.min()
  high = values.max()
  if high == low:
    # Avoid 0/0: every value is identical, so place them all mid-range.
    return np.full(values.shape, (new_min + new_max) / 2.0)
  return (((values - low) * (new_max - new_min)) / (high - low)) + new_min


def visual_clustering(cluster_type, num_clusters, num_samples, noise, random_state, median_kernel_size, max_kernel_size):
  """Generate a synthetic dataset, cluster it visually and plot both.

  The points are rescaled to [0, 256] on each axis, rasterized with
  create_input_image, denoised with a median filter, segmented with
  predict_sample and labelled with get_instances.

  Args:
    cluster_type: One of "blobs", "varied blobs", "aniso", "noisy moons",
      "noisy circles".
    num_clusters: Number of blob centres (unused by moons and circles).
    num_samples: Number of points to generate.
    noise: Noise level for moons and circles.
    random_state: Seed for the generators; now also forwarded to moons and
      circles so those datasets are reproducible too.
    median_kernel_size: Window size of the denoising median filter.
    max_kernel_size: Window size of the max filter in get_instances.

  Returns:
    Tuple ``(fig1, fig2)``: the raw dataset scatter plot and the scatter
    plot coloured by cluster id.

  Raises:
    ValueError: If ``cluster_type`` is not one of the supported names
      (previously this surfaced as an UnboundLocalError).
  """
  # UI sliders may hand over floats (TODO confirm for the gradio version in
  # use); the generators and filters expect integers.
  num_clusters = int(num_clusters)
  num_samples = int(num_samples)
  median_kernel_size = int(median_kernel_size)
  max_kernel_size = int(max_kernel_size)
  if random_state is not None:
    random_state = int(random_state)

  if cluster_type == "blobs":
    points, _ = datasets.make_blobs(n_samples=num_samples, centers=num_clusters, random_state=random_state,
                                    center_box=(0, 256), cluster_std=4 * np.ones(num_clusters))
  elif cluster_type == "varied blobs":
    points, _ = datasets.make_blobs(n_samples=num_samples, centers=num_clusters,
                                    cluster_std=1.5 * np.ones(num_clusters), random_state=random_state)
  elif cluster_type == "aniso":
    blobs, _ = datasets.make_blobs(n_samples=num_samples, centers=num_clusters, random_state=random_state,
                                   center_box=(-30, 30))
    transformation = [[0.8, -0.6], [-0.4, 0.8]]
    points = np.dot(blobs, transformation)
  elif cluster_type == "noisy moons":
    points, _ = datasets.make_moons(n_samples=num_samples, noise=noise, random_state=random_state)
  elif cluster_type == "noisy circles":
    points, _ = datasets.make_circles(n_samples=num_samples, factor=.01, noise=noise, random_state=random_state)
  else:
    raise ValueError(f"Unknown cluster_type: {cluster_type!r}")

  # Stretch both axes onto the image extent.
  points[:, 0] = _rescale_axis(points[:, 0])
  points[:, 1] = _rescale_axis(points[:, 1])

  fig1 = plt.figure()
  plt.scatter(points[:, 0], points[:, 1], s=1, c='black')
  plt.close()

  raster = create_input_image(points)
  filtered = ndimage.median_filter(raster, size=median_kernel_size)
  result = predict_sample(filtered)
  y_km = get_instances(result, points, max_filter_size=max_kernel_size)

  # Widen before doing arithmetic so "max + 1" cannot overflow a narrow dtype.
  labels = np.asarray(y_km).astype(int)
  num_colors = int(labels.max()) + 1 if labels.size else 1

  colors = np.array(list(islice(cycle(["#000000", '#377eb8', '#ff7f00', '#4daf4a',
                                       '#f781bf', '#a65628', '#984ea3',
                                       '#999999', '#e41a1c', '#dede00', '#491010']),
                                num_colors)))
  #add black color for outliers (if any)
  colors = np.append(colors, ["#000000"])

  fig2 = plt.figure()
  plt.scatter(points[:, 0], points[:, 1], s=10, color=colors[labels])
  plt.close()

  return fig1, fig2
  
# Passed to gr.Interface below as the demo title.
title = "Clustering Plotted Data by Image Segmentation"
# Markdown passed to gr.Interface below as the demo description; one bullet
# per input control.
description = '''
Gradio Demo for Visual Clustering on synthetic datasets.

* **Number of Clusters**:  Set the number of clusters to generate in the dataset (Fixed to only 2 in noisy circles and moons)
* **Number of Samples**: Number of data points in the dataset
* **Noise**: Controls level of noise in noisy circles and moons 
* **Random State**: Allows you to change the location of the generated clusters
* **Denoising Filter Kernel Size**: Size of the denoising filter
* **Max Filter Kernel Size**: Size of the max filter 
'''

# Input widgets, in the same order as visual_clustering's parameters.
_demo_inputs = [
  gr.inputs.Dropdown(["blobs", "varied blobs", "aniso", "noisy moons", "noisy circles"]),
  gr.inputs.Slider(1, 10, step=1, label='Number of Clusters'),
  gr.inputs.Slider(10000, 1000000, step=10000, label='Number of Samples'),
  gr.inputs.Slider(0.03, 0.1, step=0.01, label='Noise'),
  gr.inputs.Slider(1, 100, step=1, label='Random State'),
  gr.inputs.Slider(1, 100, step=1, label='Denoising Filter Kernel Size'),
  gr.inputs.Slider(1, 100, step=1, label='Max Filter Kernel Size'),
]

# One plot per figure returned by visual_clustering.
_demo_outputs = [
  gr.outputs.Image(type='plot', label='Dataset'),
  gr.outputs.Image(type='plot', label='Clustering Result'),
]

# Build the interface and start it.
iface = gr.Interface(
  fn=visual_clustering,
  inputs=_demo_inputs,
  outputs=_demo_outputs,
  title=title,
  description=description,
)
iface.launch(debug=True)