|
import gradio as gr |
|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
from sklearn.cluster import MeanShift, estimate_bandwidth |
|
from sklearn.datasets import make_blobs |
|
|
|
|
|
def get_clusters_plot(n_blobs: int, quantile: float, cluster_std: float):
    """Cluster synthetic blobs with Mean Shift and plot the outcome.

    Draws 10,000 two-feature samples with ``make_blobs``, estimates a kernel
    bandwidth from ``quantile`` (on a 500-sample subset), fits ``MeanShift``
    and scatters every estimated cluster together with its center.

    Args:
        n_blobs: Number of blob centers to generate (the "true" cluster count).
        quantile: Quantile forwarded to ``estimate_bandwidth``.
        cluster_std: Standard deviation of every generated blob.

    Returns:
        A ``(figure, message)`` tuple: the Matplotlib figure and an HTML
        paragraph saying whether the estimated cluster count matches
        ``n_blobs``.
    """
    # Build the figure without pyplot: figures made with ``plt.figure()`` stay
    # registered in pyplot's global state until closed, so a long-running app
    # would accumulate one per callback, and the implicit "current figure" is
    # shared between concurrent requests.
    from matplotlib.figure import Figure

    # ``make_blobs`` only treats integral ``centers`` as a count; coerce in
    # case the UI hands over a float such as ``3.0``.
    n_blobs = int(n_blobs)

    X, _, centers = make_blobs(
        n_samples=10000,
        cluster_std=cluster_std,
        centers=n_blobs,
        return_centers=True,
    )

    bandwidth = estimate_bandwidth(X, quantile=quantile, n_samples=500)

    fig = Figure()
    ax = fig.subplots()
    ax.set_xlabel("Feature 1")
    ax.set_ylabel("Feature 2")

    if bandwidth <= 0:
        # A very small quantile makes every point its own nearest neighbour,
        # giving a zero bandwidth that MeanShift refuses. Show the raw data
        # and explain, instead of failing the callback.
        ax.scatter(X[:, 0], X[:, 1])
        ax.set_title("Bandwidth could not be estimated")
        message = (
            '<p style="text-align: center;">'
            "The estimated bandwidth is zero, so no clustering was performed."
            " Try increasing the `Quantile` parameter.</p>"
        )
        return fig, message

    ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
    ms.fit(X)
    labels = ms.labels_
    cluster_centers = ms.cluster_centers_

    labels_unique = np.unique(labels)
    n_clusters_ = len(labels_unique)

    # Iterate over the labels that actually occur rather than assuming they
    # are exactly 0..n_clusters_-1.
    for k in labels_unique:
        my_members = labels == k
        cluster_center = cluster_centers[k]
        ax.scatter(X[my_members, 0], X[my_members, 1])
        ax.plot(
            cluster_center[0],
            cluster_center[1],
            "x",
            markeredgecolor="k",
            markersize=14,
        )

    ax.set_title(f"Estimated number of clusters: {n_clusters_}")

    if len(centers) != n_clusters_:
        message = (
            '<p style="text-align: center;">'
            + f"The number of estimated clusters ({n_clusters_})"
            + f" differs from the true number of clusters ({n_blobs})."
            + " Try changing the `Quantile` parameter.</p>"
        )
    else:
        message = (
            '<p style="text-align: center;">'
            + f"The number of estimated clusters ({n_clusters_})"
            + f" matches the true number of clusters ({n_blobs})!</p>"
        )
    return fig, message
|
|
|
|
|
# Gradio UI: three sliders on the left drive the clustering, the resulting
# plot and a status message are shown on the right.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Mean Shift Clustering

        This space shows how to use the [Mean Shift Clustering](https://scikit-learn.org/stable/modules/generated/sklearn.cluster.MeanShift.html) algorithm to cluster 2D data points. You can change the parameters using the sliders and see how the model performs.

        This space is based on [sklearn's original demo](https://scikit-learn.org/stable/auto_examples/cluster/plot_mean_shift.html#sphx-glr-auto-examples-cluster-plot-mean-shift-py).
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            n_blobs = gr.Slider(
                minimum=2,
                maximum=10,
                label="Number of clusters in the data",
                step=1,
                value=3,
            )
            quantile = gr.Slider(
                # Start at one step above zero: a quantile of 0 yields a zero
                # bandwidth, which MeanShift cannot use.
                minimum=0.05,
                maximum=1,
                step=0.05,
                value=0.2,
                label="Quantile",
                info="Used to determine clustering's bandwidth.",
            )
            cluster_std = gr.Slider(
                minimum=0.1,
                maximum=1,
                label="Clusters' standard deviation",
                step=0.1,
                value=0.6,
            )
        with gr.Column(scale=4):
            clusters_plots = gr.Plot(label="Clusters' Plot")
            message = gr.HTML()

    # Every slider feeds the same callback with the same inputs/outputs, so
    # register them in one place instead of repeating the call per slider.
    callback_inputs = [n_blobs, quantile, cluster_std]
    callback_outputs = [clusters_plots, message]

    for slider in callback_inputs:
        slider.change(
            get_clusters_plot,
            callback_inputs,
            callback_outputs,
            queue=False,
        )

    # Render an initial plot with the default slider values on page load.
    demo.load(
        get_clusters_plot,
        callback_inputs,
        callback_outputs,
        queue=False,
    )


if __name__ == "__main__":
    demo.launch()
|
|