# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0

import gradio as gr
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans, kmeans_plusplus
from sklearn.datasets import make_blobs

plt.switch_backend("agg")


def initial_points(X, y_true, n_components, n_clust):
    # Calculate seeds from k-means++
    centers_init, indices = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)

    # Plot init seeds alongside the sample data
    init_points_plot, ax = plt.subplots()
    for k in range(n_components):
        cluster_data = y_true == k
        plt.scatter(X[cluster_data, 0], X[cluster_data, 1], marker=".", s=10)
    plt.subplots_adjust(top=0.8)
    plt.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50)
    plt.title("K-Means++ Initialization")
    plt.xticks([])
    plt.yticks([])
    return init_points_plot


def one_step(X, n_clust):
    # Run a single iteration of K-Means, starting from k-means++ seeds
    kmeans = KMeans(n_clusters=n_clust, max_iter=1, n_init=1, random_state=0).fit(X)
    y_hat = kmeans.predict(X)

    one_step_plot, ax = plt.subplots()
    plt.scatter(X[:, 0], X[:, 1], marker=".", s=10, c=y_hat)
    centers = kmeans.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c="b", s=50)
    plt.subplots_adjust(top=0.8)
    plt.title("K-Means After One Step")
    plt.xticks([])
    plt.yticks([])
    return one_step_plot


def k_means(n_samples, n_components, clst_std, n_clust):
    plt.clf()
    # Generate sample data
    X, y_true = make_blobs(
        n_samples=n_samples, centers=n_components, cluster_std=clst_std, random_state=0
    )
    X = X[:, ::-1]

    plus_plot = initial_points(X, y_true, n_components, n_clust)
    step_plot = one_step(X, n_clust)
    return plus_plot, step_plot


title = "An example of K-Means++ Initialization"

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown("""
This example shows the output of the K-Means++ function. K-Means++ is the
default initialization method for the K-Means algorithm in scikit-learn.
K-Means++ finds smarter initial centroids (mean points), which avoids a common
drawback of K-Means: poor initialization points lead to poor results. These
points serve as the starting centroids for the iterative clustering.

In this example, we use blobs to demonstrate the algorithm. Blobs are groups
of points; the smaller the standard deviation, the more tightly they are
packed. The number of blobs and the number of clusters can be set separately,
to show how the algorithm behaves when the chosen number of clusters does not
match the number of blobs.

This is based on the example
[here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
""")
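    # Note: kmeans_plusplus() returns only the seed centers and performs no
    # clustering itself; KMeans applies the same "k-means++" seeding by
    # default, so the second plot shows one Lloyd iteration of K-Means started
    # from such seeds (max_iter=1 in one_step above).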
""") with gr.Row(): with gr.Column(): n_samples = gr.Slider(100, 4000, 1000, step=1, label="Number of Samples") n_clusters = gr.Slider(1, 10, 4, step=1, label="Number of Clusters to Initialize") with gr.Column(): n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs") clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation") with gr.Row(): graph_points = gr.Plot(label="K-Means++ Initial Points") init_plus_one = gr.Plot(label="K-Means after one Step") n_samples.change( fn=k_means, inputs=[n_samples, n_components, clst_std, n_clusters], outputs=[graph_points, init_plus_one] ) n_clusters.change( fn=k_means, inputs=[n_samples, n_components, clst_std, n_clusters], outputs=[graph_points, init_plus_one] ) n_components.change( fn=k_means, inputs=[n_samples, n_components, clst_std, n_clusters], outputs=[graph_points, init_plus_one] ) clst_std.change( fn=k_means, inputs=[n_samples, n_components, clst_std, n_clusters], outputs=[graph_points, init_plus_one] ) if __name__ == '__main__': demo.launch()