# LenixC's picture
# Converted button to event listeners.
# 3d2ccd9
# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0
import gradio as gr
import matplotlib.pyplot as plt
from sklearn.cluster import kmeans_plusplus
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
# Use the non-interactive "agg" backend: figures are returned to Gradio for
# rendering, so no GUI window should ever be opened.
plt.switch_backend("agg")
def initial_points(X, y_true, n_components, n_clust):
    """Plot the k-means++ seed points on top of the raw sample data.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        2-D sample coordinates.
    y_true : ndarray of shape (n_samples,)
        Ground-truth blob label per sample (used only to color the blobs).
    n_components : int
        Number of blobs generated (one scatter color per blob).
    n_clust : int
        Number of seed centers to compute with k-means++.

    Returns
    -------
    matplotlib.figure.Figure
        Figure showing the samples with the k-means++ initial centers.
    """
    # Calculate seeds from k-means++; the chosen sample indices are not needed.
    centers_init, _ = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)

    # Plot init seeds alongside the sample data, one color per blob.
    fig, _ = plt.subplots()
    for k in range(n_components):
        cluster_data = y_true == k
        plt.scatter(X[cluster_data, 0], X[cluster_data, 1], marker=".", s=10)
    plt.subplots_adjust(top=0.8)
    plt.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50)
    plt.title("K-Means++ Initialization")
    plt.xticks([])
    plt.yticks([])
    return fig
def one_step(X, n_clust):
    """Run a single k-means iteration on X and plot the resulting clustering.

    Parameters
    ----------
    X : ndarray of shape (n_samples, 2)
        2-D sample coordinates.
    n_clust : int
        Number of clusters to fit.

    Returns
    -------
    matplotlib.figure.Figure
        Figure of the samples colored by predicted cluster, with the
        one-step cluster centers overlaid.
    """
    # max_iter=1 / n_init=1: show where the centers land after exactly one
    # iteration from the (k-means++) initialization.
    kmeans = KMeans(n_clusters=n_clust, max_iter=1, n_init=1, random_state=0).fit(X)
    y_hat = kmeans.predict(X)

    # Renamed from "one_step" so the figure no longer shadows this function.
    fig, _ = plt.subplots()
    plt.scatter(X[:, 0], X[:, 1], marker=".", s=10, c=y_hat)
    centers = kmeans.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c="b", s=50)
    plt.subplots_adjust(top=0.8)
    plt.title("K-Means After One Step")
    plt.xticks([])
    plt.yticks([])
    return fig
def k_means(n_samples, n_components, clst_std, n_clust):
    """Generate blob data and build both demo figures.

    Parameters
    ----------
    n_samples : int
        Total number of points to generate.
    n_components : int
        Number of blobs (true clusters) in the generated data.
    clst_std : float
        Standard deviation of each blob.
    n_clust : int
        Number of clusters to initialize / fit.

    Returns
    -------
    tuple of matplotlib.figure.Figure
        (k-means++ initialization figure, after-one-step figure).
    """
    # Drop any stale pyplot state from a previous invocation.
    plt.clf()

    # Generate sample data with a fixed seed so the demo is reproducible.
    X, y_true = make_blobs(
        n_samples=n_samples,
        centers=n_components,
        cluster_std=clst_std,
        random_state=0,
    )
    # Swap the two coordinate columns (matches the upstream example's plots).
    X = X[:, ::-1]

    return (
        initial_points(X, y_true, n_components, n_clust),
        one_step(X, n_clust),
    )
title = "An example of K-Means++ Initialization"

# Build the Gradio UI: description, input sliders, and the two output plots.
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown("""
This example shows the output of the K-Means++ function.

K-Means++ is the default initialization function for the K-Means algorithm in scikit learn. K-Means++ serves to find smarter centroids or mean points. This prevents the common drawback of K-Means, where poor initialization points lead to poor results. These points will serve as initialization points for the iterative clustering.

In this example, we use blobs to demonstrate the algorithm. The blobs are groups of points where the smaller the standard deviation, the tighter they are packed. We can initialize number of blobs and number of clusters separately to demonstrate how the algorithms perform when the optimal number of clusters for the number of blobs was not chosen.

This is based on the example [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
    """)
    with gr.Row():
        with gr.Column():
            n_samples = gr.Slider(100, 4000, 1000, step=1,
                                  label="Number of Samples")
            n_clusters = gr.Slider(1, 10, 4, step=1,
                                   label="Number of Clusters to Initialize")
        with gr.Column():
            n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs")
            clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation")
    with gr.Row():
        graph_points = gr.Plot(label="K-Means++ Initial Points")
        init_plus_one = gr.Plot(label="K-Means after one Step")

    # Every slider triggers the same recomputation, so register the change
    # listeners in one loop instead of four identical blocks. The inputs list
    # order must match the k_means(n_samples, n_components, clst_std, n_clust)
    # signature.
    inputs = [n_samples, n_components, clst_std, n_clusters]
    for control in inputs:
        control.change(
            fn=k_means,
            inputs=inputs,
            outputs=[graph_points, init_plus_one],
        )

if __name__ == '__main__':
    demo.launch()