# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0

import gradio as gr
import matplotlib.pyplot as plt

from sklearn.cluster import kmeans_plusplus
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs

plt.switch_backend("agg")

def initial_points(X, y_true, n_components, n_clust):
    # Calculate seeds from k-means++
    centers_init, indices = kmeans_plusplus(X, n_clusters=n_clust, random_state=0)
    
    # Plot init seeds along side sample data
    init_points_plot, ax = plt.subplots()

    for k in range(n_components):
        cluster_data = y_true == k
        plt.scatter(X[cluster_data, 0], X[cluster_data, 1], marker=".", s=10)

    plt.subplots_adjust(top=0.8)
    plt.scatter(centers_init[:, 0], centers_init[:, 1], c="b", s=50)
    plt.title("K-Means++ Initialization")
    plt.xticks([])
    plt.yticks([])
    return init_points_plot

def one_step(X, n_clust):
    kmeans = KMeans(n_clusters=n_clust, max_iter=1, n_init=1, random_state=0).fit(X)
    y_hat = kmeans.predict(X)

    one_step, ax = plt.subplots()
    plt.scatter(X[:, 0], X[:, 1], marker=".", s=10, c=y_hat)
    centers = kmeans.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c="b", s=50)

    plt.subplots_adjust(top=0.8)
    plt.title("K-Means After One Step")
    plt.xticks([])
    plt.yticks([])

    return one_step

def k_means(n_samples, n_components, clst_std, n_clust):
    plt.clf()
    # Generate sample data

    X, y_true = make_blobs(
        n_samples=n_samples, centers=n_components, cluster_std=clst_std, random_state=0
    )
    X = X[:, ::-1]

    plus_plot = initial_points(X, y_true, n_components, n_clust)
    step_plot = one_step(X, n_clust)
    
    return plus_plot, step_plot

title = "An example of K-Means++ Initialization"
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown("""
                This example shows the ouput of the K-Means++ function. 

                K-Means++ is the default initialization function for the K-Means algorithm in scikit learn. K-Means++ serves to find smarter centroids or mean points. This prevents the common drawback of K-Means, where poor initialization points lead to poor results. These points will serve as initialization points for the iterative clustering.

                In this example, we use blobs to demonstrate the algorithm. The blobs are groups of points where the smaller the standard deviation, the tighter they are packed. We can initialize number of blobs and number of clusters separately to demonstrate how the algorithms perform when the optimal number of clusters for the number of blobs was not chosen. 

                This is based on the example [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_kmeans_plusplus.html#sphx-glr-auto-examples-cluster-plot-kmeans-plusplus-py).
                """)
    with gr.Row():
        with gr.Column():
            n_samples = gr.Slider(100, 4000, 1000, step=1, 
                                  label="Number of Samples")
            n_clusters = gr.Slider(1, 10, 4, step=1, 
                                   label="Number of Clusters to Initialize")
        with gr.Column():
            n_components = gr.Slider(1, 10, 4, step=1, label="Number of blobs")
            clst_std = gr.Slider(.1, 1, .6, label="Blob Standard Deviation")
            
    
    with gr.Row():
        graph_points = gr.Plot(label="K-Means++ Initial Points")
        init_plus_one = gr.Plot(label="K-Means after one Step")

    n_samples.change(
            fn=k_means,
            inputs=[n_samples, n_components, clst_std, n_clusters],
            outputs=[graph_points, init_plus_one]
            )
    n_clusters.change(
            fn=k_means,
            inputs=[n_samples, n_components, clst_std, n_clusters],
            outputs=[graph_points, init_plus_one]
            )
    n_components.change(
            fn=k_means,
            inputs=[n_samples, n_components, clst_std, n_clusters],
            outputs=[graph_points, init_plus_one]
            )
    clst_std.change(
            fn=k_means,
            inputs=[n_samples, n_components, clst_std, n_clusters],
            outputs=[graph_points, init_plus_one]
            )

if __name__ == '__main__':
    demo.launch()