# Original Author: Gael Varoquaux
# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0

import gradio as gr
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patheffects as PathEffects
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import pairwise_distances

np.random.seed(0)
matplotlib.use('agg')

labels = ("Waveform 1", "Waveform 2", "Waveform 3")
colors = ["#f7bd01", "#377eb8", "#f781bf"]
n_clusters = 3


def sqr(x):
    """Return a square wave: the sign (-1, 0 or 1) of ``cos(x)``."""
    return np.sign(np.cos(x))


def ground_truth_plot(n_features):
    """Generate the noisy waveform dataset and plot it by true class.

    Thirty samples are drawn for each of three (phase, amplitude)
    settings, so ``X`` has 90 rows of ``n_features`` values each.

    Parameters
    ----------
    n_features : int or float
        Number of points per waveform. Cast to ``int`` because it is used
        as a sample count by ``np.linspace`` and ``np.random.rand``.

    Returns
    -------
    tuple
        ``(figure, X, y)``: the ground-truth figure, the data matrix and
        the integer class label (0, 1 or 2) of each row.
    """
    n_features = int(n_features)
    t = np.pi * np.linspace(0, 1, n_features)

    X = []
    y = []
    for i, (phi, a) in enumerate([(0.5, 0.15), (0.5, 0.6), (0.3, 0.2)]):
        for _ in range(30):
            phase_noise = 0.01 * np.random.normal()
            amplitude_noise = 0.04 * np.random.normal()
            additional_noise = 1 - 2 * np.random.rand(n_features)
            # Make the noise sparse
            additional_noise[np.abs(additional_noise) < 0.997] = 0

            X.append(
                12
                * (
                    (a + amplitude_noise) * (sqr(6 * (t + phi + phase_noise)))
                    + additional_noise
                )
            )
            y.append(i)

    X = np.array(X)
    y = np.array(y)

    # Plot the ground-truth labelling
    gt_plot = plt.figure()
    ax = gt_plot.add_axes([0, 0, 1, 1])
    for label_idx, color, name in zip(range(n_clusters), colors, labels):
        lines = ax.plot(X[y == label_idx].T, c=color, alpha=0.5)
        # Label only the first line of each class so the legend shows one
        # entry per waveform rather than one per sample.
        lines[0].set_label(name)

    ax.legend(loc="best")
    ax.axis("tight")
    ax.axis("off")
    gt_plot.suptitle("Ground truth", size=20, y=1)

    return gt_plot, X, y


def plot_cluster_waves(metric, X, y):
    """Cluster ``X`` with average-linkage agglomerative clustering and plot it.

    Parameters
    ----------
    metric : str
        Distance metric passed to ``AgglomerativeClustering``.
    X : ndarray
        Data matrix, one waveform per row.
    y : ndarray
        True labels. Unused here; kept so the signature matches
        ``plot_distances``.

    Returns
    -------
    matplotlib.figure.Figure
        Waveforms coloured by the cluster they were assigned to.
    """
    model = AgglomerativeClustering(
        n_clusters=n_clusters, linkage="average", metric=metric
    )
    model.fit(X)

    clust_plot = plt.figure()
    ax = clust_plot.add_axes([0, 0, 1, 1])
    for label_idx, color in zip(np.arange(model.n_clusters), colors):
        ax.plot(X[model.labels_ == label_idx].T, c=color, alpha=0.5)

    ax.axis("tight")
    ax.axis("off")
    clust_plot.suptitle(f"AgglomerativeClustering(metric={metric})", size=20, y=1)

    return clust_plot


def plot_distances(metric, X, y):
    """Plot the mean pairwise distance between every pair of true classes.

    The matrix is divided by its maximum, so the largest cell is 1.

    Parameters
    ----------
    metric : str
        Distance metric passed to ``pairwise_distances``.
    X : ndarray
        Data matrix, one waveform per row.
    y : ndarray
        True class label of each row of ``X``.

    Returns
    -------
    matplotlib.figure.Figure
        Annotated heat map of the normalised inter-class distances.
    """
    avg_dist = np.zeros((n_clusters, n_clusters))
    dist_plot = plt.figure()
    ax = dist_plot.add_subplot(111)

    for i in range(n_clusters):
        for j in range(n_clusters):
            avg_dist[i, j] = pairwise_distances(
                X[y == i], X[y == j], metric=metric
            ).mean()
    avg_dist /= avg_dist.max()

    for i in range(n_clusters):
        for j in range(n_clusters):
            # imshow draws element [row, col] at x=col, y=row, so the value
            # for [i, j] belongs at (j, i).
            t = ax.text(
                j,
                i,
                f"{avg_dist[i, j]:5.3f}",
                verticalalignment="center",
                horizontalalignment="center",
            )
            t.set_path_effects(
                [PathEffects.withStroke(linewidth=5, foreground="w", alpha=0.5)]
            )

    image = ax.imshow(avg_dist, interpolation="nearest", cmap="cividis", vmin=0)
    ax.set_xticks(range(n_clusters))
    ax.set_xticklabels(labels, rotation=45)
    ax.set_yticks(range(n_clusters))
    ax.set_yticklabels(labels)
    dist_plot.colorbar(image, ax=ax)
    dist_plot.suptitle(f"Interclass {metric} distances", size=18, y=1)
    dist_plot.tight_layout()

    return dist_plot


def agg_cluster(n_feats, measure):
    """Gradio callback: build all three figures for the chosen settings.

    Parameters
    ----------
    n_feats : int or float
        Number of features per waveform, from the slider.
    measure : str
        Distance metric, from the dropdown.

    Returns
    -------
    tuple
        ``(ground_truth_figure, cluster_figure, distance_figure)``.
    """
    # Every call creates three new pyplot figures and nothing else closes
    # them, so release the ones left registered by earlier calls.
    plt.close("all")

    gt_plt, X, y = ground_truth_plot(n_feats)
    cluster_waves_plot = plot_cluster_waves(measure, X, y)
    dist_plot = plot_distances(measure, X, y)

    return gt_plt, cluster_waves_plot, dist_plot


title = "Agglomerative clustering with different metrics"

with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown(
        " This example demonstrates the effect of different metrics on"
        " hierarchical clustering."
        " This is based on the example"
        " [here](https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_clustering_metrics.html#sphx-glr-auto-examples-cluster-plot-agglomerative-clustering-metrics-py) "
    )

    with gr.Row():
        with gr.Column():
            n_feats = gr.Slider(10, 4000, 2000, label="Number of Features")
            measure = gr.Dropdown(["cosine", "euclidean", "cityblock"], value="cosine")
            # The button caption is the `value` argument, not `label`.
            btn = gr.Button("Run")
        gt_graph = gr.Plot(label="Ground Truth Graph")
    with gr.Row():
        dist_plot = gr.Plot(label="Interclass Distances")
        clust_waves = gr.Plot(label="Agglomerative Clustering")

    btn.click(
        fn=agg_cluster,
        inputs=[n_feats, measure],
        outputs=[gt_graph, clust_waves, dist_plot],
    )

if __name__ == '__main__':
    demo.launch()