# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0

import gradio as gr
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

matplotlib.use("agg")


def reduce_dimensions(n_neighbors, random_state):
    # Slider values can arrive as floats; scikit-learn expects integers here
    n_neighbors = int(n_neighbors)
    random_state = int(random_state)

    # Close figures from previous calls so they do not accumulate in memory
    plt.close("all")

    # Load the Digits dataset
    X, y = datasets.load_digits(return_X_y=True)

    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, stratify=y, random_state=random_state
    )

    # Original feature dimensionality and number of classes (for reference)
    dim = len(X[0])
    n_classes = len(np.unique(y))

    # Reduce dimension to 2 with PCA
    pca = make_pipeline(
        StandardScaler(), PCA(n_components=2, random_state=random_state)
    )

    # Reduce dimension to 2 with LinearDiscriminantAnalysis
    lda = make_pipeline(StandardScaler(), LinearDiscriminantAnalysis(n_components=2))

    # Reduce dimension to 2 with NeighborhoodComponentsAnalysis
    nca = make_pipeline(
        StandardScaler(),
        NeighborhoodComponentsAnalysis(n_components=2, random_state=random_state),
    )

    # Use a nearest-neighbor classifier to evaluate the methods
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Make a list of the methods to be compared
    dim_reduction_methods = [("PCA", pca), ("LDA", lda), ("NCA", nca)]

    dim_red_graphs = []
    for name, model in dim_reduction_methods:
        fig = plt.figure()

        # Fit the method's model
        model.fit(X_train, y_train)

        # Fit a nearest-neighbor classifier on the embedded training set
        knn.fit(model.transform(X_train), y_train)

        # Compute the nearest-neighbor accuracy on the embedded test set
        acc_knn = knn.score(model.transform(X_test), y_test)

        # Embed the full dataset in 2 dimensions using the fitted model
        X_embedded = model.transform(X)

        # Plot the projected points and show the evaluation score
        plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap="Set1")
        plt.title(
            "{}, KNN (k={})\nTest accuracy = {:.2f}".format(name, n_neighbors, acc_knn)
        )
        dim_red_graphs.append(fig)

    return dim_red_graphs


title = "Dimensionality Reduction with Neighborhood Components Analysis"

with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
This example performs and displays the results of Principal Component Analysis,
Linear Discriminant Analysis, and Neighborhood Components Analysis on the Digits
dataset. The results show that NCA produces visually meaningful clustering.

This is based on the example
[here](https://scikit-learn.org/stable/auto_examples/neighbors/plot_nca_dim_reduction.html#sphx-glr-auto-examples-neighbors-plot-nca-dim-reduction-py).
        """
    )
    with gr.Row():
        n_neighbors = gr.Slider(2, 10, 3, step=1, label="Number of Neighbors for KNN")
        random_state = gr.Slider(0, 100, 0, step=1, label="Random State")
    with gr.Row():
        pca_graph = gr.Plot(label="PCA")
        lda_graph = gr.Plot(label="LDA")
        nca_graph = gr.Plot(label="NCA")

    n_neighbors.change(
        fn=reduce_dimensions,
        inputs=[n_neighbors, random_state],
        outputs=[pca_graph, lda_graph, nca_graph],
    )
    random_state.change(
        fn=reduce_dimensions,
        inputs=[n_neighbors, random_state],
        outputs=[pca_graph, lda_graph, nca_graph],
    )

if __name__ == "__main__":
    demo.launch()
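# ---------------------------------------------------------------------------
# Optional initial render (a minimal sketch, not part of the original demo):
# as wired above, the three gr.Plot components stay empty until a slider is
# moved. Gradio's Blocks `load` event fires once when the page opens, so the
# following lines, placed inside the `with gr.Blocks() as demo:` block right
# after the two .change() handlers, would draw the plots immediately:
#
#     demo.load(
#         fn=reduce_dimensions,
#         inputs=[n_neighbors, random_state],
#         outputs=[pca_graph, lda_graph, nca_graph],
#     )
# ---------------------------------------------------------------------------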