File size: 3,667 Bytes
3c6e069 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0
import gradio as gr
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Select the non-interactive Agg backend so figures are rendered off-screen
# (the figures are only handed to Gradio, never shown in a GUI window).
matplotlib.use('agg')
def reduce_dimensions(n_neighbors, random_state):
    """Embed the Digits dataset in 2-D with PCA, LDA and NCA and plot each result.

    Each method is fitted on a stratified 50% training split. A k-nearest
    neighbors classifier is then trained on the embedded training set and
    scored on the embedded test set; the accuracy is shown in the plot title.

    Parameters
    ----------
    n_neighbors : int or float
        Number of neighbors for the KNN classifier. Coerced to ``int``.
    random_state : int or float
        Seed for the train/test split, PCA and NCA. Coerced to ``int``.

    Returns
    -------
    list of matplotlib.figure.Figure
        Three scatter plots of the full data set, in the order PCA, LDA, NCA.
    """
    # UI sliders may deliver floats; scikit-learn requires integers here.
    n_neighbors = int(n_neighbors)
    random_state = int(random_state)

    # Load Digits dataset
    X, y = datasets.load_digits(return_X_y=True)

    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, stratify=y, random_state=random_state
    )

    # Reduce dimension to 2 with PCA
    pca = make_pipeline(
        StandardScaler(), PCA(n_components=2, random_state=random_state)
    )

    # Reduce dimension to 2 with LinearDiscriminantAnalysis
    lda = make_pipeline(
        StandardScaler(), LinearDiscriminantAnalysis(n_components=2)
    )

    # Reduce dimension to 2 with NeighborhoodComponentsAnalysis
    nca = make_pipeline(
        StandardScaler(),
        NeighborhoodComponentsAnalysis(n_components=2, random_state=random_state),
    )

    # Use a nearest neighbor classifier to evaluate the methods
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Make a list of the methods to be compared
    dim_reduction_methods = [("PCA", pca), ("LDA", lda), ("NCA", nca)]

    dim_red_graphs = []
    for name, model in dim_reduction_methods:
        # Draw on an explicit figure/axes pair instead of pyplot's implicit
        # "current figure", so concurrent calls cannot draw on each other's plot.
        fig, ax = plt.subplots()

        # Fit the method's model
        model.fit(X_train, y_train)

        # Fit a nearest neighbor classifier on the embedded training set
        knn.fit(model.transform(X_train), y_train)

        # Compute the nearest neighbor accuracy on the embedded test set
        acc_knn = knn.score(model.transform(X_test), y_test)

        # Embed the data set in 2 dimensions using the fitted model
        X_embedded = model.transform(X)

        # Plot the projected points and show the evaluation score
        ax.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap="Set1")
        ax.set_title(
            "{}, KNN (k={})\nTest accuracy = {:.2f}".format(name, n_neighbors, acc_knn)
        )

        # Deregister the figure from pyplot so figures do not accumulate
        # across calls; the Figure object itself stays valid for the caller.
        plt.close(fig)
        dim_red_graphs.append(fig)

    return dim_red_graphs
title = "Dimensionality Reduction with Neighborhood Components Analysis"

# Build the UI: a description, two sliders for the inputs, a run button,
# and one plot per dimensionality-reduction method.
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown("""
This example performs and displays the results of Principal Component Analysis, Linear Discriminant Analysis, and Neighborhood Components Analysis on the Digits dataset.
The result shows that NCA produces visually meaningful clustering.
This is based on the example [here](https://scikit-learn.org/stable/auto_examples/neighbors/plot_nca_dim_reduction.html#sphx-glr-auto-examples-neighbors-plot-nca-dim-reduction-py)
""")

    n_neighbors = gr.Slider(2, 10, 3, step=1, label="Number of Neighbors for KNN")
    random_state = gr.Slider(0, 100, 0, step=1, label="Random State")

    # The button text is the component's value; Button takes no `label` argument.
    btn = gr.Button("Run")

    with gr.Row():
        pca_graph = gr.Plot(label="PCA")
        lda_graph = gr.Plot(label="LDA")
        nca_graph = gr.Plot(label="NCA")

    # reduce_dimensions returns the figures in PCA, LDA, NCA order,
    # matching the order of the output components.
    btn.click(
        fn=reduce_dimensions,
        inputs=[n_neighbors, random_state],
        outputs=[pca_graph, lda_graph, nca_graph],
    )

if __name__ == '__main__':
    demo.launch()
|