File size: 3,667 Bytes
3c6e069
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# Gradio Implementation: Lenix Carter
# License: BSD 3-Clause or CC-0

import gradio as gr
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Select the non-interactive Agg backend so figures are rendered off-screen;
# the plots are handed to the web UI rather than shown in a GUI window.
matplotlib.use('agg')

def reduce_dimensions(n_neighbors, random_state):
    """Embed the Digits dataset in 2-D with PCA, LDA and NCA and plot each result.

    The dataset is split 50/50 (stratified by label) into train and test
    sets. Each dimensionality-reduction pipeline is fitted on the training
    half, a k-nearest-neighbours classifier is trained on the embedded
    training points and scored on the embedded test points, and the full
    dataset is then projected and drawn as a scatter plot coloured by digit.

    Args:
        n_neighbors: Number of neighbours used by the KNN classifier.
        random_state: Seed used for the train/test split, PCA and NCA.

    Returns:
        A list of three matplotlib figures, in the order PCA, LDA, NCA.
    """
    # Defensive coercion: UI sliders may deliver whole numbers as floats,
    # while scikit-learn expects integers for both of these parameters.
    n_neighbors = int(n_neighbors)
    random_state = int(random_state)

    # Load Digits dataset
    X, y = datasets.load_digits(return_X_y=True)

    # Split into train/test
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.5, stratify=y, random_state=random_state
    )

    # Reduce dimension to 2 with PCA
    pca = make_pipeline(
        StandardScaler(), PCA(n_components=2, random_state=random_state)
    )

    # Reduce dimension to 2 with LinearDiscriminantAnalysis
    lda = make_pipeline(
        StandardScaler(), LinearDiscriminantAnalysis(n_components=2)
    )

    # Reduce dimension to 2 with NeighborhoodComponentsAnalysis
    nca = make_pipeline(
        StandardScaler(),
        NeighborhoodComponentsAnalysis(n_components=2, random_state=random_state),
    )

    # Use a nearest neighbor classifier to evaluate the methods
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)

    # Make a list of the methods to be compared
    dim_reduction_methods = [("PCA", pca), ("LDA", lda), ("NCA", nca)]

    dim_red_graphs = []

    for name, model in dim_reduction_methods:
        # Fit the method's model
        model.fit(X_train, y_train)

        # Fit a nearest neighbor classifier on the embedded training set
        knn.fit(model.transform(X_train), y_train)

        # Compute the nearest neighbor accuracy on the embedded test set
        acc_knn = knn.score(model.transform(X_test), y_test)

        # Embed the data set in 2 dimensions using the fitted model
        X_embedded = model.transform(X)

        # Create the figure only once the data is ready, and draw through the
        # explicit Axes object instead of pyplot's implicit "current figure".
        fig, ax = plt.subplots()
        ax.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y, s=30, cmap="Set1")
        ax.set_title(
            f"{name}, KNN (k={n_neighbors})\nTest accuracy = {acc_knn:.2f}"
        )

        # Unregister the figure from pyplot so repeated calls do not
        # accumulate open figures; the Figure object itself stays usable.
        plt.close(fig)

        dim_red_graphs.append(fig)
    return dim_red_graphs

# Build the Gradio UI: a description, two sliders, a run button and one plot
# per dimensionality-reduction method.
title = "Dimensionality Reduction with Neighborhood Components Analysis"
with gr.Blocks() as demo:
    gr.Markdown(f" # {title}")
    gr.Markdown("""
                This example performs and displays the results of Principal Component Analysis, Linear Discriminant Analysis, and Neighborhood Component Analysis on the Digits dataset.

                The result shows that NCA produces visually meaningful clustering.

                This is based on the example [here](https://scikit-learn.org/stable/auto_examples/neighbors/plot_nca_dim_reduction.html#sphx-glr-auto-examples-neighbors-plot-nca-dim-reduction-py)
                """)
    n_neighbors = gr.Slider(2, 10, 3, step=1, label="Number of Neighbors for KNN")
    random_state = gr.Slider(0, 100, 0, step=1, label="Random State")
    # The button caption is the component's value; Button defines no `label`
    # parameter, so the text is passed as `value`.
    btn = gr.Button(value="Run")
    with gr.Row():
        pca_graph = gr.Plot(label="PCA")
        lda_graph = gr.Plot(label="LDA")
        nca_graph = gr.Plot(label="NCA")
    # reduce_dimensions returns three figures in PCA, LDA, NCA order, which
    # matches the order of the output components below.
    btn.click(
        fn=reduce_dimensions,
        inputs=[n_neighbors, random_state],
        outputs=[pca_graph, lda_graph, nca_graph],
    )

if __name__ == '__main__':
    demo.launch()