File size: 5,935 Bytes
ac8e656
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b8c493d
 
ac8e656
 
b8c493d
ac8e656
 
 
 
b8c493d
 
ac8e656
 
 
 
 
 
b8c493d
ac8e656
 
 
 
 
 
b8c493d
ac8e656
 
 
 
 
 
 
 
 
 
 
3af96c7
 
ac8e656
 
 
b8c493d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac8e656
 
 
 
43ae0c3
b8c493d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
plt.rcParams['figure.dpi'] = 100

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_gaussian_quantiles
from sklearn.inspection import DecisionBoundaryDisplay

import gradio as gr

#=======================================================
# Marker/fill colors: C1 is drawn for "Class A" and C2 for "Class B".
C1 = '#ff0000'
C2 = '#0000ff'
# Two-entry colormap handed to contourf when shading the decision surface.
CMAP = ListedColormap([C1, C2])
# Step between neighbouring grid points when sampling the decision surface.
GRANULARITY = 0.05
#=======================================================
def get_decision_surface(X, y, model):
    """Predict ``model`` over a regular grid spanning the samples in ``X``.

    The grid covers the min/max of the first two columns of ``X``, widened
    by one unit on every side and sampled every ``GRANULARITY`` units.

    Args:
        X: 2-D array; columns 0 and 1 are used as the two plot axes.
        y: Unused; kept so the signature mirrors the (X, y, model) call site.
        model: Object exposing ``predict`` on an array of (x, y) grid points.

    Returns:
        Tuple ``(xx, yy, Z)``: the two meshgrid coordinate arrays and the
        predictions reshaped to the meshgrid's shape.
    """
    margin = 1
    first_axis, second_axis = X[:, 0], X[:, 1]

    x_ticks = np.arange(first_axis.min() - margin, first_axis.max() + margin, GRANULARITY)
    y_ticks = np.arange(second_axis.min() - margin, second_axis.max() + margin, GRANULARITY)
    xx, yy = np.meshgrid(x_ticks, y_ticks)

    # Flatten the grid into one (x, y) row per point, predict, then restore
    # the grid shape so the result can be passed straight to contourf.
    grid_points = np.c_[xx.ravel(), yy.ravel()]
    Z = model.predict(grid_points).reshape(xx.shape)

    return xx, yy, Z

def create_plot(x1, y1, x2, y2, cov1, cov2, n1, n2, max_depth, n_estimators):
    """Fit AdaBoost on two Gaussian-quantile clusters and plot the result.

    Two clusters are drawn with ``make_gaussian_quantiles``; the labels of
    the second cluster are flipped so that the two classes interleave. An
    ``AdaBoostClassifier`` over decision trees is fitted on the combined
    data, and a two-panel figure is produced: the decision surface with the
    samples on top, and a histogram of the decision scores per class below.

    Args:
        x1, y1: Mean of the first cluster.
        x2, y2: Mean of the second cluster.
        cov1, cov2: ``cov`` value passed to ``make_gaussian_quantiles`` for
            the first and second cluster.
        n1, n2: Number of samples in the first and second cluster.
        max_depth: ``max_depth`` of the base ``DecisionTreeClassifier``.
        n_estimators: ``n_estimators`` of the ``AdaBoostClassifier``.

    Returns:
        The matplotlib ``Figure`` holding both panels.
    """
    # Function-scope import: the figure is built directly instead of through
    # ``plt.figure`` so it is never registered with pyplot's global figure
    # manager. Figures created via pyplot stay alive until explicitly closed,
    # which leaks one figure per request in a long-running server.
    from matplotlib.figure import Figure

    # UI sliders may deliver whole numbers as floats; these arguments must be
    # integers for the sample generator and the estimators.
    n1, n2 = int(n1), int(n2)
    max_depth, n_estimators = int(max_depth), int(n_estimators)

    # Generate the dataset (distinct local names so the y1/y2 mean
    # parameters are not overwritten by label arrays).
    X_first, labels_first = make_gaussian_quantiles(
        mean=(x1, y1), cov=cov1, n_samples=n1, n_features=2, n_classes=2
    )
    X_second, labels_second = make_gaussian_quantiles(
        mean=(x2, y2), cov=cov2, n_samples=n2, n_features=2, n_classes=2
    )
    X = np.concatenate((X_first, X_second))
    # Flip the 0/1 labels of the second cluster.
    y = np.concatenate((labels_first, 1 - labels_second))

    # NOTE(review): newer scikit-learn releases reportedly deprecate the
    # ``algorithm`` argument - confirm against the pinned version.
    clf = AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=max_depth),
        algorithm="SAMME",
        n_estimators=n_estimators,
    )
    clf.fit(X, y)

    fig = Figure(figsize=(4.5, 6.9))

    # Top panel: decision surface with the training samples on top.
    surface_ax = fig.add_subplot(211)
    xx, yy, Z = get_decision_surface(X, y, clf)
    surface_ax.contourf(xx, yy, Z, cmap=CMAP, alpha=0.4)

    is_class_a = y == 0
    is_class_b = y == 1
    surface_ax.scatter(X[is_class_a, 0], X[is_class_a, 1], c=C1, edgecolor='k', s=20, label='Class A')
    surface_ax.scatter(X[is_class_b, 0], X[is_class_b, 1], c=C2, edgecolor='k', s=20, label='Class B')

    surface_ax.legend()
    surface_ax.set_title('AdaBoostClassifier Decision Surface')

    # Bottom panel: per-class histogram of decision scores over a shared
    # range so both classes use the same bin edges.
    scores = clf.decision_function(X)
    score_range = (scores.min(), scores.max())

    hist_ax = fig.add_subplot(212)
    hist_ax.hist(scores[is_class_a], bins=100, range=score_range, facecolor=C1, label="Class A", alpha=0.5, edgecolor="k")
    hist_ax.hist(scores[is_class_b], bins=100, range=score_range, facecolor=C2, label="Class B", alpha=0.5, edgecolor="k")

    hist_ax.set_xlabel('Score')
    hist_ax.set_ylabel('Frequency')
    hist_ax.legend()
    hist_ax.set_title('Decision Scores')
    fig.set_tight_layout(True)

    return fig

# Markdown description of the demo; rendered in the UI through gr.Markdown.
info = '''
This example fits an [AdaBoost classifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html#sklearn.ensemble.AdaBoostClassifier) on two non-linearly separable classes. The samples are generated using two [Gaussian quantiles](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.make_gaussian_quantiles.html#sklearn.datasets.make_gaussian_quantiles) of configurable mean and covariance (see the sliders below).

For the first generated Gaussian, the inner half quantile is assigned to Class A and the outer half quantile is assigned to Class B. For the second generated Gaussian, the opposite assignment happens (inner = Class B, outer = Class A).

A histogram of the decision scores of the AdaBoostClassifier is shown below. Values closer to -1 mean a high confidence that the sample belongs to Class A, and values closer to 1 mean a high confidence that the sample belongs to Class B.

Use the controls below to change the Gaussian distribution parameters, number of generated samples in each Gaussian distribution, and the classifier's max_depth and n_estimators.

Created by [@huabdul](https://huggingface.co/huabdul) based on [Scikit-learn docs](https://scikit-learn.org/stable/auto_examples/ensemble/plot_adaboost_twoclass.html).
'''
# Build the demo UI: description and parameter sliders on the left, the
# resulting plot on the right.
with gr.Blocks(analytics_enabled=False) as demo:
    with gr.Row():
        # Column scales are kept integral (4:3 is the same ratio as the
        # former 2:1.5); Gradio warns when given a non-integer scale.
        with gr.Column(scale=4):
            gr.Markdown(info)
            with gr.Row():
                with gr.Column(min_width=100):
                    s_x1 = gr.Slider(-10, 10, value=0, step=0.1, label='Mean x1')
                with gr.Column(min_width=100):
                    s_y1 = gr.Slider(-10, 10, value=0, step=0.1, label='Mean y1')
            with gr.Row():
                with gr.Column(min_width=100):
                    s_x2 = gr.Slider(-10, 10, value=2, step=0.1, label='Mean x2')
                with gr.Column(min_width=100):
                    s_y2 = gr.Slider(-10, 10, value=2, step=0.1, label='Mean y2')

            with gr.Row():
                with gr.Column(min_width=100):
                    s_cov1 = gr.Slider(0.01, 5, value=1, step=0.01, label='Covariance 1')
                with gr.Column(min_width=100):
                    s_cov2 = gr.Slider(0.01, 5, value=2, step=0.01, label='Covariance 2')

            with gr.Row():
                # Minimum is 2: each cluster is split into two quantile
                # classes, and make_gaussian_quantiles rejects n_samples
                # smaller than n_classes.
                with gr.Column(min_width=100):
                    s_n_samples1 = gr.Slider(2, 1000, value=200, step=1, label='n_samples 1')
                with gr.Column(min_width=100):
                    s_n_samples2 = gr.Slider(2, 1000, value=300, step=1, label='n_samples 2')

            with gr.Row():
                with gr.Column(min_width=100):
                    s_max_depth = gr.Slider(1, 50, value=1, step=1, label='AdaBoostClassifier max_depth')
                with gr.Column(min_width=100):
                    s_n_estimators = gr.Slider(1, 500, value=300, step=1, label='AdaBoostClassifier n_estimators')

            btn = gr.Button('Submit')
        with gr.Column(scale=3):
            plot = gr.Plot(show_label=False)

    # Slider order must match the positional parameters of create_plot.
    plot_inputs = [
        s_x1, s_y1, s_x2, s_y2,
        s_cov1, s_cov2,
        s_n_samples1, s_n_samples2,
        s_max_depth, s_n_estimators,
    ]

    # Redraw on button press, and draw once with the default slider values
    # when the page loads.
    btn.click(create_plot, inputs=plot_inputs, outputs=[plot])
    demo.load(create_plot, inputs=plot_inputs, outputs=[plot])

demo.launch()
#=======================================================