Nahrawy committed on
Commit
61efba9
·
1 Parent(s): 21779f0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+
7
+ from sklearn import linear_model
8
+ from sklearn.datasets import fetch_openml
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.utils._testing import ignore_warnings
11
+ from sklearn.exceptions import ConvergenceWarning
12
+ from sklearn.utils import shuffle
13
+
14
+
15
def load_mnist(classes, n_samples):
    """Load MNIST, keep only the requested classes, shuffle and subsample.

    Parameters
    ----------
    classes : sequence
        Labels to keep; they are compared against ``mnist.target``
        (the UI in this file passes digit strings such as ``"0"``).
        Any number of classes is accepted, not only two.
    n_samples : int
        Maximum number of samples to return after shuffling.

    Returns
    -------
    tuple
        ``(X, y)``: the first ``n_samples`` rows of the shuffled, filtered
        data and their labels.
    """
    # Load data from http://openml.org/d/554
    mnist = fetch_openml("mnist_784", version=1, as_frame=False, parser="pandas")

    # Keep only the requested classes. ``np.isin`` is the supported
    # replacement for the deprecated ``np.in1d``.
    mask = np.isin(mnist.target, classes)

    # Fixed seed so the same subset is selected on every call.
    X, y = shuffle(mnist.data[mask], mnist.target[mask], random_state=42)
    return X[:n_samples], y[:n_samples]
26
+
27
+
28
@ignore_warnings(category=ConvergenceWarning)
def fit_and_score(estimator, max_iter, X_train, X_test, y_train, y_test):
    """Fit the estimator on the train set and score it on both sets.

    Parameters
    ----------
    estimator : object
        Estimator exposing ``set_params``, ``fit``, ``score`` and ``n_iter_``.
        It is modified in place (parameters are set and it is re-fitted).
    max_iter : int
        Value assigned to the estimator's ``max_iter`` parameter.
    X_train, X_test, y_train, y_test
        Train and test data and labels.

    Returns
    -------
    tuple
        ``(fit_time, n_iter, train_score, test_score)`` where ``fit_time`` is
        the elapsed fitting time in seconds.
    """
    estimator.set_params(max_iter=max_iter, random_state=0)

    # perf_counter is a monotonic, high-resolution clock; time.time() can
    # jump when the system clock is adjusted.
    start = time.perf_counter()
    estimator.fit(X_train, y_train)
    fit_time = time.perf_counter() - start

    n_iter = estimator.n_iter_
    train_score = estimator.score(X_train, y_train)
    test_score = estimator.score(X_test, y_test)

    return fit_time, n_iter, train_score, test_score
43
+
44
def _plot_metric_panels(results_df, metrics, sharey=False):
    """Draw one panel per metric with one line per stopping criterion."""
    # Build the figure without pyplot: figures made with plt.subplots stay
    # registered in pyplot's global state until closed, which leaks memory
    # in a long-running server that creates new figures on every request.
    from matplotlib.figure import Figure

    lines = "Stopping criterion"
    x_axis = "max_iter"
    styles = ["-.", "--", "-"]

    fig = Figure(figsize=(12, 4))
    axes = fig.subplots(nrows=1, ncols=len(metrics), sharey=sharey, squeeze=False)[0]
    for ax, y_axis in zip(axes, metrics):
        for style, (criterion, group_df) in zip(styles, results_df.groupby(lines)):
            group_df.plot(x=x_axis, y=y_axis, label=criterion, ax=ax, style=style)
        ax.set_title(y_axis)
        ax.legend(title=lines)
    fig.tight_layout()
    return fig


def plot(classes, max_iterations, num_samples, n_iter_no_change, validation_fraction, tol):
    """Compare three SGD stopping criteria and return two summary figures.

    Three ``SGDClassifier`` configurations are each fitted once for every
    ``max_iter`` from 1 to ``max_iterations`` (inclusive) on half of the
    selected MNIST samples; the other half is used as the test set.

    Parameters
    ----------
    classes : list
        Classes to include in training; at least two are required.
    max_iterations : number
        Largest ``max_iter`` value to evaluate.
    num_samples : number
        Number of samples requested from ``load_mnist``.
    n_iter_no_change : number
        Passed to every estimator as ``n_iter_no_change``.
    validation_fraction : number
        Passed to the "Validation score" estimator as ``validation_fraction``.
    tol : str or number
        Passed to the "Validation score" estimator as ``tol``.

    Returns
    -------
    tuple
        ``(fig1, fig2)``: train/test scores, and ``n_iter_``/fit time, each
        plotted against ``max_iter``.

    Raises
    ------
    gr.Error
        If fewer than two classes are selected or ``tol`` is not a number.
    """
    if len(classes) < 2:
        raise gr.Error("Invalid number of classes (Numbers to be included in training)")
    max_iterations = int(max_iterations)
    num_samples = int(num_samples)
    n_iter_no_change = int(n_iter_no_change)
    validation_fraction = float(validation_fraction)
    try:
        # tol comes from a free-text box, so it may not parse as a number.
        tol = float(tol)
    except (TypeError, ValueError) as err:
        raise gr.Error("Stopping Criterion (tol) must be a number, e.g. 0.0001") from err

    # Define the estimators to compare
    estimator_dict = {
        "No stopping criterion": linear_model.SGDClassifier(n_iter_no_change=n_iter_no_change),
        "Training loss": linear_model.SGDClassifier(
            early_stopping=False, n_iter_no_change=n_iter_no_change, tol=0.1
        ),
        "Validation score": linear_model.SGDClassifier(
            early_stopping=True,
            n_iter_no_change=n_iter_no_change,
            tol=tol,
            validation_fraction=validation_fraction,
        ),
    }

    # Load the dataset
    X, y = load_mnist(classes, n_samples=num_samples)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

    results = []
    for estimator_name, estimator in estimator_dict.items():
        # "+ 1" so the user-selected maximum itself is evaluated.
        for max_iter in range(1, max_iterations + 1):
            fit_time, n_iter, train_score, test_score = fit_and_score(
                estimator, max_iter, X_train, X_test, y_train, y_test
            )
            results.append(
                (estimator_name, max_iter, fit_time, n_iter, train_score, test_score)
            )

    # Transform the results in a pandas dataframe for easy plotting
    columns = [
        "Stopping criterion",
        "max_iter",
        "Fit time (sec)",
        "n_iter_",
        "Train score",
        "Test score",
    ]
    results_df = pd.DataFrame(results, columns=columns)

    # First plot: train and test scores (shared y-axis for direct comparison)
    fig1 = _plot_metric_panels(results_df, ["Train score", "Test score"], sharey=True)

    # Second plot: n_iter and fit time
    fig2 = _plot_metric_panels(results_df, ["n_iter_", "Fit time (sec)"])

    return fig1, fig2
114
+
115
# Introductory text rendered at the top of the page. The original script
# passed an undefined name ``info`` to gr.Markdown, which raised NameError
# as soon as the app started.
info = """
# Early stopping of Stochastic Gradient Descent

This demo trains `SGDClassifier` on MNIST digits with three stopping
strategies: no stopping criterion, stopping on the training loss, and
stopping on a validation score. Each strategy is fitted for every
`max_iter` from 1 up to the selected maximum, and the train/test scores,
the number of iterations actually run (`n_iter_`) and the fit time are
plotted against `max_iter`.
"""

# NOTE(review): the nesting below was reconstructed because the source had
# lost its indentation; the plots are placed under the controls - confirm
# against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown(info)
    with gr.Row():
        with gr.Column():
            classes = gr.CheckboxGroup(
                ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
                value=["0", "8"],
                label="Classes",
                info="Numbers to include in the training, for fast and stable training please choose 2 classes only",
            )
            # Slider defaults are numbers, matching minimum/maximum/step.
            max_iterations = gr.Slider(
                label="Max Number of Iterations",
                value=50,
                minimum=5,
                maximum=50,
                step=1,
                info="Max Number of iterations to run SGD",
            )
            num_samples = gr.Slider(
                label="Number of Samples",
                value=10000,
                minimum=1000,
                maximum=70000,
                step=100,
                info="Number of samples to include in the training",
            )
            n_iter_no_change = gr.Slider(
                label="Number of Iterations with No Change",
                value=3,
                minimum=1,
                maximum=10,
                step=1,
                info="Maximum number of iterations with no score improvement by at least tol, before stopping",
            )
            validation_fraction = gr.Slider(
                label="Validation Fraction",
                value=0.2,
                minimum=0.05,
                maximum=0.9,
                step=0.01,
                info="Fraction of the training data to be used for validation",
            )
            tol = gr.Textbox(
                label="Stopping Criterion",
                value="0.0001",
                info="The minimum improvement of score to be considered",
            )
    out1 = gr.Plot()
    out2 = gr.Plot()

    btn = gr.Button("Run")
    btn.click(
        fn=plot,
        inputs=[classes, max_iterations, num_samples, n_iter_no_change, validation_fraction, tol],
        outputs=[out1, out2],
    )
demo.launch()