# NOTE: stripped non-source extraction artifacts (byte count, git blame hashes, line-number gutter).
import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
def choose_model(model):
if model == "Logistic Regression":
return LogisticRegression(max_iter=1000, random_state=123)
elif model == "Random Forest":
return RandomForestClassifier(n_estimators=100, random_state=123)
elif model == "Gaussian Naive Bayes":
return GaussianNB()
else:
raise ValueError("Model is not supported.")
def get_proba_plots(
    model_1, model_2, model_3, model_1_weight, model_2_weight, model_3_weight
):
    """Fit the three chosen models plus a soft VotingClassifier on a tiny
    toy dataset and plot every model's class probabilities for sample 1.

    Parameters
    ----------
    model_1, model_2, model_3 : str
        Model names accepted by ``choose_model``.
    model_1_weight, model_2_weight, model_3_weight : int
        Voting weights passed to the ``VotingClassifier``.

    Returns
    -------
    matplotlib.figure.Figure
        The bar chart, for display in the gradio ``Plot`` component.
    """
    clf1 = choose_model(model_1)
    clf2 = choose_model(model_2)
    clf3 = choose_model(model_3)

    # Tiny hard-coded two-class toy dataset: 4 samples, 2 features.
    X = np.array([[-1.0, -1.0], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf = VotingClassifier(
        estimators=[("clf1", clf1), ("clf2", clf2), ("clf3", clf3)],
        voting="soft",
        weights=[model_1_weight, model_2_weight, model_3_weight],
    )

    # Predict class probabilities with every classifier (ensemble last)...
    probas = [c.fit(X, y).predict_proba(X) for c in (clf1, clf2, clf3, eclf)]
    # ...keeping only the probabilities for the first sample in the dataset.
    class1_1 = [pr[0, 0] for pr in probas]
    class2_1 = [pr[0, 1] for pr in probas]

    # Plotting.
    N = 4  # number of bar groups: three models + the VotingClassifier
    ind = np.arange(N)  # group positions
    width = 0.35  # bar width

    # Object-oriented matplotlib API only: the previous version mixed
    # state-machine plt.* calls with ax methods and ended with plt.show(),
    # which is a no-op (or blocks) inside a web-server callback and is
    # never wanted when the figure itself is returned for rendering.
    fig, ax = plt.subplots()

    # Green bars: the three individual classifiers (ensemble slot zeroed).
    p1 = ax.bar(
        ind, np.hstack([class1_1[:-1], [0]]), width, color="green", edgecolor="k"
    )
    p2 = ax.bar(
        ind + width,
        np.hstack([class2_1[:-1], [0]]),
        width,
        color="lightgreen",
        edgecolor="k",
    )
    # Blue bars: the VotingClassifier only (individual model slots zeroed).
    ax.bar(ind, [0, 0, 0, class1_1[-1]], width, color="blue", edgecolor="k")
    ax.bar(
        ind + width, [0, 0, 0, class2_1[-1]], width, color="steelblue", edgecolor="k"
    )

    # Dashed separator between the individual models and the ensemble.
    ax.axvline(2.8, color="k", linestyle="dashed")
    ax.set_xticks(ind + width)
    ax.set_xticklabels(
        [
            f"{model_1}\nweight {model_1_weight}",
            f"{model_2}\nweight {model_2_weight}",
            f"{model_3}\nweight {model_3_weight}",
            "VotingClassifier\n(average probabilities)",
        ],
        rotation=40,
        ha="right",
    )
    ax.set_ylim([0, 1])
    ax.set_title("Class probabilities for sample 1 by different classifiers")
    ax.legend([p1[0], p2[0]], ["class 1", "class 2"], loc="upper left")
    fig.tight_layout()
    # NOTE(review): figures are never plt.close()d, so pyplot retains one
    # per callback invocation; consider closing after render if memory
    # grows in long-running sessions — confirm against gradio's Plot.
    return fig
# Model names accepted by choose_model(); shared by all three dropdowns.
MODEL_CHOICES = [
    "Logistic Regression",
    "Random Forest",
    "Gaussian Naive Bayes",
]

with gr.Blocks() as demo:
    gr.Markdown(
        """
    # Class probabilities by the `VotingClassifier`
    This space shows the effect of the weight of different classifiers when using sklearn's [VotingClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html#sklearn.ensemble.VotingClassifier).
    For example, suppose you set the weights as in the table below, and the models have the following predicted probabilities:
    | | Weights | Predicted Probabilities |
    |---------|:-------:|:----------------:|
    | Model 1 | 1 | 0.5 |
    | Model 2 | 2 | 0.8 |
    | Model 3 | 5 | 0.9 |
    The predicted probability by the `VotingClassifier` will be $(1*0.5 + 2*0.8 + 5*0.9) / (1 + 2 + 5)$
    You can experiment with different model types and weights and see their effect on the VotingClassifier's prediction.
    This space is based on [sklearn’s original demo](https://scikit-learn.org/stable/auto_examples/ensemble/plot_voting_probas.html#sphx-glr-auto-examples-ensemble-plot-voting-probas-py).
    """
    )
    with gr.Row():
        # Left column: one row per model, each with a type dropdown and a
        # voting-weight slider.
        with gr.Column(scale=3):
            with gr.Row():
                model_1 = gr.Dropdown(
                    MODEL_CHOICES,
                    label="Model 1",
                    value="Logistic Regression",
                )
                model_1_weight = gr.Slider(
                    value=1, label="Model 1 Weight", minimum=0, maximum=10, step=1
                )
            with gr.Row():
                model_2 = gr.Dropdown(
                    MODEL_CHOICES,
                    label="Model 2",
                    value="Random Forest",
                )
                model_2_weight = gr.Slider(
                    value=1, label="Model 2 Weight", minimum=0, maximum=10, step=1
                )
            with gr.Row():
                model_3 = gr.Dropdown(
                    MODEL_CHOICES,
                    label="Model 3",
                    value="Gaussian Naive Bayes",
                )
                model_3_weight = gr.Slider(
                    value=5, label="Model 3 Weight", minimum=0, maximum=10, step=1
                )
        # Right column: the resulting probability bar chart.
        with gr.Column(scale=4):
            proba_plots = gr.Plot()

    # Re-plot whenever any dropdown or slider changes (previously six
    # byte-identical .change(...) registrations), and once on page load.
    inputs = [
        model_1,
        model_2,
        model_3,
        model_1_weight,
        model_2_weight,
        model_3_weight,
    ]
    for component in inputs:
        component.change(get_proba_plots, inputs, proba_plots, queue=False)
    demo.load(get_proba_plots, inputs, proba_plots, queue=False)

if __name__ == "__main__":
    demo.launch()