File size: 4,176 Bytes
d8e0581
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a68744f
d8e0581
 
 
 
 
a68744f
 
 
 
 
d8e0581
 
 
 
 
 
 
 
a68744f
 
d8e0581
 
 
 
 
 
 
 
a68744f
 
 
 
d8e0581
 
 
 
95c1d69
8a32f4d
53b80a9
 
 
 
 
 
8a32f4d
 
 
 
 
 
 
 
 
 
95c1d69
 
d8e0581
 
95c1d69
a68744f
 
 
d8e0581
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import RidgeCV
from sklearn.feature_selection import SelectFromModel
from time import time
from sklearn.feature_selection import SequentialFeatureSelector
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt

def select_features(method, num_features):
    """Select features of the diabetes dataset and report how long it took.

    Parameters
    ----------
    method : str
        ``'model'`` ranks features by the absolute ridge coefficients via
        ``SelectFromModel``; ``'sfs-forward'`` / ``'sfs-backward'`` run a
        greedy ``SequentialFeatureSelector`` in the given direction.
    num_features : int or float
        Number of features to keep. Coerced to ``int`` (UI sliders may
        deliver floats) and clamped to ``[1, n_features]``.

    Returns
    -------
    tuple
        ``(message, fig)``: ``message`` is a human-readable summary of the
        selected feature names and the elapsed time; ``fig`` is a matplotlib
        figure with the coefficient bar chart for ``'model'`` and ``None``
        otherwise. When ``method`` is missing or unknown, a prompt message
        and ``None`` are returned instead of raising.
    """
    sfs_directions = {"sfs-forward": "forward", "sfs-backward": "backward"}

    # Guard first: a radio button without a selection yields None, which
    # previously fell through every branch and hit unbound locals.
    if method != "model" and method not in sfs_directions:
        return "Please select a feature selection method.", None

    diabetes = load_diabetes()
    X, y = diabetes.data, diabetes.target
    feature_names = np.array(diabetes.feature_names)
    n_total = X.shape[1]
    n_select = min(max(int(num_features), 1), n_total)

    # Left unfitted on purpose: both selectors clone and fit the estimator
    # themselves, so fitting here would only duplicate work.
    ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5))
    fig = None

    if method == "model":
        tic = time()
        # threshold=-inf makes max_features the only selection criterion.
        sfm = SelectFromModel(
            ridge, threshold=-np.inf, max_features=n_select
        ).fit(X, y)
        toc = time()
        support = sfm.get_support()
        importance = np.abs(sfm.estimator_.coef_)
        fig, ax = plt.subplots()
        ax.bar(height=importance, x=feature_names)
        ax.set_title("Feature importances via coefficients")
        ax.set_ylabel("Importance coefficient")
        ax.set_xlabel("Features")
    elif n_select >= n_total:
        # SequentialFeatureSelector requires n_features_to_select to be
        # strictly smaller than the feature count; keeping every feature
        # needs no search at all.
        tic = toc = time()
        support = np.ones(n_total, dtype=bool)
    else:
        tic = time()
        sfs = SequentialFeatureSelector(
            ridge,
            n_features_to_select=n_select,
            direction=sfs_directions[method],
        ).fit(X, y)
        toc = time()
        support = sfs.get_support()

    selected_features = feature_names[support]
    execution_time = toc - tic
    return f"Selected the following features: {', '.join(selected_features)} in {execution_time:.3f} seconds", fig

title = "Selecting features with Sequential Feature Selection"

# Assemble the Gradio UI: an explanatory header, the two input controls
# (selection method and number of features) and the two outputs (text
# summary and coefficient plot).
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown("""
        This app demonstrates feature selection techniques using model based selection and sequential feature selection.\n\n 
        Model based selection is based on feature importance. Each feature is assigned a score on how much influence they have on the model output. 
        The feature with highest score is considered the most important feature.\n\n
        Sequential feature selection is based on greedy approach. In greedy approach, the feature is added or removed to the selected features at each iteration 
        based on the model performance score.\n\n
        This app uses Ridge estimator and the diabetes dataset from sklearn. Diabetes dataset consist of quantitative measure of diabetes progression and 
        10 following variables obtained from 442 diabetes patients:
        1. Age (age)
        2. Sex (sex)
        3. Body mass index (bmi)
        4. Average blood pressure (bp)
        5. Total serum cholesterol (s1)
        6. Low-density lipoproteins (s2)
        7. High-density lipoproteins (s3)
        8. Total cholesterol / HDL (s4)
        9. Possibly log of serum triglycerides level (s5)
        10. Blood sugar level (s6)\n\n 
        This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py)
    """)

    # Pre-select a method so the callback always receives a valid choice,
    # even when the slider is moved before the radio is touched.
    method = gr.Radio(
        ["model", "sfs-forward", "sfs-backward"], value="model", label="Method"
    )
    num_features = gr.Slider(minimum=2, maximum=10, step=1, label="Number of features")
    output = gr.Textbox(label="Selected features")
    plot = gr.Plot(label="Feature importance plot")

    # Recompute when either input changes; previously only the slider
    # triggered an update, so switching method left stale results on screen.
    for control in (method, num_features):
        control.change(
            fn=select_features,
            inputs=[method, num_features],
            outputs=[output, plot],
        )

demo.launch()