Spaces:

sklearn-docs
/

model-feature-selection

Sleeping

App Files Files Community

model-feature-selection / app.py

haizad

fix description formating

8a32f4d almost 2 years ago

raw

history blame

3.72 kB

	import gradio as gr
	import numpy as np
	from sklearn.datasets import load_diabetes
	from sklearn.linear_model import RidgeCV
	from sklearn.feature_selection import SelectFromModel
	from time import time
	from sklearn.feature_selection import SequentialFeatureSelector
	import matplotlib
	matplotlib.use("Agg")
	import matplotlib.pyplot as plt

	def select_features(method, num_features):
	    """Select diabetes-dataset features with the chosen method and time it.

	    Args:
	        method: "model" to rank features with SelectFromModel on a RidgeCV
	            estimator, or "sfs-forward" / "sfs-backward" to run a
	            SequentialFeatureSelector in the corresponding direction.
	        num_features: How many features to keep; converted with ``int()``.

	    Returns:
	        A sentence listing the selected feature names (comma-separated)
	        and the time spent fitting the selector, in seconds.

	    Raises:
	        ValueError: If ``method`` is not one of the supported names or
	            ``num_features`` is smaller than 1.
	    """
	    sfs_directions = {"sfs-forward": "forward", "sfs-backward": "backward"}

	    # Validate before doing any work so a missing/unknown choice produces a
	    # clear error instead of an UnboundLocalError at the return statement.
	    if method != "model" and method not in sfs_directions:
	        expected = ", ".join(["model", *sfs_directions])
	        raise ValueError(f"Unknown method {method!r}; expected one of: {expected}")
	    n_requested = int(num_features)
	    if n_requested < 1:
	        raise ValueError("num_features must be at least 1")

	    diabetes = load_diabetes()
	    X, y = diabetes.data, diabetes.target
	    feature_names = np.array(diabetes.feature_names)
	    n_total = X.shape[1]

	    # Left unfitted on purpose: both selectors clone and fit the estimator
	    # themselves, so fitting it here would only be wasted work.
	    ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5))

	    if method == "model":
	        # threshold=-inf disables the importance cut-off so that exactly the
	        # `max_features` most important features are kept.
	        selector = SelectFromModel(
	            ridge, threshold=-np.inf, max_features=min(n_requested, n_total)
	        )
	    elif n_requested >= n_total:
	        # SequentialFeatureSelector rejects n_features_to_select >= n_features;
	        # asking for every feature trivially selects all of them.
	        selector = None
	    else:
	        selector = SequentialFeatureSelector(
	            ridge,
	            n_features_to_select=n_requested,
	            direction=sfs_directions[method],
	        )

	    # Only the selector fit is timed, not dataset loading.
	    tic = time()
	    if selector is None:
	        selected_features = feature_names
	    else:
	        selected_features = feature_names[selector.fit(X, y).get_support()]
	    execution_time = time() - tic

	    return f"Selected the following features: {','.join(selected_features)} in {execution_time:.3f} seconds"

	# Build the Gradio UI: a description, the two inputs, an output box and a
	# button that runs select_features on the current input values.
	title = "Selecting features with Sequential Feature Selection"
	with gr.Blocks(title=title) as demo:
	    gr.Markdown(f"## {title}")
	    gr.Markdown("""
	This app demonstrates feature selection techniques using model based selection and sequential feature selection.\n\n
	Model based selection is based on feature importance. Each feature is assigned a score on how much influence they have on the model output. The feature with highest score is considered the most important feature.\n\n
	Sequential feature selection is based on greedy approach. In greedy approach, the feature is added or removed to the selected features at each iteration based on the model performance score.\n\n
	This app uses Ridge estimator and the diabetes dataset from sklearn. Diabetes dataset consist of quantitative measure of diabetes progression and 10 following variables obtained from 442 diabetes patients:
	1. Age (age)
	2. Sex (sex)
	3. Body mass index (bmi)
	4. Average blood pressure (bp)
	5. Total serum cholesterol (s1)
	6. Low-density lipoproteins (s2)
	7. High-density lipoproteins (s3)
	8. Total cholesterol / HDL (s4)
	9. Possibly log of serum triglycerides level (s5)
	10. Blood sugar level (s6)\n\n
	This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/feature_selection/plot_select_from_model_diabetes.html#sphx-glr-auto-examples-feature-selection-plot-select-from-model-diabetes-py)
	""")

	    # Pre-select a method so that pressing "Select" right after the page
	    # loads never hands None to select_features.
	    method = gr.Radio(["model", "sfs-forward", "sfs-backward"], value="model", label="Method")
	    num_features = gr.Slider(minimum=2, maximum=10, step=1, label = "Number of features")
	    output = gr.Textbox(label="Output Box")
	    select_btn = gr.Button("Select")
	    # The callback receives the two input values and writes its returned
	    # string into the output box.
	    select_btn.click(fn=select_features, inputs=[method,num_features], outputs=output)

	demo.launch()