Spaces:
Sleeping
Sleeping
import streamlit as st | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.model_selection import train_test_split | |
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier | |
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix | |
from sklearn.preprocessing import LabelEncoder | |
import shap | |
# --- Dataset loading and global train/test split ---
# The CSV is read at module level, so the frame, the fitted label encoder and
# the split below are shared by every section of the app.
data_path = 'Survey Final.csv'
df = pd.read_csv(data_path)

# Replace the target labels with integer codes. `le` is kept at module level
# so encoded predictions can be mapped back to the original labels.
le = LabelEncoder()
target_column = 'Percieved Safety'  # spelled exactly as the code reads it from the CSV
df[target_column] = le.fit_transform(df[target_column])

# Features are every column except the target; the split is seeded so it is
# reproducible between runs.
test_size = 0.2  # fraction of rows held out for testing
y = df[target_column]
X = df.drop(target_column, axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=test_size
)
# --- Page setup and sidebar navigation ---
st.set_page_config(
    layout='wide',
    page_title="Evaluating Safety Perception on Commuting App",
)
st.title("Evaluating Safety Perception on Commuting ")

# Section labels offered in the sidebar. The chosen label is stored in
# `selected` and compared verbatim by the section blocks further down, so the
# strings here must stay in sync with those comparisons.
nav_options = [
    "๐ Data Overview",
    "๐ Exploratory Data Analysis",
    "๐ค Model Training, Evaluation & Explanations",
    "๐ฎ Predict Perceived Safety",
]

with st.sidebar:
    st.image("logo.png", caption="Safety Perception", use_container_width=True)
    st.markdown("---")
    selected = st.selectbox("Navigation", nav_options)
# --- Data Overview section ---
# Shows an optional preview of the first rows, then the frame's shape and
# per-column dtypes.
# NOTE(review): the original indentation was lost; the shape/dtype output is
# assumed to be shown regardless of the "Show Dataset" checkbox — confirm.
if selected == "๐ Data Overview":
    st.header("๐ Data Overview")

    show_dataset = st.checkbox("Show Dataset")
    if show_dataset:
        st.write(df.head())

    dataset_shape = df.shape
    st.write(f"Dataset Shape: {dataset_shape}")

    st.write("Data Types:")
    column_dtypes = df.dtypes
    st.write(column_dtypes)
# --- Exploratory Data Analysis section ---
# Each checkbox renders one plot type. Every figure is closed after it has
# been handed to Streamlit: the script is re-executed on each widget
# interaction, so unclosed figures would otherwise pile up in matplotlib.
if selected == "๐ Exploratory Data Analysis":
    st.header("๐ Exploratory Data Analysis")

    # Numeric view of the data, computed once and shared by the heatmap and
    # the column pickers below.
    numeric_df = df.select_dtypes(include="number")
    numeric_columns = numeric_df.columns.tolist()

    if st.checkbox("Correlation Heatmap"):
        st.write("Correlation Heatmap")
        fig, ax = plt.subplots(figsize=(10, 6))
        # Correlate numeric columns only; calling corr() on a frame that
        # contains non-numeric columns raises on recent pandas versions.
        sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Histogram"):
        st.write("Histograms of Numeric Columns")
        selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
        fig, ax = plt.subplots()
        sns.histplot(df[selected_column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Boxplot for Numeric Columns"):
        st.write("Boxplot of Numeric Columns")
        selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
        fig, ax = plt.subplots()
        sns.boxplot(data=df, x=selected_column, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Pairplot of Dataset"):
        st.write("Pairplot of the Dataset")
        # pairplot returns a PairGrid, not a Figure; pass the underlying
        # matplotlib figure to Streamlit explicitly.
        grid = sns.pairplot(df)
        st.pyplot(grid.figure)
        plt.close(grid.figure)
# --- Model Training, Evaluation & Explanations section ---
# Fits four tree-based classifiers on the global train split, reports their
# test accuracy, and offers a detailed report, feature importances and a SHAP
# summary for one selected model.
# NOTE(review): the original indentation was lost; everything below is assumed
# to be nested under the "Train, Evaluate, and Explain Models" checkbox because
# it depends on the fitted models — confirm.
if selected == "๐ค Model Training, Evaluation & Explanations":
    st.header("๐ค Model Training, Evaluation & Explanations")

    if st.checkbox("Train, Evaluate, and Explain Models"):
        # Model Training
        st.write("Training Tree-Based Models")
        models = {
            "Random Forest": RandomForestClassifier(random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
            "Extra Trees": ExtraTreesClassifier(random_state=42),
            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42),
        }
        model_preds = {}
        model_accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            accuracy = accuracy_score(y_test, preds)
            model_preds[model_name] = preds
            model_accuracies[model_name] = accuracy
            st.write(f"{model_name} Accuracy: {accuracy:.2f}")

        # Model Evaluation
        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models.keys()))
        selected_model_instance = models[selected_model]
        selected_preds = model_preds[selected_model]
        st.write("Classification Report:")
        st.text(classification_report(y_test, selected_preds))
        st.write("Confusion Matrix:")
        st.write(confusion_matrix(y_test, selected_preds))

        # Feature Importance
        if st.checkbox("Show Feature Importance"):
            st.write(f"Feature Importance from {selected_model} Model")
            # Not every estimator exposes `feature_importances_`, hence the
            # attribute check before reading it.
            if hasattr(selected_model_instance, 'feature_importances_'):
                feature_importances = selected_model_instance.feature_importances_
                importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
                importance_df = importance_df.sort_values(by="Importance", ascending=False)
                st.bar_chart(importance_df.set_index("Feature"))
            else:
                st.write("The selected model does not support feature importances.")

        # SHAP Explanations
        if st.checkbox("Explain Predictions with SHAP"):
            st.write(f"SHAP Explanation for {selected_model} Model")
            # NOTE(review): TreeExplainer may not support every model/target
            # combination offered above — confirm against the SHAP docs.
            explainer = shap.TreeExplainer(selected_model_instance)
            shap_values = explainer.shap_values(X_test)
            # Draw onto a figure we own and keep SHAP from showing it itself
            # (show=False), so Streamlit receives an explicit figure instead
            # of relying on matplotlib's global state.
            shap_fig = plt.figure()
            shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
            st.pyplot(shap_fig)
            plt.close(shap_fig)
# --- Predict Percieved Safety section ---
# Collects eight 0-4 ratings from the user and, on button press, fits a random
# forest on the global train split and predicts the safety class for them.
if selected == "๐ฎ Predict Perceived Safety":
    st.header("๐ฎ Predict Percieved Safety")
    st.write("Please provide the following information to predict Percieved Safety for transport:")

    # User Input for Prediction
    overcrowding = st.selectbox("How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?", [0, 1, 2, 3, 4])
    preference = st.selectbox("How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?", [0, 1, 2, 3, 4])
    daytime_safety = st.selectbox("How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    nighttime_safety = st.selectbox("How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    taxi_dsafety = st.selectbox("How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    taxi_nsafety = st.selectbox("How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    reporting = st.selectbox("How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?", [0, 1, 2, 3, 4])
    background_check = st.selectbox("How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?", [0, 1, 2, 3, 4])

    # One row holding the answers in the order they were asked.
    # NOTE(review): this order is assumed to match the feature column order of
    # the training data — verify against the CSV.
    user_data = np.array([[
        overcrowding, preference, daytime_safety, nighttime_safety,
        taxi_dsafety, taxi_nsafety, reporting, background_check
    ]])

    if st.button("Predict Percieved Safety"):
        # Train the Model (Again) and Predict
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        # The model is fitted on a DataFrame, so predict on a DataFrame with
        # the same column names rather than a bare array; this keeps feature
        # names consistent between fit and predict.
        user_frame = pd.DataFrame(user_data, columns=X_train.columns)
        prediction = model.predict(user_frame)
        # Map the encoded class back to its original label.
        predicted_class = le.inverse_transform(prediction)
        st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")