# Safety_Perception / safetyapp.py
# GitsSaikat - "Update safetyapp.py" (commit 0c2667f, unverified), 7.68 kB
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import shap
# ---------------------------------------------------------------------------
# Data preparation (module level; the resulting names are shared by the
# page sections further down in this script).
# ---------------------------------------------------------------------------

# Read the survey responses; the path is resolved against the working directory.
data_path = 'Survey Final.csv'
df = pd.read_csv(data_path)

# Replace the target column with integer class codes. The fitted encoder stays
# in `le` so that predicted codes can be mapped back to the original labels.
# NOTE(review): 'Percieved Safety' (sic) is presumably the CSV's own header
# spelling - do not "correct" it here without renaming the column in the data.
le = LabelEncoder()
le.fit(df['Percieved Safety'])
df['Percieved Safety'] = le.transform(df['Percieved Safety'])

# Hold out a fixed 20% of the rows for evaluation; the seed keeps the split
# reproducible between reruns of the script.
test_size = 0.2
y = df['Percieved Safety']
X = df.drop(columns=['Percieved Safety'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=42,
)
# ---------------------------------------------------------------------------
# Page setup and sidebar navigation
# ---------------------------------------------------------------------------

# Configure the page before any other Streamlit element is rendered.
st.set_page_config(
    page_title="Evaluating Safety Perception on Commuting App",
    layout='wide',
)
st.title("Evaluating Safety Perception on Commuting ")

# Labels offered by the navigation selector. The page sections below compare
# `selected` against these exact strings, so they are kept byte-for-byte.
# NOTE(review): the leading characters look like mis-decoded emoji; repairing
# them requires changing every comparison in the file at the same time.
nav_options = [
    "๐Ÿ“Š Data Overview",
    "๐Ÿ” Exploratory Data Analysis",
    "๐Ÿค– Model Training, Evaluation & Explanations",
    "๐Ÿ”ฎ Predict Perceived Safety",
]

# Sidebar: logo, divider, and the page selector.
# NOTE(review): the source lost its indentation; the selector is assumed to
# live inside the sidebar because it follows the "Sidebar section" comment.
with st.sidebar:
    st.image("logo.png", use_column_width=True, caption="Your Company Name")
    st.markdown("---")
    selected = st.selectbox("Navigation", nav_options)
# ---------------------------------------------------------------------------
# Page: Data Overview - optional preview plus shape and column dtypes.
# ---------------------------------------------------------------------------
if selected == "๐Ÿ“Š Data Overview":
    st.header("๐Ÿ“Š Data Overview")

    # The first rows are shown only when the user ticks the checkbox.
    show_preview = st.checkbox("Show Dataset")
    if show_preview:
        st.write(df.head())

    # NOTE(review): the source lost its indentation; the summary lines below
    # are assumed to be shown regardless of the checkbox - confirm.
    st.write(f"Dataset Shape: {df.shape}")
    st.write("Data Types:")
    st.write(df.dtypes)
# ---------------------------------------------------------------------------
# Page: Exploratory Data Analysis - each plot is toggled by its own checkbox.
#
# Every Matplotlib figure is closed right after it has been handed to
# Streamlit: the script is re-executed on each widget interaction, so figures
# left open would pile up in pyplot's global registry.
# ---------------------------------------------------------------------------
if selected == "๐Ÿ” Exploratory Data Analysis":
    st.header("๐Ÿ” Exploratory Data Analysis")

    # Select numeric columns by kind rather than by exact width so that
    # int32/float32/etc. columns are not silently dropped from the choices.
    numeric_df = df.select_dtypes(include="number")
    numeric_columns = numeric_df.columns.tolist()

    if st.checkbox("Correlation Heatmap"):
        st.write("Correlation Heatmap")
        fig, ax = plt.subplots(figsize=(10, 6))
        # Correlate numeric columns only; a non-numeric column would make
        # DataFrame.corr() raise on recent pandas versions.
        sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Histogram"):
        st.write("Histograms of Numeric Columns")
        selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
        fig, ax = plt.subplots()
        sns.histplot(df[selected_column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Boxplot for Numeric Columns"):
        st.write("Boxplot of Numeric Columns")
        selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
        fig, ax = plt.subplots()
        sns.boxplot(data=df, x=selected_column, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Pairplot of Dataset"):
        st.write("Pairplot of the Dataset")
        # pairplot returns a PairGrid; pass its underlying Figure to Streamlit
        # instead of relying on the grid object being accepted as a figure.
        pair_grid = sns.pairplot(df)
        st.pyplot(pair_grid.figure)
        plt.close(pair_grid.figure)
# ---------------------------------------------------------------------------
# Page: Model Training, Evaluation & Explanations
#
# Fits four tree-based classifiers on the module-level training split, reports
# their test accuracy, then shows a classification report, confusion matrix,
# and optional feature-importance / SHAP views for one chosen model.
# ---------------------------------------------------------------------------
if selected == "๐Ÿค– Model Training, Evaluation & Explanations":
    st.header("๐Ÿค– Model Training, Evaluation & Explanations")

    if st.checkbox("Train, Evaluate, and Explain Models"):
        # --- Training ------------------------------------------------------
        st.write("Training Tree-Based Models")
        models = {
            "Random Forest": RandomForestClassifier(random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
            "Extra Trees": ExtraTreesClassifier(random_state=42),
            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42),
        }
        model_preds = {}
        model_accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            accuracy = accuracy_score(y_test, preds)
            model_preds[model_name] = preds
            model_accuracies[model_name] = accuracy
            st.write(f"{model_name} Accuracy: {accuracy:.2f}")

        # --- Detailed evaluation of one model --------------------------------
        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models.keys()))
        selected_model_instance = models[selected_model]
        selected_preds = model_preds[selected_model]

        st.write("Classification Report:")
        st.text(classification_report(y_test, selected_preds))
        st.write("Confusion Matrix:")
        st.write(confusion_matrix(y_test, selected_preds))

        # --- Feature importance ---------------------------------------------
        if st.checkbox("Show Feature Importance"):
            st.write(f"Feature Importance from {selected_model} Model")
            # Not every estimator in `models` exposes `feature_importances_`,
            # hence the attribute check before building the chart.
            if hasattr(selected_model_instance, 'feature_importances_'):
                feature_importances = selected_model_instance.feature_importances_
                importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
                importance_df = importance_df.sort_values(by="Importance", ascending=False)
                st.bar_chart(importance_df.set_index("Feature"))
            else:
                st.write("The selected model does not support feature importances.")

        # --- SHAP explanation -----------------------------------------------
        if st.checkbox("Explain Predictions with SHAP"):
            st.write(f"SHAP Explanation for {selected_model} Model")
            explainer = shap.TreeExplainer(selected_model_instance)
            shap_values = explainer.shap_values(X_test)

            # Draw onto a fresh figure and keep SHAP from calling plt.show()
            # (show=False) so the finished figure can be handed to Streamlit
            # explicitly instead of relying on the implicit global figure.
            plt.figure()
            shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
            shap_fig = plt.gcf()
            st.pyplot(shap_fig)
            # The script reruns on every interaction; close to avoid leaking figures.
            plt.close(shap_fig)
# ---------------------------------------------------------------------------
# Page: Predict Perceived Safety
#
# Collects eight 0-4 ratings from the user, fits a RandomForestClassifier on
# the module-level training split, and shows the decoded predicted class.
# ---------------------------------------------------------------------------
# The comparison string must equal the navigation option exactly. The sidebar
# offers the label spelled "Perceived"; testing against "Percieved" made this
# whole page unreachable.
if selected == "๐Ÿ”ฎ Predict Perceived Safety":
    st.header("๐Ÿ”ฎ Predict Percieved Safety")
    st.write("Please provide the following information to predict Percieved Safety for transport:")

    # User input: one rating per question, all on the same 0-4 scale.
    scale = [0, 1, 2, 3, 4]
    overcrowding = st.selectbox("How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?", scale)
    preference = st.selectbox("How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?", scale)
    daytime_safety = st.selectbox("How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?", scale)
    nighttime_safety = st.selectbox("How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?", scale)
    taxi_dsafety = st.selectbox("How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?", scale)
    taxi_nsafety = st.selectbox("How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?", scale)
    reporting = st.selectbox("How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?", scale)
    background_check = st.selectbox("How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?", scale)

    # Single-row feature matrix in the order the questions were asked.
    user_data = np.array([[
        overcrowding, preference, daytime_safety, nighttime_safety,
        taxi_dsafety, taxi_nsafety, reporting, background_check
    ]])

    if st.button("Predict Percieved Safety"):
        # The model is fitted on demand, each time the button is pressed.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)

        # The model was fitted on a DataFrame, so predict on a DataFrame with
        # the same column names; a bare ndarray triggers scikit-learn's
        # "X does not have valid feature names" warning.
        # NOTE(review): assumes the eight questions are listed in the same
        # order as the training columns - verify against the CSV header.
        user_frame = pd.DataFrame(user_data, columns=X_train.columns)
        prediction = model.predict(user_frame)

        # Map the integer class code back to the original survey label.
        predicted_class = le.inverse_transform(prediction)
        st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")