# Spaces:

# AlignAI
# /

# Safety_Perception

# Sleeping

# File size: 7,683 Bytes

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import shap

# Read the survey responses from disk.
data_path = 'Survey Final.csv'
df = pd.read_csv(data_path)

# Replace the target labels with integer codes. The fitted encoder `le` is
# kept at module level so the codes can be mapped back to labels later.
target_column = 'Percieved Safety'
le = LabelEncoder()
df[target_column] = le.fit_transform(df[target_column])

# Split once at module level so every page of the app works on the same
# train/test partition. The fixed random_state keeps the split reproducible.
test_size = 0.2  # Fraction of rows held out for testing
y = df[target_column]
X = df.drop(columns=[target_column])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=42,
)

# Page-level configuration and title.
st.set_page_config(
    page_title="Evaluating Safety Perception on Commuting App",
    layout='wide',
)
st.title("Evaluating Safety Perception on Commuting ")

# Sidebar: logo, a divider, and the page selector. `selected` holds the
# chosen label and drives which section below is rendered.
nav_options = [
    "📊 Data Overview",
    "🔍 Exploratory Data Analysis",
    "🤖 Model Training, Evaluation & Explanations",
    "🔮 Predict Perceived Safety",
]
st.sidebar.image("logo.png", use_column_width=True, caption="Your Company Name")
st.sidebar.markdown("---")
selected = st.sidebar.selectbox("Navigation", nav_options)

# Data Overview page: a preview of the frame, its shape and column dtypes,
# shown only once the user ticks the checkbox.
if selected == "📊 Data Overview":
    st.header("📊 Data Overview")
    show_dataset = st.checkbox("Show Dataset")
    if show_dataset:
        preview = df.head()
        st.write(preview)
        st.write(f"Dataset Shape: {df.shape}")
        st.write("Data Types:")
        column_types = df.dtypes
        st.write(column_types)

# Exploratory Data Analysis page: optional heatmap, histogram, boxplot and
# pairplot, each toggled by its own checkbox.
if selected == "🔍 Exploratory Data Analysis":
    st.header("🔍 Exploratory Data Analysis")

    # Compute the numeric view once and share it between the plots.
    # include='number' covers every numeric dtype, not only int64/float64.
    numeric_df = df.select_dtypes(include='number')
    numeric_columns = numeric_df.columns.tolist()

    if st.checkbox("Correlation Heatmap"):
        st.write("Correlation Heatmap")
        fig, ax = plt.subplots(figsize=(10, 6))
        # Correlate numeric columns only: DataFrame.corr() raises on
        # non-numeric columns in pandas >= 2.0.
        sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so matplotlib does not accumulate open figures across reruns.
        plt.close(fig)

    if st.checkbox("Histogram"):
        st.write("Histograms of Numeric Columns")
        selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
        fig, ax = plt.subplots()
        sns.histplot(df[selected_column], kde=True, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Boxplot for Numeric Columns"):
        st.write("Boxplot of Numeric Columns")
        selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
        fig, ax = plt.subplots()
        sns.boxplot(data=df, x=selected_column, ax=ax)
        st.pyplot(fig)
        plt.close(fig)

    if st.checkbox("Pairplot of Dataset"):
        st.write("Pairplot of the Dataset")
        # sns.pairplot returns a grid object rather than a matplotlib Figure;
        # it is handed to st.pyplot unchanged, as before.
        pair_grid = sns.pairplot(df)
        st.pyplot(pair_grid)

# Model Training, Evaluation & Explanations page: fits four tree-based
# classifiers on the shared train split, reports test accuracy for each, then
# shows a detailed report, feature importances and SHAP values for one model.
if selected == "🤖 Model Training, Evaluation & Explanations":
    st.header("🤖 Model Training, Evaluation & Explanations")
    if st.checkbox("Train, Evaluate, and Explain Models"):
        # Model Training
        st.write("Training Tree-Based Models")
        models = {
            "Random Forest": RandomForestClassifier(random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
            "Extra Trees": ExtraTreesClassifier(random_state=42),
            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42),
        }

        # Keep each model's test predictions and accuracy keyed by display name
        # so the detailed evaluation below does not need to predict again.
        model_preds = {}
        model_accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            accuracy = accuracy_score(y_test, preds)
            model_preds[model_name] = preds
            model_accuracies[model_name] = accuracy
            st.write(f"{model_name} Accuracy: {accuracy:.2f}")

        # Model Evaluation
        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models))
        selected_model_instance = models[selected_model]
        selected_preds = model_preds[selected_model]
        st.write("Classification Report:")
        st.text(classification_report(y_test, selected_preds))
        st.write("Confusion Matrix:")
        st.write(confusion_matrix(y_test, selected_preds))

        # Feature Importance
        if st.checkbox("Show Feature Importance"):
            st.write(f"Feature Importance from {selected_model} Model")
            # Not every estimator in `models` exposes feature_importances_,
            # hence the attribute check before reading it.
            if hasattr(selected_model_instance, 'feature_importances_'):
                feature_importances = selected_model_instance.feature_importances_
                importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
                importance_df = importance_df.sort_values(by="Importance", ascending=False)
                st.bar_chart(importance_df.set_index("Feature"))
            else:
                st.write("The selected model does not support feature importances.")

        # SHAP Explanations
        if st.checkbox("Explain Predictions with SHAP"):
            st.write(f"SHAP Explanation for {selected_model} Model")
            explainer = shap.TreeExplainer(selected_model_instance)
            shap_values = explainer.shap_values(X_test)
            # Start from a fresh figure so the summary is not drawn on top of
            # a stale one left over from an earlier rerun.
            plt.figure()
            # show=False stops SHAP from calling plt.show() itself, so the
            # figure can be passed to Streamlit explicitly instead of relying
            # on the deprecated argument-less st.pyplot().
            shap.summary_plot(shap_values, X_test, plot_type="bar", show=False)
            shap_fig = plt.gcf()
            st.pyplot(shap_fig)
            plt.close(shap_fig)

# Predict Perceived Safety page: collects eight 0-4 ratings from the user and
# predicts the safety class with a Random Forest fitted on the train split.
# The comparison uses the exact label offered in the sidebar navigation
# ("Perceived"); the previous spelling never matched, so the page never showed.
if selected == "🔮 Predict Perceived Safety":
    st.header("🔮 Predict Percieved Safety")
    st.write("Please provide the following information to predict Percieved Safety for transport:")

    # User Input for Prediction: every question uses the same 0-4 scale.
    rating_scale = [0, 1, 2, 3, 4]
    questions = [
        "How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?",
        "How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?",
        "How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?",
        "How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?",
        "How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?",
        "How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?",
        "How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?",
        "How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?",
    ]
    answers = [st.selectbox(question, rating_scale) for question in questions]

    if st.button("Predict Percieved Safety"):
        # Build a one-row frame with the training column names so the model
        # receives the same named features it was fitted on.
        # NOTE(review): assumes the CSV's feature columns are in the same
        # order as the questions above - confirm against the dataset.
        user_data = pd.DataFrame([answers], columns=X_train.columns)

        # The model is refitted on every click; nothing is cached.
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        prediction = model.predict(user_data)
        # Map the integer code back to the original survey label.
        predicted_class = le.inverse_transform(prediction)

        st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")