File size: 7,683 Bytes
0238ee4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c2667f
 
 
 
 
 
 
 
 
 
 
 
 
0238ee4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c2667f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, HistGradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import shap

# Load Dataset
data_path = 'Survey Final.csv'
df = pd.read_csv(data_path)

# Encode Target Column
le = LabelEncoder()
df['Percieved Safety'] = le.fit_transform(df['Percieved Safety'])

# Data Splitting (Global for Use in All Sections)
test_size = 0.2  # Default test size (can be changed in data splitting section)
X = df.drop(columns=['Percieved Safety'])
y = df['Percieved Safety']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

# Streamlit App
st.set_page_config(page_title="Evaluating Safety Perception on Commuting App", layout='wide')
st.title("Evaluating Safety Perception on Commuting ")

# Sidebar section
with st.sidebar:
    st.image("logo.png", use_column_width=True, caption="Your Company Name")
    st.markdown("---") 
    selected = st.selectbox(
        "Navigation",
        [
            "๐Ÿ“Š Data Overview",
            "๐Ÿ” Exploratory Data Analysis",
            "๐Ÿค– Model Training, Evaluation & Explanations",
            "๐Ÿ”ฎ Predict Perceived Safety"
        ]
    )

# Data Overview
if selected == "๐Ÿ“Š Data Overview":
    st.header("๐Ÿ“Š Data Overview")
    if st.checkbox("Show Dataset"):
        st.write(df.head())
        st.write(f"Dataset Shape: {df.shape}")
        st.write("Data Types:")
        st.write(df.dtypes)

# Exploratory Data Analysis
if selected == "๐Ÿ” Exploratory Data Analysis":
    st.header("๐Ÿ” Exploratory Data Analysis")
    if st.checkbox("Correlation Heatmap"):
        st.write("Correlation Heatmap")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.heatmap(df.corr(), annot=True, cmap='coolwarm', ax=ax)
        st.pyplot(fig)

    if st.checkbox("Histogram"):
        st.write("Histograms of Numeric Columns")
        numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
        selected_column = st.selectbox("Select Column for Histogram", numeric_columns)
        fig, ax = plt.subplots()
        sns.histplot(df[selected_column], kde=True, ax=ax)
        st.pyplot(fig)

    if st.checkbox("Boxplot for Numeric Columns"):
        st.write("Boxplot of Numeric Columns")
        numeric_columns = df.select_dtypes(include=['int64', 'float64']).columns.tolist()
        selected_column = st.selectbox("Select Column for Boxplot", numeric_columns)
        fig, ax = plt.subplots()
        sns.boxplot(data=df, x=selected_column, ax=ax)
        st.pyplot(fig)

    if st.checkbox("Pairplot of Dataset"):
        st.write("Pairplot of the Dataset")
        fig = sns.pairplot(df)
        st.pyplot(fig)

# Model Training, Evaluation & Explanations
if selected == "๐Ÿค– Model Training, Evaluation & Explanations":
    st.header("๐Ÿค– Model Training, Evaluation & Explanations")
    if st.checkbox("Train, Evaluate, and Explain Models"):
        # Model Training
        st.write("Training Tree-Based Models")
        models = {
            "Random Forest": RandomForestClassifier(random_state=42),
            "Gradient Boosting": GradientBoostingClassifier(random_state=42),
            "Extra Trees": ExtraTreesClassifier(random_state=42),
            "Histogram Gradient Boosting": HistGradientBoostingClassifier(random_state=42)
        }
        
        model_preds = {}
        model_accuracies = {}
        for model_name, model in models.items():
            model.fit(X_train, y_train)
            preds = model.predict(X_test)
            accuracy = accuracy_score(y_test, preds)
            model_preds[model_name] = preds
            model_accuracies[model_name] = accuracy
            st.write(f"{model_name} Accuracy: {accuracy:.2f}")
        
        # Model Evaluation
        selected_model = st.selectbox("Select Model for Detailed Evaluation", list(models.keys()))
        selected_model_instance = models[selected_model]
        selected_preds = model_preds[selected_model]
        st.write("Classification Report:")
        st.text(classification_report(y_test, selected_preds))
        st.write("Confusion Matrix:")
        st.write(confusion_matrix(y_test, selected_preds))
        
        # Feature Importance
        if st.checkbox("Show Feature Importance"):
            st.write(f"Feature Importance from {selected_model} Model")
            if hasattr(selected_model_instance, 'feature_importances_'):
                feature_importances = selected_model_instance.feature_importances_
                importance_df = pd.DataFrame({"Feature": X_train.columns, "Importance": feature_importances})
                importance_df = importance_df.sort_values(by="Importance", ascending=False)
                st.bar_chart(importance_df.set_index("Feature"))
            else:
                st.write("The selected model does not support feature importances.")
        
        # SHAP Explanations
        if st.checkbox("Explain Predictions with SHAP"):
            st.write(f"SHAP Explanation for {selected_model} Model")
            explainer = shap.TreeExplainer(selected_model_instance)
            shap_values = explainer.shap_values(X_test)
            shap.summary_plot(shap_values, X_test, plot_type="bar")
            st.pyplot()

# Predict Percieved Safety
if selected == "๐Ÿ”ฎ Predict Percieved Safety":
    st.header("๐Ÿ”ฎ Predict Percieved Safety")
    st.write("Please provide the following information to predict Percieved Safety for transport:")
    
    # User Input for Prediction
    overcrowding = st.selectbox("How overcrowded do you think the transport is on a scale from 0 (Not overcrowded) to 4 (Very overcrowded)?", [0, 1, 2, 3, 4])
    preference = st.selectbox("How much do you prefer this mode of transport on a scale from 0 (Not preferred) to 4 (Highly preferred)?", [0, 1, 2, 3, 4])
    daytime_safety = st.selectbox("How safe do you feel using this transport during the daytime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    nighttime_safety = st.selectbox("How safe do you feel using this transport during the nighttime on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    taxi_dsafety = st.selectbox("How safe do you feel using a taxi during the day on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    taxi_nsafety = st.selectbox("How safe do you feel using a taxi during the night on a scale from 0 (Not safe) to 4 (Very safe)?", [0, 1, 2, 3, 4])
    reporting = st.selectbox("How comfortable are you with reporting incidents related to this transport on a scale from 0 (Not comfortable) to 4 (Very comfortable)?", [0, 1, 2, 3, 4])
    background_check = st.selectbox("How effective do you think background checks are for transport personnel on a scale from 0 (Not effective) to 4 (Very effective)?", [0, 1, 2, 3, 4])

    user_data = np.array([[
        overcrowding, preference, daytime_safety, nighttime_safety,
        taxi_dsafety, taxi_nsafety, reporting, background_check
    ]])
    
    if st.button("Predict Percieved Safety"):
        # Train the Model (Again) and Predict
        model = RandomForestClassifier(random_state=42)
        model.fit(X_train, y_train)
        prediction = model.predict(user_data)
        predicted_class = le.inverse_transform(prediction)
        
        st.write(f"Predicted Percieved Safety Class: {predicted_class[0]}")