Abubakari committed on
Commit
9a00e58
•
1 Parent(s): bb6a175

Upload 5 files

Browse files
IsolationForest.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85fb3fbf5ea5b7783ee5115e6007f58675658e65da10e3de166287a749c19055
3
+ size 762750
StandardScaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13a817bb12b0e5d57640bd60f1c6f908d9124a521a7985787aacde254a229cba
3
+ size 1167
app.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import joblib
4
+ from sklearn.preprocessing import StandardScaler
5
+ import pandas as pd
6
+ import matplotlib.pyplot as plt
7
+ import seaborn as sns
8
+
9
# Paths of the bundled artifacts, relative to the app's working directory.
non_anomaly_csv_filename = 'non_anomaly_data.csv'
model_filename = "IsolationForest.joblib"
scaler_filename = "StandardScaler.joblib"


@st.cache_data
def _load_reference_data(csv_path):
    """Read the non-anomaly reference dataset from *csv_path*.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the CSV from being re-parsed on each rerun. The cache is keyed on
    the path only, so restart the app (or clear the cache) after replacing
    the file on disk.
    """
    return pd.read_csv(csv_path)


@st.cache_resource
def _load_artifact(joblib_path):
    """Load a serialized estimator from *joblib_path* once per server process.

    NOTE: joblib files are pickle-based, so loading one can execute arbitrary
    code. Only load artifacts that ship with this app or come from a trusted
    source.
    """
    return joblib.load(joblib_path)


# Module-level names used by the rest of the script.
non_anomaly_df = _load_reference_data(non_anomaly_csv_filename)
isolation_forest = _load_artifact(model_filename)
scaler = _load_artifact(scaler_filename)
30
+
31
# Main-page heading.
st.title("Anomaly Detection App with Isolation Forest")

# Everything inside this context is rendered in the sidebar. One slider per
# input feature; no explicit default value is passed to any of them.
with st.sidebar:
    st.title("Input Feature Values")

    # Transaction amount and location use float bounds.
    transaction_dollar_amount = st.slider(
        "Transaction Dollar Amount", min_value=0.0, max_value=10000.0
    )
    longitude = st.slider("Longitude (Long)", min_value=-180.0, max_value=180.0)
    latitude = st.slider("Latitude (Lat)", min_value=-90.0, max_value=90.0)

    # Card limit and calendar fields use integer bounds.
    credit_card_limit = st.slider("Credit Card Limit", min_value=0, max_value=50000)
    year = st.slider("Year", min_value=2000, max_value=2030)
    month = st.slider("Month", min_value=1, max_value=12)
    day = st.slider("Day", min_value=1, max_value=31)

    # True only on the rerun triggered by clicking the button.
    submitted = st.button("Submit")
43
+
44
# Distance added on each side of the observed reference-score range before a
# score is treated as out of range.
SCORE_MARGIN = 0.5


@st.cache_data
def _reference_scores():
    """Return Isolation Forest decision scores for the non-anomaly dataset.

    The reference data, scaler and model do not change while the app runs,
    so the scores are computed once and reused across submits.
    """
    return isolation_forest.decision_function(scaler.transform(non_anomaly_df))


def _plot_score_distribution(reference_scores, point_score):
    """Build a histogram of *reference_scores* with *point_score* marked."""
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(reference_scores, kde=True, color='gray',
                 label='Non-Anomaly Score Distribution', ax=ax)
    ax.axvline(x=point_score, color='blue', linestyle='dashed',
               label='Your Data Point')
    ax.set_xlabel('Anomaly Score')
    ax.set_ylabel('Frequency')
    ax.set_title('Anomaly Score Distribution and Your Data Point')
    ax.legend()
    return fig


def _plot_location(reference_df, point_df, flagged):
    """Build a longitude/latitude scatter plot with the input point on top.

    Plot labels are plain text: Matplotlib's default font has no emoji
    glyphs, so emoji in labels render as empty boxes and emit warnings.
    """
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(data=reference_df, x='Long', y='Lat', color='lightgrey',
                    label='Normal', ax=ax)
    if flagged:
        marker_style = {'color': 'red', 'label': 'Suspicious', 'marker': 'x'}
    else:
        marker_style = {'color': 'green', 'label': 'Valid', 'marker': 'o'}
    ax.scatter(point_df['Long'], point_df['Lat'], s=100, **marker_style)
    ax.set_xlabel("Longitude")
    ax.set_ylabel("Latitude")
    ax.set_title("Location Plot: Anomaly Detection")
    ax.legend()
    ax.grid(True)
    return fig


def _show_figure(fig):
    """Render *fig* in Streamlit, then close it.

    Figures created through pyplot stay registered until closed; closing
    them here stops them accumulating across submits.
    """
    st.pyplot(fig)
    plt.close(fig)


if submitted:
    # Single-row frame whose column names and order match the input dict.
    input_data = {
        'transaction_dollar_amount': transaction_dollar_amount,
        'Long': longitude,
        'Lat': latitude,
        'credit_card_limit': credit_card_limit,
        'year': year,
        'month': month,
        'day': day,
    }
    selected_columns = pd.DataFrame([input_data])

    # Standardize the input with the loaded scaler, then score it.
    selected_columns_scaled = scaler.transform(selected_columns)
    non_anomaly_scores = _reference_scores()
    your_anomaly_score = isolation_forest.decision_function(selected_columns_scaled)[0]

    # Accepted range: observed reference-score range widened by the margin.
    min_non_anomaly_score = np.min(non_anomaly_scores)
    max_non_anomaly_score = np.max(non_anomaly_scores)
    min_threshold = min_non_anomaly_score - SCORE_MARGIN
    max_threshold = max_non_anomaly_score + SCORE_MARGIN

    # NOTE(review): scikit-learn documents lower decision_function values as
    # more abnormal (negative = outlier). With a 0.5 margin this rule may
    # rarely flag anything, and the upper bound flags scores that are *more*
    # normal than the reference data. Confirm this is the intended rule.
    is_anomaly = your_anomaly_score < min_threshold or your_anomaly_score > max_threshold

    # Classification result (shown once).
    st.subheader("Anomaly Classification")
    if is_anomaly:
        st.write("Prediction Result: 🚨 Anomaly Detected!")
    else:
        st.write("Prediction Result: ✅ Not Anomaly")

    # Score distribution of the reference data with the input's score marked.
    _show_figure(_plot_score_distribution(non_anomaly_scores, your_anomaly_score))

    # Explain the score and the accepted range.
    st.write("The input data point has been classified as an anomaly." if is_anomaly
             else "The input data point is not classified as an anomaly.")
    st.write("The anomaly score is:", your_anomaly_score)
    st.write("The threshold for anomaly detection is:", min_threshold, "to", max_threshold)

    # Location of the input relative to the reference data.
    st.subheader("Location Plot: Anomaly Detection 🗺️")
    _show_figure(_plot_location(non_anomaly_df, selected_columns, is_anomaly))

    # Explain the location plot.
    st.write("The location plot visualizes the anomaly detection result based on longitude and latitude.")
    if is_anomaly:
        st.write("The input data point is marked as Suspicious 🚩 due to its anomaly score.")
        st.write("The red 'x' marker indicates a suspicious location.")
    else:
        st.write("The input data point is marked as Valid ✅ due to its anomaly score.")
        st.write("The green 'o' marker indicates a valid location.")
non_anomaly_data.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit
2
+ numpy
3
+ scikit-learn
4
+ pandas
5
+ matplotlib
6
+ seaborn