import streamlit as st
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
import plotly.express as px
# Set page configuration
st.set_page_config(page_title="ISP Network Anomaly Detection", layout="wide")
# App Title and Description
st.title("ISP Network Anomaly Detection Dashboard")
st.markdown("This application monitors network latency data in real time and detects anomalies using unsupervised machine-learning models (Isolation Forest and Local Outlier Factor).")
# Sidebar: Data Simulation Settings
st.sidebar.header("Data Simulation Settings")
num_rows = st.sidebar.number_input("Number of Data Rows (for synthetic data)", min_value=1000, max_value=100000, value=10000, step=1000)
mean_latency = st.sidebar.slider("Mean Latency (ms)", min_value=20, max_value=100, value=50)
std_latency = st.sidebar.slider("Latency Standard Deviation (ms)", min_value=1, max_value=20, value=5)
anomaly_fraction = st.sidebar.slider("Fraction of Anomalies", min_value=0.01, max_value=0.2, value=0.05, step=0.01)
anomaly_increase = st.sidebar.slider("Anomaly Latency Increase (ms)", min_value=10, max_value=50, value=30)
# Sidebar: Model Settings
st.sidebar.header("Model Settings")
model_choice = st.sidebar.selectbox("Select Anomaly Detection Model", ("Isolation Forest", "Local Outlier Factor"))
if model_choice == "Isolation Forest":
    contamination = st.sidebar.slider("Contamination Rate", min_value=0.01, max_value=0.2, value=anomaly_fraction, step=0.01)
elif model_choice == "Local Outlier Factor":
    n_neighbors = st.sidebar.slider("Number of Neighbors", min_value=5, max_value=50, value=20, step=1)
# Sidebar: Data Input Option
st.sidebar.header("Data Input")
uploaded_file = st.sidebar.file_uploader("Upload CSV file with 'timestamp' and 'latency' columns", type=["csv"])
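# Expected CSV layout for uploads (illustrative sketch; only the column names
# 'timestamp' and 'latency' are required by the code below, sample values are made up):
#   timestamp,latency
#   2021-01-01 00:00:00,48.7
#   2021-01-01 00:01:00,52.3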
# Function to generate synthetic data
@st.cache_data
def generate_synthetic_data(n, mean_latency, std_latency, anomaly_fraction, anomaly_increase):
    """Generate one-minute latency samples with a configurable fraction of elevated-latency anomalies."""
    date_rng = pd.date_range(start='2021-01-01', periods=n, freq='min')
    latency = np.random.normal(loc=mean_latency, scale=std_latency, size=n)
    anomaly_indices = np.random.choice(n, size=int(anomaly_fraction * n), replace=False)
    latency[anomaly_indices] += np.random.normal(loc=anomaly_increase, scale=std_latency, size=len(anomaly_indices))
    data = pd.DataFrame({'timestamp': date_rng, 'latency': latency})
    return data
# Load data: either from upload or generate synthetic data
if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file, parse_dates=['timestamp'])
        st.sidebar.success("File uploaded successfully!")
    except Exception as e:
        st.sidebar.error(f"Error reading file ({e}). Using synthetic data instead.")
        data = generate_synthetic_data(num_rows, mean_latency, std_latency, anomaly_fraction, anomaly_increase)
else:
    st.info("No file uploaded. Using synthetic data generated based on your settings.")
    data = generate_synthetic_data(num_rows, mean_latency, std_latency, anomaly_fraction, anomaly_increase)
# Anomaly Detection Functions
def detect_anomalies_isolation_forest(data, contamination):
    # IsolationForest labels anomalies as -1 and normal points as 1.
    model = IsolationForest(contamination=contamination, random_state=42)
    data['anomaly'] = model.fit_predict(data[['latency']])
    data['anomaly_flag'] = data['anomaly'] == -1
    return data

def detect_anomalies_lof(data, n_neighbors):
    # LocalOutlierFactor uses the same -1 (anomaly) / 1 (normal) convention.
    model = LocalOutlierFactor(n_neighbors=n_neighbors)
    data['anomaly'] = model.fit_predict(data[['latency']])
    data['anomaly_flag'] = data['anomaly'] == -1
    return data
# Apply the selected anomaly detection model
if model_choice == "Isolation Forest":
    data = detect_anomalies_isolation_forest(data, contamination)
elif model_choice == "Local Outlier Factor":
    data = detect_anomalies_lof(data, n_neighbors)
# Summary Statistics
total_points = len(data)
anomaly_points = data['anomaly_flag'].sum()
st.markdown("### Summary Statistics")
st.markdown(f"**Total Data Points:** {total_points}")
st.markdown(f"**Detected Anomalies:** {anomaly_points}")
st.markdown(f"**Anomaly Percentage:** {(anomaly_points/total_points*100):.2f}%")
# Visualization using Plotly
data['latency_ma'] = data['latency'].rolling(window=30, min_periods=1).mean()
fig = px.scatter(
    data, x='timestamp', y='latency',
    color=data['anomaly_flag'].map({True: "Anomaly", False: "Normal"}),
    title="Network Latency and Detected Anomalies", labels={"color": "Status"},
)
fig.add_scatter(x=data['timestamp'], y=data['latency_ma'], mode='lines', name='Moving Average', line=dict(color='orange'))
st.plotly_chart(fig, use_container_width=True)
# Download option for anomaly detection results
@st.cache_data
def convert_df(df):
    return df.to_csv(index=False).encode('utf-8')
csv = convert_df(data)
st.download_button(
    label="Download Anomaly Detection Results as CSV",
    data=csv,
    file_name='anomaly_detection_results.csv',
    mime='text/csv',
)
# Alert Section: Display anomaly timestamps only if a file is uploaded
if uploaded_file is not None:
    if anomaly_points > 0:
        st.markdown("### Alert: Anomalies Detected!")
        anomaly_times = data.loc[data['anomaly_flag'], 'timestamp'].dt.strftime('%Y-%m-%d %H:%M:%S').tolist()
        st.write("Anomalies were detected at the following timestamps (first 10 shown):")
        st.write(anomaly_times[:10])
    else:
        st.markdown("### No anomalies detected in the uploaded data.")
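# To run this dashboard locally (a minimal sketch, assuming the packages imported
# above are the only dependencies; pin versions as needed):
#   pip install streamlit pandas numpy scikit-learn plotly
#   streamlit run app.py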