|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
from sklearn.ensemble import IsolationForest |
|
from sklearn.neighbors import LocalOutlierFactor |
|
import plotly.express as px |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Page setup
# ---------------------------------------------------------------------------
st.set_page_config(page_title="ISP Network Anomaly Detection", layout="wide")

st.title("ISP Network Anomaly Detection Dashboard")
st.markdown("This application monitors network latency data in real time and detects anomalies using advanced AI algorithms.")

# ---------------------------------------------------------------------------
# Sidebar: synthetic-data simulation parameters
# ---------------------------------------------------------------------------
st.sidebar.header("Data Simulation Settings")
# Number of samples to synthesize when no CSV is uploaded.
num_rows = st.sidebar.number_input("Number of Data Rows (for synthetic data)", min_value=1000, max_value=100000, value=10000, step=1000)
# Baseline latency distribution (normal) parameters, in milliseconds.
mean_latency = st.sidebar.slider("Mean Latency (ms)", min_value=20, max_value=100, value=50)
std_latency = st.sidebar.slider("Latency Standard Deviation (ms)", min_value=1, max_value=20, value=5)
# Fraction of synthetic rows that get an extra latency spike injected.
anomaly_fraction = st.sidebar.slider("Fraction of Anomalies", min_value=0.01, max_value=0.2, value=0.05, step=0.01)
# Mean size of the injected spike, in milliseconds.
anomaly_increase = st.sidebar.slider("Anomaly Latency Increase (ms)", min_value=10, max_value=50, value=30)

# ---------------------------------------------------------------------------
# Sidebar: model selection and hyperparameters
# ---------------------------------------------------------------------------
st.sidebar.header("Model Settings")
model_choice = st.sidebar.selectbox("Select Anomaly Detection Model", ("Isolation Forest", "Local Outlier Factor"))
# Only the hyperparameter widget for the selected model is created, so
# exactly one of `contamination` / `n_neighbors` is defined downstream.
if model_choice == "Isolation Forest":
    contamination = st.sidebar.slider("Contamination Rate", min_value=0.01, max_value=0.2, value=anomaly_fraction, step=0.01)
elif model_choice == "Local Outlier Factor":
    n_neighbors = st.sidebar.slider("Number of Neighbors", min_value=5, max_value=50, value=20, step=1)

# ---------------------------------------------------------------------------
# Sidebar: optional CSV upload (expects 'timestamp' and 'latency' columns)
# ---------------------------------------------------------------------------
st.sidebar.header("Data Input")
uploaded_file = st.sidebar.file_uploader("Upload CSV file with 'timestamp' and 'latency' columns", type=["csv"])
|
|
|
|
|
@st.cache_data |
|
def generate_synthetic_data(n, mean_latency, std_latency, anomaly_fraction, anomaly_increase, seed=None):
    """Generate a synthetic one-minute latency time series with injected spikes.

    Args:
        n: Number of one-minute samples to generate.
        mean_latency: Mean of the baseline latency distribution (ms).
        std_latency: Standard deviation of the baseline latency (ms).
        anomaly_fraction: Fraction of rows that receive an extra spike.
        anomaly_increase: Mean size of the injected spike (ms).
        seed: Optional RNG seed for reproducible output. The default ``None``
            preserves the previous behavior (fresh random draw per call).

    Returns:
        DataFrame with 'timestamp' (DatetimeIndex values) and 'latency' columns.
    """
    rng = np.random.default_rng(seed)
    # 'min' replaces the minute alias 'T', which is deprecated in pandas >= 2.2.
    date_rng = pd.date_range(start='2021-01-01', periods=n, freq='min')
    latency = rng.normal(loc=mean_latency, scale=std_latency, size=n)
    # Pick distinct rows to spike; int() truncation may yield 0 anomalies for
    # tiny n, which is harmless (empty index adds nothing).
    anomaly_indices = rng.choice(n, size=int(anomaly_fraction * n), replace=False)
    latency[anomaly_indices] += rng.normal(loc=anomaly_increase, scale=std_latency, size=len(anomaly_indices))
    return pd.DataFrame({'timestamp': date_rng, 'latency': latency})
|
|
|
|
|
# ---------------------------------------------------------------------------
# Load data: uploaded CSV if present and valid, otherwise synthetic series.
# ---------------------------------------------------------------------------
if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file, parse_dates=['timestamp'])
        # Fail early if the upload lacks the columns the rest of the app needs.
        missing = {'timestamp', 'latency'} - set(data.columns)
        if missing:
            raise ValueError(f"missing columns: {', '.join(sorted(missing))}")
        st.sidebar.success("File uploaded successfully!")
    except Exception as e:
        # Surface the failure reason instead of discarding the caught exception.
        st.sidebar.error(f"Error reading file ({e}). Using synthetic data instead.")
        data = generate_synthetic_data(num_rows, mean_latency, std_latency, anomaly_fraction, anomaly_increase)
else:
    st.info("No file uploaded. Using synthetic data generated based on your settings.")
    data = generate_synthetic_data(num_rows, mean_latency, std_latency, anomaly_fraction, anomaly_increase)
|
|
|
|
|
def detect_anomalies_isolation_forest(data, contamination):
    """Flag latency anomalies in *data* using an Isolation Forest.

    Adds two columns in place: 'anomaly' (-1 = outlier, 1 = inlier, as
    returned by sklearn's ``fit_predict``) and boolean 'anomaly_flag'.

    Args:
        data: DataFrame with a numeric 'latency' column; modified in place.
        contamination: Expected proportion of outliers in the data.

    Returns:
        The same DataFrame, with the two columns added.
    """
    # Fixed random_state keeps results stable across Streamlit reruns.
    model = IsolationForest(contamination=contamination, random_state=42)
    data['anomaly'] = model.fit_predict(data[['latency']])
    # Vectorized comparison replaces the per-row lambda (faster, clearer).
    data['anomaly_flag'] = data['anomaly'] == -1
    return data
|
|
|
def detect_anomalies_lof(data, n_neighbors):
    """Flag latency anomalies in *data* using Local Outlier Factor.

    Adds two columns in place: 'anomaly' (-1 = outlier, 1 = inlier, as
    returned by sklearn's ``fit_predict``) and boolean 'anomaly_flag'.

    Args:
        data: DataFrame with a numeric 'latency' column; modified in place.
        n_neighbors: Neighborhood size used by the LOF density estimate.

    Returns:
        The same DataFrame, with the two columns added.
    """
    model = LocalOutlierFactor(n_neighbors=n_neighbors)
    data['anomaly'] = model.fit_predict(data[['latency']])
    # Vectorized comparison replaces the per-row lambda (faster, clearer).
    data['anomaly_flag'] = data['anomaly'] == -1
    return data
|
|
|
|
|
# Run the detector chosen in the sidebar. Only the matching branch executes,
# so only that branch's hyperparameter (contamination or n_neighbors) needs
# to have been defined by the sidebar code above.
if model_choice == "Isolation Forest":
    data = detect_anomalies_isolation_forest(data, contamination)
elif model_choice == "Local Outlier Factor":
    data = detect_anomalies_lof(data, n_neighbors)
|
|
|
|
|
# ---------------------------------------------------------------------------
# Summary statistics
# ---------------------------------------------------------------------------
total_points = len(data)
anomaly_points = int(data['anomaly_flag'].sum())
# Guard against an empty (e.g. header-only) uploaded CSV to avoid
# ZeroDivisionError in the percentage below.
anomaly_pct = (anomaly_points / total_points * 100) if total_points else 0.0
st.markdown("### Summary Statistics")
st.markdown(f"**Total Data Points:** {total_points}")
st.markdown(f"**Detected Anomalies:** {anomaly_points}")
st.markdown(f"**Anomaly Percentage:** {anomaly_pct:.2f}%")
|
|
|
|
|
# A 30-sample rolling mean provides a smoothed trend line over raw latency.
data['latency_ma'] = data['latency'].rolling(window=30, min_periods=1).mean()

# Human-readable status labels drive the scatter point colors.
point_status = data['anomaly_flag'].map({True: "Anomaly", False: "Normal"})

fig = px.scatter(
    data,
    x='timestamp',
    y='latency',
    color=point_status,
    title="Network Latency and Detected Anomalies",
    labels={"color": "Status"},
)
fig.add_scatter(
    x=data['timestamp'],
    y=data['latency_ma'],
    mode='lines',
    name='Moving Average',
    line=dict(color='orange'),
)
st.plotly_chart(fig, use_container_width=True)
|
|
|
|
|
@st.cache_data |
|
def convert_df(df):
    """Serialize *df* to UTF-8 encoded CSV bytes (no index column)."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')
|
|
|
# Offer the flagged dataset for download. Named `results_csv` rather than
# `csv` to avoid shadowing the stdlib `csv` module name.
results_csv = convert_df(data)
st.download_button(
    label="Download Anomaly Detection Results as CSV",
    data=results_csv,
    file_name='anomaly_detection_results.csv',
    mime='text/csv',
)
|
|
|
|
|
# Alerts are shown for uploaded (real) data only, not for synthetic demos.
if uploaded_file is not None:
    if anomaly_points > 0:
        st.markdown("### Alert: Anomalies Detected!")
        # Select flagged rows first, then format their timestamps for display.
        flagged_ts = data.loc[data['anomaly_flag'], 'timestamp']
        anomaly_times = flagged_ts.dt.strftime('%Y-%m-%d %H:%M:%S').tolist()
        st.write("Anomalies were detected at the following timestamps (first 10 shown):")
        st.write(anomaly_times[:10])
    else:
        st.markdown("### No anomalies detected in the uploaded data.")
|
|