import gradio as gr
from huggingface_hub import from_pretrained_keras
import pandas as pd
import numpy as np
import json
from matplotlib import pyplot as plt
# scaler.json holds the training statistics and the anomaly threshold
# (keys "mean", "std" and "threshold") computed when the model was trained.
with open("scaler.json") as f:
    scaler = json.load(f)

# Load the pretrained autoencoder once at startup instead of on every request.
model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
TIME_STEPS = 288
# Generate sliding-window sequences for use in the model.
def create_sequences(values, time_steps=TIME_STEPS):
    output = []
    for i in range(len(values) - time_steps + 1):
        output.append(values[i : (i + time_steps)])
    return np.stack(output)
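# For example, with TIME_STEPS = 288 a (300, 1) input array yields
# 300 - 288 + 1 = 13 overlapping windows stacked into shape (13, 288, 1).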
def normalize_data(data):
    df_test_value = (data - scaler["mean"]) / scaler["std"]
    return df_test_value
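# NOTE: the test series must be scaled with the *training* mean and std so
# that its reconstruction errors are comparable to the stored threshold.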
def plot_test_data(df_test_value):
    fig, ax = plt.subplots(figsize=(12, 6))
    df_test_value.plot(legend=False, ax=ax)
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Input Test Data")
    return fig
def get_anomalies(df_test_value):
    # Create overlapping sequences from the test values.
    x_test = create_sequences(df_test_value.values)

    # Mean absolute reconstruction error (MAE) per sequence.
    x_test_pred = model.predict(x_test)
    test_mae_loss = np.mean(np.abs(x_test_pred - x_test), axis=1)
    test_mae_loss = test_mae_loss.reshape((-1))

    # A sequence is anomalous if its reconstruction error exceeds the threshold.
    anomalies = test_mae_loss > scaler["threshold"]
    return anomalies
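# NOTE: scaler["threshold"] is assumed to have been computed offline from the
# training reconstruction error (the Keras tutorial this model comes from
# uses threshold = np.max(train_mae_loss)).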
def plot_anomalies(df_test_value, data, anomalies):
    # Data point i is anomalous if every sequence covering it, i.e. sequences
    # (i - TIME_STEPS + 1) through i, is flagged as an anomaly.
    anomalous_data_indices = []
    for data_idx in range(TIME_STEPS - 1, len(df_test_value) - TIME_STEPS + 1):
        if np.all(anomalies[data_idx - TIME_STEPS + 1 : data_idx + 1]):
            anomalous_data_indices.append(data_idx)
    df_subset = data.iloc[anomalous_data_indices]
    fig, ax = plt.subplots(figsize=(12, 6))
    data.plot(legend=False, ax=ax)
    df_subset.plot(legend=False, ax=ax, color="r")
    ax.set_xlabel("Time")
    ax.set_ylabel("Value")
    ax.set_title("Anomalous Data Points")
    return fig
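# Worked example for the index mapping above (hypothetical, TIME_STEPS = 3):
# with anomalies = [True, True, True, False], data point 2 is covered by
# sequences 0, 1 and 2 (all True) and is flagged; data point 3 is covered by
# sequences 1, 2 and 3 (one False) and is not.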
def clean_data(df):
    # Case 1: the DataFrame already has the expected columns.
    if "timestamp" in df.columns and "value" in df.columns:
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        return df
    # Case 2: hourly labor data.
    elif "Date" in df.columns and "Hour" in df.columns and "Hourly_Labor_Hours_Total" in df.columns:
        # Combine "Date" and "Hour" into a timestamp; to_timedelta rolls an
        # Hour of 24 over into the next day on its own.
        df["timestamp"] = pd.to_datetime(df["Date"]) + pd.to_timedelta(df["Hour"].astype(int), unit="h")
        df["timestamp"] = df["timestamp"].dt.floor("h")
        # Keep only the necessary columns and rename.
        df = df[["timestamp", "Hourly_Labor_Hours_Total"]].rename(
            columns={"Hourly_Labor_Hours_Total": "value"}
        )
        return df
    # Case 3: hourly sales data.
    elif "Date_CY" in df.columns and "Hour" in df.columns and "Net_Sales_CY" in df.columns:
        df["timestamp"] = pd.to_datetime(df["Date_CY"]) + pd.to_timedelta(df["Hour"].astype(int), unit="h")
        df["timestamp"] = df["timestamp"].dt.floor("h")
        df = df[["timestamp", "Net_Sales_CY"]].rename(columns={"Net_Sales_CY": "value"})
        return df
    else:
        raise ValueError("DataFrame does not contain the necessary columns.")
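# The simplest accepted schema matches art_daily_jumpsup.csv from the Numenta
# Anomaly Benchmark (5-minute samples, i.e. 288 points per day, matching
# TIME_STEPS). Illustrative rows:
#   timestamp,value
#   2014-04-01 00:00:00,19.8
#   2014-04-01 00:05:00,20.5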
def master(file):
    # Read and clean the uploaded CSV (clean_data guarantees a datetime
    # "timestamp" column and a numeric "value" column).
    data = pd.read_csv(file)
    data = clean_data(data)
    data.set_index("timestamp", inplace=True)
    # Ensure there is enough data to build at least one full sequence.
    if len(data) < TIME_STEPS:
        raise gr.Error(
            f"Not enough data to create sequences. Need at least {TIME_STEPS} records."
        )
    df_test_value = normalize_data(data)
    # Plot the normalized input test data.
    plot1 = plot_test_data(df_test_value)
    # Predict anomalies and plot the anomalous data points.
    anomalies = get_anomalies(df_test_value)
    plot2 = plot_anomalies(df_test_value, data, anomalies)
    return plot1, plot2
iface = gr.Interface(
    fn=master,
    inputs=gr.File(label="CSV File", type="filepath"),
    outputs=[gr.Plot(label="Input Test Data"), gr.Plot(label="Anomalous Data Points")],
    examples=["art_daily_jumpsup.csv", "labor_hourly_short.csv", "sales_hourly_short.csv"],
    title="Timeseries Anomaly Detection Using an Autoencoder",
    description="Anomaly detection of timeseries data.",
)
iface.launch()