Zack committed on
Commit
73a3abc
·
1 Parent(s): aaf6ffd

feat: Add data cleaning method

Browse files
Files changed (2) hide show
  1. .idea/workspace.xml +95 -0
  2. app.py +19 -2
.idea/workspace.xml ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AutoImportSettings">
4
+ <option name="autoReloadType" value="SELECTIVE" />
5
+ </component>
6
+ <component name="ChangeListManager">
7
+ <list default="true" id="72819ac5-9a78-4a8a-aab7-199c2790b503" name="Changes" comment="feat: Attempt to get time-series model to work with our data">
8
+ <change beforePath="$PROJECT_DIR$/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/app.py" afterDir="false" />
9
+ </list>
10
+ <option name="SHOW_DIALOG" value="false" />
11
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
12
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
13
+ <option name="LAST_RESOLUTION" value="IGNORE" />
14
+ </component>
15
+ <component name="ComposerSettings">
16
+ <execution />
17
+ </component>
18
+ <component name="Git.Settings">
19
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
20
+ </component>
21
+ <component name="MarkdownSettingsMigration">
22
+ <option name="stateVersion" value="1" />
23
+ </component>
24
+ <component name="ProjectId" id="2TDHetankSPisemgNRZBEqcbHLu" />
25
+ <component name="ProjectViewState">
26
+ <option name="autoscrollFromSource" value="true" />
27
+ <option name="autoscrollToSource" value="true" />
28
+ <option name="hideEmptyMiddlePackages" value="true" />
29
+ <option name="showLibraryContents" value="true" />
30
+ </component>
31
+ <component name="PropertiesComponent">{
32
+ &quot;keyToString&quot;: {
33
+ &quot;RunOnceActivity.OpenProjectViewOnStart&quot;: &quot;true&quot;,
34
+ &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
35
+ &quot;RunOnceActivity.laravel-idea.search-composer-json&quot;: &quot;true&quot;,
36
+ &quot;WebServerToolWindowFactoryState&quot;: &quot;false&quot;,
37
+ &quot;last_opened_file_path&quot;: &quot;//wsl$/Ubuntu/home/zack/code/timeseries-anomaly-detection-autoencoders&quot;,
38
+ &quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
39
+ &quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
40
+ &quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
41
+ &quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
42
+ &quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
43
+ }
44
+ }</component>
45
+ <component name="RecentsManager">
46
+ <key name="CopyFile.RECENT_KEYS">
47
+ <recent name="\\wsl$\Ubuntu\home\zack\code\timeseries-anomaly-detection-autoencoders" />
48
+ </key>
49
+ <key name="MoveFile.RECENT_KEYS">
50
+ <recent name="\\wsl$\Ubuntu\home\zack\code\timeseries-anomaly-detection-autoencoders" />
51
+ </key>
52
+ </component>
53
+ <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="application-level" UseSingleDictionary="true" transferred="true" />
54
+ <component name="TaskManager">
55
+ <task active="true" id="Default" summary="Default task">
56
+ <changelist id="72819ac5-9a78-4a8a-aab7-199c2790b503" name="Changes" comment="" />
57
+ <created>1690570831325</created>
58
+ <option name="number" value="Default" />
59
+ <option name="presentableId" value="Default" />
60
+ <updated>1690570831325</updated>
61
+ <workItem from="1690570831453" duration="3175000" />
62
+ <workItem from="1690584980736" duration="631000" />
63
+ <workItem from="1690595464187" duration="1660000" />
64
+ <workItem from="1690814760676" duration="5257000" />
65
+ </task>
66
+ <task id="LOCAL-00001" summary="feat: Attempt to get time-series model to work with our data">
67
+ <option name="closed" value="true" />
68
+ <created>1690572412668</created>
69
+ <option name="number" value="00001" />
70
+ <option name="presentableId" value="LOCAL-00001" />
71
+ <option name="project" value="LOCAL" />
72
+ <updated>1690572412668</updated>
73
+ </task>
74
+ <option name="localTasksCounter" value="2" />
75
+ <servers />
76
+ </component>
77
+ <component name="TypeScriptGeneratedFilesManager">
78
+ <option name="version" value="3" />
79
+ </component>
80
+ <component name="Vcs.Log.Tabs.Properties">
81
+ <option name="TAB_STATES">
82
+ <map>
83
+ <entry key="MAIN">
84
+ <value>
85
+ <State />
86
+ </value>
87
+ </entry>
88
+ </map>
89
+ </option>
90
+ </component>
91
+ <component name="VcsManagerConfiguration">
92
+ <MESSAGE value="feat: Attempt to get time-series model to work with our data" />
93
+ <option name="LAST_COMMIT_MESSAGE" value="feat: Attempt to get time-series model to work with our data" />
94
+ </component>
95
+ </project>
app.py CHANGED
@@ -58,10 +58,27 @@ def plot_anomalies(df_test_value, data, anomalies):
58
  ax.set_ylabel("Value")
59
  ax.set_title("Anomalous Data Points")
60
  return fig
61
-
 
 
 
 
 
 
 
 
 
 
 
 
62
  def master(file):
63
  # read file
64
- data = pd.read_csv(file, parse_dates=True, index_col="timestamp")
 
 
 
 
 
65
  df_test_value = normalize_data(data)
66
  # plot input test data
67
  plot1 = plot_test_data(df_test_value)
 
58
  ax.set_ylabel("Value")
59
  ax.set_title("Anomalous Data Points")
60
  return fig
61
+
62
def clean_data(df):
    """Reduce a raw frame to the two columns the rest of the pipeline uses.

    Builds a ``timestamp`` column from the ``Date`` and ``Hour`` columns and
    keeps ``Hourly_Labor_Hours_Total`` under the new name ``value``.

    Args:
        df: DataFrame with ``Date``, ``Hour`` and ``Hourly_Labor_Hours_Total``
            columns. ``Date`` is concatenated with a string, so it is
            presumably a string column -- confirm against the input files.

    Returns:
        A new DataFrame with exactly the columns ``timestamp`` and ``value``
        (in that order), carrying the same index as ``df``. The input frame
        is left unmodified.
    """
    # Combine "Date" and "Hour" into one datetime: "<Date> <Hour>:00:00".
    timestamps = pd.to_datetime(df["Date"] + ' ' + df["Hour"].astype(str) + ":00:00")

    # Select and rename in one non-inplace step so we operate on a fresh
    # frame instead of mutating the caller's data or a slice of it.
    cleaned = df[["Hourly_Labor_Hours_Total"]].rename(
        columns={"Hourly_Labor_Hours_Total": "value"}
    )

    # Put the timestamp first to keep the original column order.
    cleaned.insert(0, "timestamp", timestamps)

    return cleaned
73
+
74
  def master(file):
75
  # read file
76
+ data = pd.read_csv(file)
77
+
78
+ # clean data
79
+ data = clean_data(data)
80
+ data.set_index("timestamp", inplace=True)
81
+
82
  df_test_value = normalize_data(data)
83
  # plot input test data
84
  plot1 = plot_test_data(df_test_value)