diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..2622016a3a31b297f75a504bbd39a77c7a4b746f Binary files /dev/null and b/.DS_Store differ diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..fce99e90624450e05cd60b7550e3ef48d1673a7e --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Weather forecasting using machine learning diff --git a/training/.DS_Store b/training/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..c8bd0e4ab5306c9285117392c99c9c68852f2bec Binary files /dev/null and b/training/.DS_Store differ diff --git a/training/.dockerignore b/training/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..6b8710a711f3b689885aa5c26c6c06bde348e82b --- /dev/null +++ b/training/.dockerignore @@ -0,0 +1 @@ +.git diff --git a/training/.gitignore b/training/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cdb93cd5ebbb0b464b7cec99bbc254d23aac6815 --- /dev/null +++ b/training/.gitignore @@ -0,0 +1 @@ +.python-version diff --git a/training/data/weather-data.tar.gz b/training/data/weather-data.tar.gz deleted file mode 100644 index b0118bee517bf49485360db0862365ce9150be5c..0000000000000000000000000000000000000000 --- a/training/data/weather-data.tar.gz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8b8096982477f4058aee79f5b4b41f8dacf9e5bcda5d97434ccbf2a573189e14 -size 38169226 diff --git a/training/src/.DS_Store b/training/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..a096ccd743256d79beb810ea5c459f1285dcb55e Binary files /dev/null and b/training/src/.DS_Store differ diff --git a/training/src/.ipynb_checkpoints/MSML-602-Final-Project-checkpoint.ipynb b/training/src/.ipynb_checkpoints/MSML-602-Final-Project-checkpoint.ipynb new file mode 100644 index 
0000000000000000000000000000000000000000..c02243cef9540eda141019040222c726ea8815f7 --- /dev/null +++ b/training/src/.ipynb_checkpoints/MSML-602-Final-Project-checkpoint.ipynb @@ -0,0 +1,1085 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 16, + "id": "fb7daf67-de7e-4626-a194-417aa210c959", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install keras tensorflow seaborn requests-cache" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c005096b-eaca-4244-998d-a92338d22902", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import sqlite3\n", + "import IPython\n", + "import IPython.display\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "import tensorflow as tf\n", + "import requests\n", + "import requests_cache\n", + "from requests_cache.backends.sqlite import SQLiteCache\n", + "import sqlite3\n", + "import datetime\n", + "from datetime import date, timedelta, timezone\n", + "import time\n", + "import pytz\n", + "\n", + "local_tz = pytz.timezone('America/New_York')\n", + "\n", + "\n", + "mpl.rcParams['figure.figsize'] = (8, 6)\n", + "mpl.rcParams['axes.grid'] = False\n", + "\n", + "# initialize cache\n", + "requests_cache.install_cache('./data/weather_api_cache')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "2e054aef-ce49-47a8-ba20-dc2e6928600e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Starting Date: 2022-12-07\n", + "--------------------------------\n", + "Already up to date!\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tempobs_timestamppressurewspd
050.02000-01-02 00:51:0030.22NaN
145.02000-01-02 01:51:0030.215.0
247.02000-01-02 02:51:0030.215.0
346.02000-01-02 03:51:0030.21NaN
447.02000-01-02 04:51:0030.205.0
\n", + "
" + ], + "text/plain": [ + " temp obs_timestamp pressure wspd\n", + "0 50.0 2000-01-02 00:51:00 30.22 NaN\n", + "1 45.0 2000-01-02 01:51:00 30.21 5.0\n", + "2 47.0 2000-01-02 02:51:00 30.21 5.0\n", + "3 46.0 2000-01-02 03:51:00 30.21 NaN\n", + "4 47.0 2000-01-02 04:51:00 30.20 5.0" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "# API_KEY = os.env.get(\"API_KEY\")\n", + "DATE_FORMAT = \"%Y-%m-%d %H:%M:%S\"\n", + "API_KEY = \"e1f10a1e78da46f5b10a1e78da96f525\"\n", + "BASE_URL = \"https://api.weather.com/v1/location/KDCA:9:US/observations/historical.json?apiKey={api_key}&units=e&startDate={start_date}&endDate={end_date}\"\n", + "\n", + "# Field descriptions here\n", + "# https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/511371/1/LapamonpinyoEtAl_engrXiv_2021.pdf \n", + "# Sample\n", + "# {\n", + "# 'key': 'KDCA', 'class': 'observation', 'expire_time_gmt': 946709460, 'obs_id': 'KDCA', 'obs_name': 'Washington/Natl', \n", + "# 'valid_time_gmt': 946702260, 'day_ind': 'N', 'temp': 41, 'wx_icon': 33, 'icon_extd': 3300, 'wx_phrase': 'Fair', 'pressure_tend': None,\n", + "# 'pressure_desc': None, 'dewPt': 34, 'heat_index': 41, 'rh': 76, 'pressure': 30.19, 'vis': 5, 'wc': 41, 'wdir': None, \n", + "# 'wdir_cardinal': 'CALM', 'gust': None, 'wspd': None, 'max_temp': 57, 'min_temp': 41, 'precip_total': None, \n", + "# 'precip_hrly': None, 'snow_hrly': None, 'uv_desc': 'Low', 'feels_like': 41, 'uv_index': 0, 'qualifier': None, 'qualifier_svrty': None,\n", + "# 'blunt_phrase': None, 'terse_phrase': None, 'clds': 'CLR', 'water_temp': None, 'primary_wave_period': None, 'primary_wave_height': None, \n", + "# 'primary_swell_period': None, 'primary_swell_height': None, 'primary_swell_direction': None, 'secondary_swell_period': None, \n", + "# 'secondary_swell_height': None, 'secondary_swell_direction': None\n", + "# }\n", + "\n", + "conn = sqlite3.connect(\"./data/weather-raw.db\")\n", + 
"cur = conn.cursor()\n", + "\n", + "\n", + "def create_weather_table(cur, table_name):\n", + " cur.execute(\"\"\"\n", + " CREATE TABLE IF NOT EXISTS {table_name}(\n", + " key, class, expire_time_gmt, obs_id, obs_name, valid_time_gmt INTEGER NOT NULL PRIMARY KEY, day_ind, temp, wx_icon, icon_extd, \n", + " wx_phrase, pressure_tend, pressure_desc, dewPt, heat_index, rh, pressure, vis, wc, wdir, wdir_cardinal, \n", + " gust, wspd, max_temp, min_temp, precip_total, precip_hourly, snow_hrly, uv_desc, feels_like, uv_index,\n", + " qualifier, qualifier_svrty, blunt_phrase, terse_phrase, clds, water_temp, primary_wave_period, \n", + " primary_wave_height, primary_swell_period, primary_swell_height, primary_swell_direction, \n", + " secondary_swell_period, secondary_swell_height, secondary_swell_direction, obs_timestamp)\n", + " \"\"\".format(table_name=table_name))\n", + " cur.execute(\"\"\"\n", + " CREATE INDEX idx_obs_timestamp ON weather_raw(obs_timestamp);\n", + " \"\"\")\n", + " cur.execute(\"\"\"\n", + " CREATE INDEX idx_obs_timestamp_month ON weather_raw(STRFTIME('%M', obs_timestamp));\n", + " \"\"\")\n", + " cur.execute(\"\"\"\n", + " CREATE INDEX idx_obs_timestamp_date ON weather_raw(STRFTIME('%Y-%m-%d', wr.obs_timestamp));\n", + " \"\"\")\n", + " \n", + "# Create tables for raw & cleaned data respectively (if they don't exist already)\n", + "create_weather_table(cur, \"weather_raw\")\n", + "\n", + "\n", + "# Get the latest date that data has been downloaded for and start downloading new data from that timestamp\n", + "cur.execute(\"SELECT MAX(obs_timestamp) FROM weather_raw\")\n", + "max_date = cur.fetchone()[0]\n", + "target_date = datetime.datetime.strptime(max_date, DATE_FORMAT).date() if max_date else date(2000, 1, 1)\n", + "\n", + "print(f\"Starting Date: {target_date}\") \n", + "print(\"--------------------------------\")\n", + "defaults = {\n", + " 'key': None,'class': None,'expire_time_gmt': None,'obs_id': None,'obs_name': None,'valid_time_gmt': None,\n", 
+ " 'day_ind': None,'temp': None,'wx_icon': None,'icon_extd': None,'wx_phrase': None,'pressure_tend': None,\n", + " 'pressure_desc': None,'dewPt': None,'heat_index': None,'rh': None,'pressure': None,'vis': None,'wc': None,\n", + " 'wdir': None,'wdir_cardinal': None,'gust': None,'wspd': None,'max_temp': None,'min_temp': None,'precip_total': None,\n", + " 'precip_hrly': None,'snow_hrly': None,'uv_desc': None,'feels_like': None,'uv_index': None,'qualifier': None,\n", + " 'qualifier_svrty': None,'blunt_phrase': None,'terse_phrase': None,'clds': None,'water_temp': None,\n", + " 'primary_wave_period': None,'primary_wave_height': None,'primary_swell_period': None,'primary_swell_height': None,\n", + " 'primary_swell_direction': None,'secondary_swell_period': None,'secondary_swell_height': None,'secondary_swell_direction': None\n", + "}\n", + "\n", + "\n", + "INSERT_SQL = \"\"\"\n", + "INSERT OR IGNORE INTO weather_raw VALUES (\n", + " :key, :class, :expire_time_gmt, :obs_id, :obs_name, :valid_time_gmt, :day_ind, :temp, :wx_icon, :icon_extd, :wx_phrase,\n", + " :pressure_tend, :pressure_desc, :dewPt, :heat_index, :rh, :pressure, :vis, :wc, :wdir, :wdir_cardinal,\n", + " :gust, :wspd, :max_temp, :min_temp, :precip_total, :precip_hrly, :snow_hrly, :uv_desc, :feels_like, :uv_index,\n", + " :qualifier, :qualifier_svrty, :blunt_phrase, :terse_phrase, :clds, :water_temp, :primary_wave_period,\n", + " :primary_wave_height, :primary_swell_period, :primary_swell_height, :primary_swell_direction,\n", + " :secondary_swell_period, :secondary_swell_height, :secondary_swell_direction, :obs_timestamp\n", + ")\n", + "\"\"\"\n", + "\n", + "def augment_data(rec):\n", + " rec[\"obs_timestamp\"] = datetime.datetime.fromtimestamp(rec[\"valid_time_gmt\"]).strftime(DATE_FORMAT)\n", + " return rec\n", + "\n", + "today = datetime.date.today()\n", + "if target_date == today:\n", + " print(\"Already up to date!\")\n", + " \n", + "while target_date != today:\n", + " end_date = target_date + 
timedelta(days=1) \n", + " start_date_str = target_date.strftime(\"%Y%m%d\")\n", + " end_date_str = end_date.strftime(\"%Y%m%d\")\n", + " target_url = BASE_URL.format(api_key=API_KEY, start_date=start_date_str, end_date=start_date_str)\n", + " res = requests.get(target_url)\n", + " target_date = end_date\n", + "\n", + " data = res.json()\n", + " if not \"observations\" in data:\n", + " print(f\"Date: {target_date} NF\", end=\"\\r\")\n", + " continue\n", + " params = ({k: d.get(k, defaults[k]) for k in defaults} for d in data[\"observations\"])\n", + " params = list(map(augment_data, params))\n", + "\n", + " cur.executemany(INSERT_SQL, params)\n", + " conn.commit()\n", + " # time.sleep(0.05)\n", + " # was_cached = \"Cache HIT\" if res.from_cache else \"Cache MISS\"\n", + " print(f\"Date: {target_date} OK\", end=\"\\r\")\n", + " target_date = end_date\n", + "\n", + "SQL_CLEANED_DATA = \"\"\"\n", + " WITH RECURSIVE generate_series(x) AS (\n", + " SELECT 0\n", + " UNION ALL\n", + " SELECT x+1 FROM generate_series LIMIT 24\n", + " ), distinct_dates AS (\n", + " SELECT DISTINCT DATE(obs_timestamp) AS obs_date\n", + " FROM weather_raw wr \n", + " WHERE obs_timestamp >= '2000-01-01'\n", + " ), hours AS (\n", + " SELECT x AS hrs FROM generate_series\n", + " )\n", + " SELECT w.temp, w.obs_timestamp, w.pressure, w.wspd\n", + " FROM weather_raw w \n", + " JOIN (\n", + " SELECT d.obs_date, COUNT(*)\n", + " FROM distinct_dates d \n", + " CROSS JOIN hours h \n", + " INNER JOIN (SELECT * FROM weather_raw WHERE STRFTIME('%M', obs_timestamp) in ('51', '52')) wr \n", + " ON DATE(STRFTIME('%Y-%m-%d', wr.obs_timestamp)) = d.obs_date \n", + " AND CAST(STRFTIME('%H', obs_timestamp) AS INTEGER) = h.hrs\n", + " GROUP BY d.obs_date\n", + " HAVING COUNT(*) = 24\n", + " ) d ON d.obs_date = STRFTIME('%Y-%m-%d', w.obs_timestamp) \n", + " WHERE STRFTIME('%M', w.obs_timestamp) in ('51', '52');\n", + "\"\"\"\n", + "df = pd.read_sql(SQL_CLEANED_DATA, conn, parse_dates=[\"obs_timestamp\"]) \n", + 
"df.head() " + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "7233dd6b-241b-46c6-a206-184b33d43792", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
temppressurewspdday_sinday_cosyear_sinyear_cos
145.030.215.00.4656150.8849880.0137980.999905
247.030.215.00.6788010.7343230.0145140.999895
447.030.205.00.9550200.2965420.0159480.999873
544.030.193.00.9992290.0392600.0166640.999861
648.030.1812.00.975342-0.2206970.0173810.999849
\n", + "
" + ], + "text/plain": [ + " temp pressure wspd day_sin day_cos year_sin year_cos\n", + "1 45.0 30.21 5.0 0.465615 0.884988 0.013798 0.999905\n", + "2 47.0 30.21 5.0 0.678801 0.734323 0.014514 0.999895\n", + "4 47.0 30.20 5.0 0.955020 0.296542 0.015948 0.999873\n", + "5 44.0 30.19 3.0 0.999229 0.039260 0.016664 0.999861\n", + "6 48.0 30.18 12.0 0.975342 -0.220697 0.017381 0.999849" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def prepare_dataframe(_df):\n", + " _df = _df.astype(\n", + " {\n", + " 'temp': 'float',\n", + " 'pressure': 'float',\n", + " 'wspd': 'float'\n", + " },\n", + " )\n", + " _df = _df.dropna()\n", + " _df = _df.sort_values(by=['obs_timestamp'])\n", + " date_time = _df.pop('obs_timestamp')\n", + " timestamp_s = date_time.map(pd.Timestamp.timestamp)\n", + " day = 24*60*60\n", + " year = (365.2425)*day\n", + " \n", + " _df['day_sin'] = np.sin(timestamp_s * (2 * np.pi / day))\n", + " _df['day_cos'] = np.cos(timestamp_s * (2 * np.pi / day))\n", + " _df['year_sin'] = np.sin(timestamp_s * (2 * np.pi / year))\n", + " _df['year_cos'] = np.cos(timestamp_s * (2 * np.pi / year))\n", + "\n", + " \n", + " return _df, date_time\n", + "\n", + "# Field descriptions here\n", + "# https://www.research-collection.ethz.ch/bitstream/handle/20.500.11850/511371/1/LapamonpinyoEtAl_engrXiv_2021.pdf \n", + "df, date_time = prepare_dataframe(df)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "049cf91d-30f9-4c42-b705-fdfb48f0d1e2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_64/4039997715.py:7: FutureWarning: The behavior of `series[i:j]` with an integer-dtype index is deprecated. In a future version, this will be treated as *label-based* indexing, consistent with e.g. `series[i]` lookups. To retain the old behavior, use `series.iloc[i:j]`. 
To get the future behavior, use `series.loc[i:j]`.\n", + " plot_features.index = date_time[-480:]\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
countmeanstdmin25%50%75%max
temp179231.059.13872017.7687876.00000045.00000060.00000074.000000118.000000
pressure179231.030.0342280.21662328.61000029.90000030.03000030.17000030.860000
wspd179231.08.9847684.7451240.0000006.0000008.00000012.00000045.000000
day_sin179231.0-0.0150760.706559-0.999391-0.731354-0.0348990.6819980.999391
day_cos179231.00.0071890.707461-0.999391-0.6819980.0348990.7313540.999391
year_sin179231.00.0041240.709370-1.000000-0.7053280.0077870.7174891.000000
year_cos179231.0-0.0039870.704817-1.000000-0.708857-0.0049020.6969131.000000
\n", + "
" + ], + "text/plain": [ + " count mean std min 25% 50% \\\n", + "temp 179231.0 59.138720 17.768787 6.000000 45.000000 60.000000 \n", + "pressure 179231.0 30.034228 0.216623 28.610000 29.900000 30.030000 \n", + "wspd 179231.0 8.984768 4.745124 0.000000 6.000000 8.000000 \n", + "day_sin 179231.0 -0.015076 0.706559 -0.999391 -0.731354 -0.034899 \n", + "day_cos 179231.0 0.007189 0.707461 -0.999391 -0.681998 0.034899 \n", + "year_sin 179231.0 0.004124 0.709370 -1.000000 -0.705328 0.007787 \n", + "year_cos 179231.0 -0.003987 0.704817 -1.000000 -0.708857 -0.004902 \n", + "\n", + " 75% max \n", + "temp 74.000000 118.000000 \n", + "pressure 30.170000 30.860000 \n", + "wspd 12.000000 45.000000 \n", + "day_sin 0.681998 0.999391 \n", + "day_cos 0.731354 0.999391 \n", + "year_sin 0.717489 1.000000 \n", + "year_cos 0.696913 1.000000 " + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plot_cols = ['temp', 'pressure', 'wspd']\n", + "plot_features = df[plot_cols]\n", + "plot_features.index = date_time\n", + "_ = plot_features.plot(subplots=True)\n", + "\n", + "plot_features = df[plot_cols][-480:]\n", + "plot_features.index = date_time[-480:]\n", + "_ = plot_features.plot(subplots=True)\n", + "df.describe().transpose()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "f8fd563c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 1.0, 'Year signal')" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(2, 1)\n", + "fig.tight_layout(h_pad=5)\n", + "axes[0].plot(np.array(df['day_sin'])[:150])\n", + "axes[0].plot(np.array(df['day_cos'])[:150])\n", + "axes[0].set_xlabel('Time [h]')\n", + "axes[0].set_title('Time of day signal')\n", + "\n", + "axes[1].plot(np.array(df['year_sin'])[:15000])\n", + "axes[1].plot(np.array(df['year_cos'])[:15000])\n", + "axes[1].set_xlabel('Time [h]')\n", + "axes[1].set_title('Year signal')" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "a56d8d86", + "metadata": {}, + "outputs": [], + "source": [ + "# Split data\n", + "column_indices = {name: i for i, name in enumerate(df.columns)}\n", + "\n", + "n = len(df)\n", + "train_df = df[0:int(n*0.7)]\n", + "val_df = df[int(n*0.7):int(n*0.9)]\n", + "test_df = df[int(n*0.9):]\n", + "\n", + "num_features = df.shape[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "d710186c-3f8c-4d35-842f-361508b2c781", + "metadata": {}, + "outputs": [], + "source": [ + "# Normalization\n", + "\n", + "train_mean = train_df.mean()\n", + "train_std = train_df.std()\n", + "\n", + "train_df = (train_df - train_mean) / train_std\n", + "val_df = (val_df - train_mean) / train_std\n", + "test_df = (test_df - train_mean) / train_std" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "1613147d-946e-4e5c-949c-1e30bd46adc6", + "metadata": {}, + "outputs": [], + "source": [ + "class WindowGenerator():\n", + " def __init__(self, input_width, label_width, shift,\n", + " train_df=train_df, val_df=val_df, test_df=test_df,\n", + " label_columns=\"temp\"):\n", + " # Store the raw data.\n", + " self.train_df = train_df\n", + " self.val_df = val_df\n", + " self.test_df = test_df\n", + "\n", + " # Work out the label column indices.\n", + " self.label_columns = label_columns\n", + " if label_columns is not None:\n", + " self.label_columns_indices = {name: i for i, 
name in\n", + " enumerate(label_columns)}\n", + " self.column_indices = {name: i for i, name in\n", + " enumerate(train_df.columns)}\n", + "\n", + " # Work out the window parameters.\n", + " self.input_width = input_width\n", + " self.label_width = label_width\n", + " self.shift = shift\n", + "\n", + " self.total_window_size = input_width + shift\n", + "\n", + " self.input_slice = slice(0, input_width)\n", + " self.input_indices = np.arange(self.total_window_size)[self.input_slice]\n", + "\n", + " self.label_start = self.total_window_size - self.label_width\n", + " self.labels_slice = slice(self.label_start, None)\n", + " self.label_indices = np.arange(self.total_window_size)[self.labels_slice]\n", + "\n", + " def split_window(self, features):\n", + " inputs = features[:, self.input_slice, :]\n", + " labels = features[:, self.labels_slice, :]\n", + " if self.label_columns is not None:\n", + " labels = tf.stack(\n", + " [labels[:, :, self.column_indices[name]] for name in self.label_columns],\n", + " axis=-1)\n", + "\n", + " # Slicing doesn't preserve static shape information, so set the shapes\n", + " # manually. 
This way the `tf.data.Datasets` are easier to inspect.\n", + " inputs.set_shape([None, self.input_width, None])\n", + " labels.set_shape([None, self.label_width, None])\n", + " return inputs, labels\n", + "\n", + " def plot(self, model=None, plot_col='temp', max_subplots=3):\n", + " inputs, labels = self.example\n", + " plt.figure(figsize=(12, 8))\n", + " plot_col_index = self.column_indices[plot_col]\n", + " max_n = min(max_subplots, len(inputs))\n", + " for n in range(max_n):\n", + " plt.subplot(max_n, 1, n+1)\n", + " plt.ylabel(f'{plot_col} [normed]')\n", + " plt.plot(self.input_indices, inputs[n, :, plot_col_index],\n", + " label='Inputs', marker='.', zorder=-10)\n", + "\n", + " if self.label_columns:\n", + " label_col_index = self.label_columns_indices.get(plot_col, None)\n", + " else:\n", + " label_col_index = plot_col_index\n", + "\n", + " if label_col_index is None:\n", + " continue\n", + "\n", + " plt.scatter(self.label_indices, labels[n, :, label_col_index],\n", + " edgecolors='k', label='Labels', c='#2ca02c', s=64)\n", + " if model is not None:\n", + " predictions = model(inputs)\n", + " plt.scatter(self.label_indices, predictions[n, :, label_col_index],\n", + " marker='X', edgecolors='k', label='Predictions',\n", + " c='#ff7f0e', s=64)\n", + "\n", + " if n == 0:\n", + " plt.legend()\n", + "\n", + " plt.xlabel('Time [h]')\n", + "\n", + " def make_dataset(self, data):\n", + " data = np.array(data, dtype=np.float32)\n", + " ds = tf.keras.utils.timeseries_dataset_from_array(\n", + " data=data,\n", + " targets=None,\n", + " sequence_length=self.total_window_size,\n", + " sequence_stride=1,\n", + " shuffle=False,\n", + " batch_size=32,)\n", + "\n", + " ds = ds.map(self.split_window)\n", + "\n", + " return ds\n", + "\n", + " @property\n", + " def train(self):\n", + " return self.make_dataset(self.train_df)\n", + "\n", + " @property\n", + " def val(self):\n", + " return self.make_dataset(self.val_df)\n", + "\n", + " @property\n", + " def test(self):\n", + " 
return self.make_dataset(self.test_df)\n", + "\n", + " @property\n", + " def example(self):\n", + " \"\"\"Get and cache an example batch of `inputs, labels` for plotting.\"\"\"\n", + " result = getattr(self, '_example', None)\n", + " if result is None:\n", + " # No example batch was found, so get one from the `.train` dataset\n", + " result = next(iter(self.train))\n", + " # And cache it for next time\n", + " self._example = result\n", + " return result\n", + "\n", + " def __repr__(self):\n", + " return '\\n'.join([\n", + " f'Total window size: {self.total_window_size}',\n", + " f'Input indices: {self.input_indices}',\n", + " f'Label indices: {self.label_indices}',\n", + " f'Label column name(s): {self.label_columns}'])\n", + "\n", + "MAX_EPOCHS = 30\n", + "\n", + "def compile_and_fit(model, window, patience=2, epochs=MAX_EPOCHS):\n", + " early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',\n", + " patience=patience,\n", + " mode='min')\n", + "\n", + " model.compile(\n", + " loss=tf.keras.losses.MeanSquaredError(),\n", + " optimizer=tf.keras.optimizers.Adam(),\n", + " metrics=[tf.keras.metrics.MeanAbsoluteError()]\n", + " )\n", + " \n", + " history = model.fit(window.train, epochs=epochs,\n", + " validation_data=window.val,\n", + " callbacks=[early_stopping])\n", + " return history" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "82b6ec85-5684-49b1-9a86-348f8d465f4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Total window size: 192\n", + "Input indices: [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17\n", + " 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35\n", + " 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53\n", + " 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71\n", + " 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89\n", + " 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107\n", + " 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125\n", + " 
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143\n", + " 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161\n", + " 162 163 164 165 166 167]\n", + "Label indices: [168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185\n", + " 186 187 188 189 190 191]\n", + "Label column name(s): ['temp']" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "OUT_STEPS = 24\n", + "multi_window = WindowGenerator(\n", + " input_width=168, label_width=OUT_STEPS, shift=OUT_STEPS,\n", + " label_columns=['temp']\n", + ")\n", + "multi_window.plot()\n", + "multi_window" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "6279e34f-b48e-420d-8d44-48d11353ebf7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1115/1115 [==============================] - 4s 4ms/step - loss: 2.1419 - mean_absolute_error: 1.1699\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "class MultiStepLastBaseline(tf.keras.Model):\n", + " def call(self, inputs):\n", + " return tf.tile(inputs[:, -1:, :], [1, OUT_STEPS, 1])\n", + "\n", + "last_baseline = MultiStepLastBaseline()\n", + "last_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),\n", + " metrics=[tf.keras.metrics.MeanAbsoluteError()])\n", + "\n", + "multi_val_performance = {}\n", + "multi_performance = {}\n", + "\n", + "multi_val_performance['Last'] = last_baseline.evaluate(multi_window.val)\n", + "multi_performance['Last'] = last_baseline.evaluate(multi_window.test, verbose=0)\n", + "multi_window.plot(last_baseline)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "e8f4aa02-1978-4fa3-b23a-d731594d8cd8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1119/1119 [==============================] - 6s 5ms/step - loss: 2.1284 - mean_absolute_error: 1.1630\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "repeat_window = WindowGenerator(\n", + " input_width=OUT_STEPS, label_width=OUT_STEPS, shift=OUT_STEPS,\n", + " label_columns=['temp']\n", + ")\n", + "\n", + "class RepeatBaseline(tf.keras.Model):\n", + " def call(self, inputs):\n", + " return inputs\n", + "\n", + "repeat_baseline = RepeatBaseline()\n", + "repeat_baseline.compile(loss=tf.keras.losses.MeanSquaredError(),\n", + " metrics=[tf.keras.metrics.MeanAbsoluteError()])\n", + "\n", + "multi_val_performance['Repeat'] = repeat_baseline.evaluate(repeat_window.val)\n", + "multi_performance['Repeat'] = repeat_baseline.evaluate(repeat_window.test, verbose=0)\n", + "repeat_window.plot(repeat_baseline)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9279cea8-0f0f-44a9-a255-2d4cb1e56fd1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/30\n", + "3915/3915 [==============================] - 333s 84ms/step - loss: 0.1327 - mean_absolute_error: 0.2702 - val_loss: 0.0802 - val_mean_absolute_error: 0.2103\n", + "Epoch 2/30\n", + "3915/3915 [==============================] - 348s 89ms/step - loss: 0.0916 - mean_absolute_error: 0.2263 - val_loss: 0.0747 - val_mean_absolute_error: 0.2001\n", + "Epoch 3/30\n", + "3915/3915 [==============================] - 325s 83ms/step - loss: 0.0883 - mean_absolute_error: 0.2206 - val_loss: 0.0724 - val_mean_absolute_error: 0.1961\n", + "Epoch 4/30\n", + "3915/3915 [==============================] - 340s 87ms/step - loss: 0.0865 - mean_absolute_error: 0.2175 - val_loss: 0.0715 - val_mean_absolute_error: 0.1940\n", + "Epoch 5/30\n", + "3915/3915 [==============================] - 350s 89ms/step - loss: 0.0853 - mean_absolute_error: 0.2154 - val_loss: 0.0713 - val_mean_absolute_error: 0.1936\n", + "Epoch 6/30\n", + "3915/3915 [==============================] - 343s 88ms/step - loss: 0.0844 - mean_absolute_error: 0.2139 - 
val_loss: 0.0708 - val_mean_absolute_error: 0.1925\n", + "Epoch 7/30\n", + "3707/3915 [===========================>..] - ETA: 16s - loss: 0.0839 - mean_absolute_error: 0.2131" + ] + } + ], + "source": [ + "multi_lstm_model = tf.keras.Sequential([\n", + " # Shape [batch, time, features] => [batch, lstm_units].\n", + " # Adding more `lstm_units` just overfits more quickly.\n", + " tf.keras.layers.LSTM(32, return_sequences=False),\n", + " # Shape => [batch, out_steps*features].\n", + " tf.keras.layers.Dense(OUT_STEPS,\n", + " kernel_initializer=tf.initializers.zeros()),\n", + " tf.keras.layers.Reshape([OUT_STEPS, 1])\n", + "])\n", + "\n", + "history = compile_and_fit(multi_lstm_model, multi_window)\n", + "\n", + "IPython.display.clear_output()\n", + "\n", + "multi_val_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.val)\n", + "multi_performance['LSTM'] = multi_lstm_model.evaluate(multi_window.test, verbose=0)\n", + "multi_window.plot(multi_lstm_model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4388d821-d3d2-4dc6-9906-b755897522ef", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd23a70d-a219-4953-8754-63a209566f40", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10d24123-9822-4d13-a49f-40e1d0a4b572", + "metadata": {}, + "outputs": [], + "source": [ + "# Comparing the forecasts with the actual values\n", + "yhat = [x[0] for x in model.predict(Xval)]\n", + "y = [y[0] for y in Yval]\n", + "# Creating the frame to store both predictions\n", + "days = df['obs_timestamp'].values[-len(y):]\n", + "frame = pd.concat([\n", + " pd.DataFrame({'day': days, 'temp': y, 'type': 'original'}),\n", + " pd.DataFrame({'day': days, 'temp': yhat, 'type': 'forecast'})\n", + "])\n", + "# Creating the unscaled values column\n", + "frame['temp_absolute'] = [(x * train_std['temp']) + train_mean['temp'] for x in 
frame['temp']]\n", + "# Pivoting\n", + "pivoted = frame.pivot_table(index='day', columns='type')\n", + "pivoted.columns = ['_'.join(x).strip() for x in pivoted.columns.values]\n", + "pivoted['res'] = pivoted['temp_absolute_original'] - pivoted['temp_absolute_forecast']\n", + "pivoted['res_abs'] = [abs(x) for x in pivoted['res']]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bc48d05-c104-463e-a702-42cd545b55d5", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "vscode": { + "interpreter": { + "hash": "015e5ba16a4bb6c2d4406ff94ae7629b76540da55dd7d929cadb2b51e4d9f31f" + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/training/src/.ipynb_checkpoints/MSML-602-HW1-Q4-checkpoint.ipynb b/training/src/.ipynb_checkpoints/MSML-602-HW1-Q4-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..8d9fb3593cc439e1399390c03279943df06f24d0 --- /dev/null +++ b/training/src/.ipynb_checkpoints/MSML-602-HW1-Q4-checkpoint.ipynb @@ -0,0 +1,283 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 96, + "id": "d10fb797-f9d4-40c7-ba48-08e4ece334f9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Data for sum\n", + " x P(X) x * P(X)\n", + "0 10 0.125000 1.250000\n", + "1 11 0.125000 1.375000\n", + "2 9 0.115741 1.041667\n", + "3 12 0.115741 1.388889\n", + "4 8 0.097222 0.777778\n", + "5 13 0.097222 1.263889\n", + "6 7 0.069444 0.486111\n", + "7 14 0.069444 0.972222\n", + "8 6 0.046296 0.277778\n", + "9 15 0.046296 0.694444\n", + "10 5 0.027778 0.138889\n", + 
"11 16 0.027778 0.444444\n", + "12 4 0.013889 0.055556\n", + "13 17 0.013889 0.236111\n", + "14 3 0.004630 0.013889\n", + "15 18 0.004630 0.083333\n", + "\n", + "Expectation for sum: 10.5\n", + "\n", + "Data for product\n", + " x P(X) x * P(X)\n", + "0 12 0.069444 0.833333\n", + "1 24 0.069444 1.666667\n", + "2 30 0.055556 1.666667\n", + "3 60 0.055556 3.333333\n", + "4 36 0.055556 2.000000\n", + "5 18 0.041667 0.750000\n", + "6 72 0.041667 3.000000\n", + "7 6 0.041667 0.250000\n", + "8 48 0.041667 2.000000\n", + "9 20 0.041667 0.833333\n", + "10 8 0.032407 0.259259\n", + "11 90 0.027778 2.500000\n", + "12 40 0.027778 1.111111\n", + "13 16 0.027778 0.444444\n", + "14 10 0.027778 0.277778\n", + "15 4 0.027778 0.111111\n", + "16 15 0.027778 0.416667\n", + "17 120 0.027778 3.333333\n", + "18 32 0.013889 0.444444\n", + "19 25 0.013889 0.347222\n", + "20 108 0.013889 1.500000\n", + "21 100 0.013889 1.388889\n", + "22 96 0.013889 1.333333\n", + "23 3 0.013889 0.041667\n", + "24 80 0.013889 1.111111\n", + "25 75 0.013889 1.041667\n", + "26 150 0.013889 2.083333\n", + "27 144 0.013889 2.000000\n", + "28 5 0.013889 0.069444\n", + "29 180 0.013889 2.500000\n", + "30 50 0.013889 0.694444\n", + "31 9 0.013889 0.125000\n", + "32 45 0.013889 0.625000\n", + "33 2 0.013889 0.027778\n", + "34 54 0.013889 0.750000\n", + "35 1 0.004630 0.004630\n", + "36 125 0.004630 0.578704\n", + "37 64 0.004630 0.296296\n", + "38 27 0.004630 0.125000\n", + "39 216 0.004630 1.000000\n", + "Expectation for product 42.875\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + " \n", + "dice1 = [1, 2, 3, 4, 5, 6]\n", + "dice2 = [1, 2, 3, 4, 5, 6]\n", + "dice3 = [1, 2, 3, 4, 5, 6]\n", + "\n", + "\n", + "data = [[d1, d2, d3, d1 + d2 + d3, d1 * d2 * d3] for d1 in dice1 for d2 in dice2 for d3 in dice3]\n", + "\n", + "df = pd.DataFrame(data, columns=[\"dice1\", \"dice2\", \"dice3\", \"sum_of_dots\", \"product_of_dots\"])\n", + "total_count = len(dice1) * len(dice2) * len(dice3)\n", + "\n", + "# 
Expectation of sum of number of dots on the three rolls\n", + "sum_pmf = [(x[0], count_x / (total_count * 1.0)) for x, count_x in df.value_counts([\"sum_of_dots\"]).items()]\n", + "sum_pmf_df = pd.DataFrame(sum_pmf, columns=[\"x\", \"P(X)\"])\n", + "sum_pmf_df[\"x * P(X)\"] = sum_pmf_df[\"x\"] * sum_pmf_df[\"P(X)\"] \n", + "expectation_sum = sum_pmf_df[\"x * P(X)\"].sum()\n", + "\n", + "# Expectation of product of number of dots on the three rolls\n", + "product_pmf = [(x[0], count_x / (total_count * 1.0)) for x, count_x in df.value_counts([\"product_of_dots\"]).items()]\n", + "product_pmf_df = pd.DataFrame(product_pmf, columns=[\"x\", \"P(X)\"])\n", + "product_pmf_df[\"x * P(X)\"] = product_pmf_df[\"x\"] * product_pmf_df[\"P(X)\"] \n", + "expectation_product = product_pmf_df[\"x * P(X)\"].sum()\n", + "\n", + "print(\"\\nData for sum\")\n", + "print(sum_pmf_df)\n", + "print(\"\\nExpectation for sum: {}\".format(expectation_sum))\n", + "\n", + "print(\"\\nData for product\")\n", + "print(product_pmf_df)\n", + "print(\"Expectation for product {}\".format(expectation_product))" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "301ba7da-d523-48eb-8a6d-25e8d6fa5ffe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
xP(X)
0100.125000
1110.125000
290.115741
3120.115741
480.097222
\n", + "
" + ], + "text/plain": [ + " x P(X)\n", + "0 10 0.125000\n", + "1 11 0.125000\n", + "2 9 0.115741\n", + "3 12 0.115741\n", + "4 8 0.097222" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "42954464-bab8-44fc-9087-8c4f07bac679", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "4de2391b-e130-405b-bc26-b04584b2ed9c", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "56b9fd02-79ec-4c7d-9eca-17c04a7df44d", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "09e2316e-e242-48ab-9803-ef9e9212ea94", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "45a28a91-dc74-469b-8a1c-caa43ed0b907", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2268\n" + ] + } + ], + "source": [ + "print(expectation)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4b63417-d541-44d1-98ad-2027865e1818", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/training/src/.ipynb_checkpoints/MSML-602-HW1-checkpoint.ipynb b/training/src/.ipynb_checkpoints/MSML-602-HW1-checkpoint.ipynb 
new file mode 100644 index 0000000000000000000000000000000000000000..c9a90596744e018c8ebbbfbeca060a6dcbdd781b --- /dev/null +++ b/training/src/.ipynb_checkpoints/MSML-602-HW1-checkpoint.ipynb @@ -0,0 +1,1114 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 13, + "id": "90b406fa-994c-4ce8-9d8d-2cf7e9358915", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Data for sum\n", + " x P(X) x * P(X)\n", + "0 10 0.125000 1.250000\n", + "1 11 0.125000 1.375000\n", + "2 9 0.115741 1.041667\n", + "3 12 0.115741 1.388889\n", + "4 8 0.097222 0.777778\n", + "5 13 0.097222 1.263889\n", + "6 7 0.069444 0.486111\n", + "7 14 0.069444 0.972222\n", + "8 6 0.046296 0.277778\n", + "9 15 0.046296 0.694444\n", + "10 5 0.027778 0.138889\n", + "11 16 0.027778 0.444444\n", + "12 4 0.013889 0.055556\n", + "13 17 0.013889 0.236111\n", + "14 3 0.004630 0.013889\n", + "15 18 0.004630 0.083333\n", + "\n", + "Expectation for sum: 10.5\n", + "\n", + "Data for product\n", + " x P(X) x * P(X)\n", + "0 12 0.069444 0.833333\n", + "1 24 0.069444 1.666667\n", + "2 30 0.055556 1.666667\n", + "3 60 0.055556 3.333333\n", + "4 36 0.055556 2.000000\n", + "5 18 0.041667 0.750000\n", + "6 72 0.041667 3.000000\n", + "7 6 0.041667 0.250000\n", + "8 48 0.041667 2.000000\n", + "9 20 0.041667 0.833333\n", + "10 8 0.032407 0.259259\n", + "11 90 0.027778 2.500000\n", + "12 40 0.027778 1.111111\n", + "13 16 0.027778 0.444444\n", + "14 10 0.027778 0.277778\n", + "15 4 0.027778 0.111111\n", + "16 15 0.027778 0.416667\n", + "17 120 0.027778 3.333333\n", + "18 32 0.013889 0.444444\n", + "19 25 0.013889 0.347222\n", + "20 108 0.013889 1.500000\n", + "21 100 0.013889 1.388889\n", + "22 96 0.013889 1.333333\n", + "23 3 0.013889 0.041667\n", + "24 80 0.013889 1.111111\n", + "25 75 0.013889 1.041667\n", + "26 150 0.013889 2.083333\n", + "27 144 0.013889 2.000000\n", + "28 5 0.013889 0.069444\n", + "29 180 0.013889 2.500000\n", + "30 50 0.013889 
0.694444\n", + "31 9 0.013889 0.125000\n", + "32 45 0.013889 0.625000\n", + "33 2 0.013889 0.027778\n", + "34 54 0.013889 0.750000\n", + "35 1 0.004630 0.004630\n", + "36 125 0.004630 0.578704\n", + "37 64 0.004630 0.296296\n", + "38 27 0.004630 0.125000\n", + "39 216 0.004630 1.000000\n", + "\n", + " Expectation for product 42.875\n" + ] + } + ], + "source": [ + "# Problem 4\n", + "\n", + "import pandas as pd\n", + " \n", + "dice1 = [1, 2, 3, 4, 5, 6]\n", + "dice2 = [1, 2, 3, 4, 5, 6]\n", + "dice3 = [1, 2, 3, 4, 5, 6]\n", + "\n", + "\n", + "data = [[d1, d2, d3, d1 + d2 + d3, d1 * d2 * d3] for d1 in dice1 for d2 in dice2 for d3 in dice3]\n", + "\n", + "df = pd.DataFrame(data, columns=[\"dice1\", \"dice2\", \"dice3\", \"sum_of_dots\", \"product_of_dots\"])\n", + "total_count = len(dice1) * len(dice2) * len(dice3)\n", + "\n", + "# Expectation of sum of number of dots on the three rolls\n", + "sum_pmf = [(x[0], count_x / (total_count * 1.0)) for x, count_x in df.value_counts([\"sum_of_dots\"]).items()]\n", + "sum_pmf_df = pd.DataFrame(sum_pmf, columns=[\"x\", \"P(X)\"])\n", + "sum_pmf_df[\"x * P(X)\"] = sum_pmf_df[\"x\"] * sum_pmf_df[\"P(X)\"] \n", + "expectation_sum = sum_pmf_df[\"x * P(X)\"].sum()\n", + "\n", + "# Expectation of product of number of dots on the three rolls\n", + "product_pmf = [(x[0], count_x / (total_count * 1.0)) for x, count_x in df.value_counts([\"product_of_dots\"]).items()]\n", + "product_pmf_df = pd.DataFrame(product_pmf, columns=[\"x\", \"P(X)\"])\n", + "product_pmf_df[\"x * P(X)\"] = product_pmf_df[\"x\"] * product_pmf_df[\"P(X)\"] \n", + "expectation_product = product_pmf_df[\"x * P(X)\"].sum()\n", + "\n", + "print(\"\\nData for sum\")\n", + "print(sum_pmf_df)\n", + "print(\"\\nExpectation for sum: {}\".format(expectation_sum))\n", + "\n", + "print(\"\\nData for product\")\n", + "print(product_pmf_df)\n", + "print(\"\\n Expectation for product {}\".format(expectation_product))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + 
"id": "74441428-8bb6-4f30-8f48-52feafa735d6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/b6c9e0371f914493bd8e01c99d488dc1-pulp.mps max timeMode elapsed branch printingOptions all solution /tmp/b6c9e0371f914493bd8e01c99d488dc1-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 9 COLUMNS\n", + "At line 20 RHS\n", + "At line 25 BOUNDS\n", + "At line 29 ENDATA\n", + "Problem MODEL has 4 rows, 3 columns and 7 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Presolve 0 (-4) rows, 0 (-3) columns and 0 (-7) elements\n", + "Empty problem - 0 rows, 0 columns and 0 elements\n", + "Optimal - objective value 148.4\n", + "After Postsolve, objective 148.4, infeasibilities - dual 0 (0), primal 0 (0)\n", + "Optimal objective 148.4 - 0 iterations time 0.002, Presolve 0.00\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "Solution:\n" + ] + }, + { + "data": { + "text/html": [ + "
VariableValue
X8.6
Y8.4
Z2.6
Objective148.4
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "## Problem 5\n", + "from pulp import *\n", + "from IPython.display import HTML, display\n", + "\n", + "def display_table(table):\n", + " display(HTML(\n", + " '{}
'.format(\n", + " ''.join(\n", + " '{}'.format(''.join(str(_) for _ in row)) for row in table)\n", + " )\n", + " ))\n", + " \n", + "problem = LpProblem('MSML_602_PCS2_HW1_Q5', LpMaximize)\n", + "\n", + "X = LpVariable('X', cat='Continuous')\n", + "Y = LpVariable('Y', cat='Continuous')\n", + "Z = LpVariable('Z', cat='Continuous')\n", + "\n", + "problem += 15 * X + 2 * Y + Z, \"Objective Function\"\n", + "problem += X <= 10, \"Constraint X\"\n", + "problem += X + Y <= 17, \"Constraint X, Y\"\n", + "problem += 2 * X + 3 * Z <= 25, \"Constraint X, Z\"\n", + "problem += Y + Z >= 11, \"Constraint Y, Z\"\n", + "\n", + "problem.solve()\n", + "print(\"Solution:\")\n", + "\n", + "data = [[\"Variable\", \"Value\"]] + [[v.name, v.varValue] for v in problem.variables()]\n", + "data += [[\"Objective\", problem.objective.value()]]\n", + "\n", + "display_table(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "96979ae9-7c40-41e9-bef1-ec1ef9e89e9e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/7efaabba2ec74d7a9fa959ed5c88312d-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/7efaabba2ec74d7a9fa959ed5c88312d-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 10 COLUMNS\n", + "At line 65 RHS\n", + "At line 71 BOUNDS\n", + "At line 84 ENDATA\n", + "Problem MODEL has 5 rows, 12 columns and 18 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Continuous objective value is 19 - 0.00 seconds\n", + "Cgl0004I processed model has 5 rows, 11 columns (11 integer (11 of which binary)) and 18 elements\n", + "Cutoff increment increased from 1e-05 to 0.9999\n", + "Cbc0038I Initial state - 0 integers unsatisfied sum - 0\n", + "Cbc0038I Solution found of 19\n", + "Cbc0038I Before mini branch and bound, 11 integers at bound fixed and 0 continuous\n", + "Cbc0038I Mini branch and bound did not improve solution (0.00 seconds)\n", + "Cbc0038I After 0.00 seconds - Feasibility pump exiting with objective of 19 - took 0.00 seconds\n", + "Cbc0012I Integer solution of 19 found by feasibility pump after 0 iterations and 0 nodes (0.00 seconds)\n", + "Cbc0001I Search completed - best objective 19, took 0 iterations and 0 nodes (0.00 seconds)\n", + "Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost\n", + "Cuts at root node changed objective from 19 to 19\n", + "Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Knapsack was tried 0 times and 
created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal solution found\n", + "\n", + "Objective value: 19.00000000\n", + "Enumerated nodes: 0\n", + "Total iterations: 0\n", + "Time (CPU seconds): 0.00\n", + "Time (Wallclock seconds): 0.00\n", + "\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "Shortest distance from v1 to v5 = 19.0\n", + "['v1->v3', 'v3->v4', 'v4->v5']\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/pulp/pulp.py:1352: UserWarning: Spaces are not permitted in the name. Converted to '_'\n", + " warnings.warn(\"Spaces are not permitted in the name. 
Converted to '_'\")\n" + ] + } + ], + "source": [ + "# HW1 Problem 6\n", + "\n", + "from pulp import *\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "\n", + "# I assumed the following arbitrary graph\n", + "G = nx.Graph()\n", + "G.add_edge(\"v1\", \"v2\", weight=12)\n", + "G.add_edge(\"v1\", \"v3\", weight=5)\n", + "G.add_edge(\"v1\", \"v5\", weight=25)\n", + "G.add_edge(\"v2\", \"v5\", weight=10)\n", + "G.add_edge(\"v3\", \"v4\", weight=6)\n", + "G.add_edge(\"v4\", \"v5\", weight=8)\n", + "\n", + "# I am finding the shortest path from vertex 1 to vertex 5\n", + "source = \"v1\"\n", + "target = \"v5\"\n", + "\n", + "elarge = [(u, v) for (u, v, d) in G.edges(data=True) if d[\"weight\"] > 0.5]\n", + "esmall = [(u, v) for (u, v, d) in G.edges(data=True) if d[\"weight\"] <= 0.5]\n", + "\n", + "pos = nx.spring_layout(G, seed=7) \n", + "\n", + "# nodes\n", + "nx.draw_networkx_nodes(G, pos, node_size=700)\n", + "\n", + "# edges\n", + "nx.draw_networkx_edges(G, pos, edgelist=elarge, width=6)\n", + "nx.draw_networkx_edges(\n", + " G, pos, edgelist=esmall, width=6, alpha=0.5, edge_color=\"b\", style=\"dashed\"\n", + ")\n", + "\n", + "# node labels\n", + "nx.draw_networkx_labels(G, pos, font_size=20, font_family=\"sans-serif\")\n", + "# edge weight labels\n", + "edge_labels = nx.get_edge_attributes(G, \"weight\")\n", + "nx.draw_networkx_edge_labels(G, pos, edge_labels)\n", + "\n", + "ax = plt.gca()\n", + "ax.margins(0.08)\n", + "plt.axis(\"off\")\n", + "plt.tight_layout()\n", + "plt.show()\n", + "\n", + "\n", + "prob = pulp.LpProblem(\"Shortest Path Problem\", LpMinimize)\n", + "cost = nx.get_edge_attributes(G, \"weight\")\n", + "target_vars = {}\n", + "\n", + "for i, j in G.edges:\n", + " x = LpVariable(\"x_{0}_{1}\".format(i,j), cat=\"Binary\")\n", + " y = LpVariable(\"x_{0}_{1}\".format(j, i), cat=\"Binary\")\n", + " target_vars[i, j] = x\n", + " target_vars[j, i] = y\n", + "\n", + "prob += lpSum([cost[i, j] * target_vars[i, j] 
for i, j in G.edges] + [cost[i, j] * target_vars[j, i] for i, j in G.edges]), \"Objective function\"\n", + "\n", + "for node in G.nodes:\n", + " if node == source:\n", + " prob += pulp.lpSum([target_vars[i, j] for i, j in target_vars if i == node]) == 1\n", + " elif node == target:\n", + " prob += pulp.lpSum([target_vars[i, j] for i, j in target_vars if j == node]) == 1\n", + " else:\n", + " prob += pulp.lpSum([target_vars[i, j] for i, j in target_vars if i == node]) - pulp.lpSum([target_vars[i, j] for i, j in target_vars if j == node]) == 0\n", + "\n", + "prob.solve()\n", + "print(\"Shortest distance from {0} to {1} = \".format(source, target), value(prob.objective))\n", + "\n", + "chosen_vars = list(filter(lambda v: v.varValue > 0, prob.variables()))\n", + "routes = list(map(lambda x: x.name.replace(\"x_\", \"\").replace(\"_\", \"->\"), chosen_vars))\n", + "print(routes)\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "dc74b8c2-e352-4284-b9f6-c936d1a7604d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('job1', 'cpu1'), ('job1', 'cpu2'), ('job1', 'cpu3'), ('job1', 'cpu4'), ('job2', 'cpu1'), ('job2', 'cpu2'), ('job2', 'cpu3'), ('job2', 'cpu4'), ('job3', 'cpu1'), ('job3', 'cpu2'), ('job3', 'cpu3'), ('job3', 'cpu4'), ('job4', 'cpu1'), ('job4', 'cpu2'), ('job4', 'cpu3'), ('job4', 'cpu4')]\n", + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/c85182008d6145a5a1478dcbce27ffa7-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/c85182008d6145a5a1478dcbce27ffa7-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 13 COLUMNS\n", + "At line 94 RHS\n", + "At line 103 BOUNDS\n", + "At line 120 ENDATA\n", + "Problem MODEL has 8 rows, 16 columns and 32 elements\n", + "Coin0008I MODEL 
read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Continuous objective value is 10 - 0.00 seconds\n", + "Cgl0004I processed model has 8 rows, 16 columns (16 integer (16 of which binary)) and 32 elements\n", + "Cutoff increment increased from 1e-05 to 0.9999\n", + "Cbc0038I Initial state - 0 integers unsatisfied sum - 0\n", + "Cbc0038I Solution found of 10\n", + "Cbc0038I Before mini branch and bound, 16 integers at bound fixed and 0 continuous\n", + "Cbc0038I Mini branch and bound did not improve solution (0.00 seconds)\n", + "Cbc0038I After 0.00 seconds - Feasibility pump exiting with objective of 10 - took 0.00 seconds\n", + "Cbc0012I Integer solution of 10 found by feasibility pump after 0 iterations and 0 nodes (0.00 seconds)\n", + "Cbc0001I Search completed - best objective 10, took 0 iterations and 0 nodes (0.00 seconds)\n", + "Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost\n", + "Cuts at root node changed objective from 10 to 10\n", + "Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "FlowCover was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal 
solution found\n", + "\n", + "Objective value: 10.00000000\n", + "Enumerated nodes: 0\n", + "Total iterations: 0\n", + "Time (CPU seconds): 0.00\n", + "Time (Wallclock seconds): 0.00\n", + "\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "############ TIME COST MATRIX\n", + "{'job1': {'cpu1': 2, 'cpu2': 6, 'cpu3': 5, 'cpu4': 4}, 'job2': {'cpu1': 4, 'cpu2': 6, 'cpu3': 7, 'cpu4': 9}, 'job3': {'cpu1': 8, 'cpu2': 3, 'cpu3': 4, 'cpu4': 1}, 'job4': {'cpu1': 2, 'cpu2': 3, 'cpu3': 1, 'cpu4': 1}}\n", + "################## VARIABLES\n", + "{'job1': {'cpu1': time_job1_cpu1, 'cpu2': time_job1_cpu2, 'cpu3': time_job1_cpu3, 'cpu4': time_job1_cpu4}, 'job2': {'cpu1': time_job2_cpu1, 'cpu2': time_job2_cpu2, 'cpu3': time_job2_cpu3, 'cpu4': time_job2_cpu4}, 'job3': {'cpu1': time_job3_cpu1, 'cpu2': time_job3_cpu2, 'cpu3': time_job3_cpu3, 'cpu4': time_job3_cpu4}, 'job4': {'cpu1': time_job4_cpu1, 'cpu2': time_job4_cpu2, 'cpu3': time_job4_cpu3, 'cpu4': time_job4_cpu4}}\n", + "########### VALUES ##########\n", + "time_job1_cpu1 = 1.0\n", + "time_job1_cpu2 = 0.0\n", + "time_job1_cpu3 = 0.0\n", + "time_job1_cpu4 = 0.0\n", + "time_job2_cpu1 = 0.0\n", + "time_job2_cpu2 = 1.0\n", + "time_job2_cpu3 = 0.0\n", + "time_job2_cpu4 = 0.0\n", + "time_job3_cpu1 = 0.0\n", + "time_job3_cpu2 = 0.0\n", + "time_job3_cpu3 = 0.0\n", + "time_job3_cpu4 = 1.0\n", + "time_job4_cpu1 = 0.0\n", + "time_job4_cpu2 = 0.0\n", + "time_job4_cpu3 = 1.0\n", + "time_job4_cpu4 = 0.0\n", + "\n", + "####### JOB ASSIGNMENTS ######\n", + "\n", + "job1 is assigned to ['cpu1']\n", + "job2 is assigned to ['cpu2']\n", + "job3 is assigned to ['cpu4']\n", + "job4 is assigned to ['cpu3']\n", + "\n", + "Value of Objective Function = 10.0\n" + ] + } + ], + "source": [ + "## Problem 8a\n", + "\n", + "from pulp import *\n", + "import random\n", + "\n", + "cpus=[\"cpu1\", \"cpu2\", \"cpu3\", \"cpu4\"]\n", + "jobs=[\"job1\", \"job2\", \"job3\", 
\"job4\"]\n", + "\n", + "\n", + "\n", + "prob = LpProblem(\"CPU Assignment\", LpMinimize) \n", + "time_values = {\n", + " 'job1': {'cpu1': 2, 'cpu2': 6, 'cpu3': 5, 'cpu4': 4}, \n", + " 'job2': {'cpu1': 4, 'cpu2': 6, 'cpu3': 7, 'cpu4': 9}, \n", + " 'job3': {'cpu1': 8, 'cpu2': 3, 'cpu3': 4, 'cpu4': 1}, \n", + " 'job4': {'cpu1': 2, 'cpu2': 3, 'cpu3': 1, 'cpu4': 1}\n", + "}\n", + "time_vars = {}\n", + "for j in jobs:\n", + " time_vars[j] = {}\n", + " for c in cpus:\n", + " time_vars[j][c] = LpVariable(\"time_{0}_{1}\".format(j,c), 0, None, LpInteger)\n", + "\n", + " \n", + "job_cpu_combinations = [(j, c) for j in jobs for c in cpus]\n", + "print(job_cpu_combinations)\n", + "\n", + "prob += (\n", + " lpSum([time_vars[j][c] * time_values[j][c] for (j, c) in job_cpu_combinations]),\n", + " \"Sum_of_Assignment_Costs\",\n", + ")\n", + "\n", + "\n", + "for j in jobs:\n", + " prob+= lpSum(time_vars[j][c] for c in cpus) == 1\n", + "\n", + "for c in cpus:\n", + " prob+= lpSum(time_vars[j][c] for j in jobs) == 1\n", + " \n", + "prob.solve()\n", + "\n", + "print(\"############ TIME COST MATRIX\")\n", + "print(time_values)\n", + "print(\"################## VARIABLES\")\n", + "print(time_vars)\n", + "print(\"########### VALUES ##########\")\n", + "for v in prob.variables():\n", + " print(v.name, \"=\", v.varValue)\n", + "\n", + "\n", + "print(\"\\n####### JOB ASSIGNMENTS ######\\n\")\n", + "assignments = { }\n", + "for job in time_vars:\n", + " cpus = time_vars[job]\n", + " assigned = []\n", + " for cpu in cpus:\n", + " if cpus[cpu].varValue == 1:\n", + " assigned.append(cpu)\n", + " assignments[job] = assigned\n", + " print(\"{0} is assigned to {1}\".format(job, assigned))\n", + "\n", + "print(\"\\nValue of Objective Function = \", value(prob.objective))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "fb2eebbd-59a0-4301-8057-e725ff3cf2ae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "############ TIME 
COST MATRIX\n", + "{'cpu1': {'job1': 2, 'job2': 7}, 'cpu2': {'job1': 1, 'job2': 3}}\n", + "################## VARIABLES\n", + "{'cpu1': {'job1': time_job1_cpu1, 'job2': time_job2_cpu1}, 'cpu2': {'job1': time_job1_cpu2, 'job2': time_job2_cpu2}}\n", + "#############################\n", + "[('cpu1', 'job1'), ('cpu1', 'job2'), ('cpu2', 'job1'), ('cpu2', 'job2')]\n", + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/06652fcfafa94d56aa62c1066888fd9d-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/06652fcfafa94d56aa62c1066888fd9d-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 9 COLUMNS\n", + "At line 30 RHS\n", + "At line 35 BOUNDS\n", + "At line 40 ENDATA\n", + "Problem MODEL has 4 rows, 4 columns and 8 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Continuous objective value is 5 - 0.00 seconds\n", + "Cgl0004I processed model has 0 rows, 0 columns (0 integer (0 of which binary)) and 0 elements\n", + "Cbc3007W No integer variables - nothing to do\n", + "Cuts at root node changed objective from 5 to -1.79769e+308\n", + "Probing was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Gomory was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Knapsack was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "Clique was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "FlowCover was tried 0 times and created 0 cuts of which 0 were active 
after adding rounds of cuts (0.000 seconds)\n", + "TwoMirCuts was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "ZeroHalf was tried 0 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal solution found\n", + "\n", + "Objective value: 5.00000000\n", + "Enumerated nodes: 0\n", + "Total iterations: 0\n", + "Time (CPU seconds): 0.00\n", + "Time (Wallclock seconds): 0.00\n", + "\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "time_job1_cpu1 = 1.0\n", + "time_job1_cpu2 = 0.0\n", + "time_job2_cpu1 = 0.0\n", + "time_job2_cpu2 = 1.0\n", + "Value of Objective Function = 5.0\n" + ] + } + ], + "source": [ + "# Problem 8b\n", + "from pulp import *\n", + "import random\n", + "\n", + "cpus=[\"cpu1\", \"cpu2\"]\n", + "jobs=[\"job1\", \"job2\"]\n", + "\n", + "time_values = {\n", + " \"cpu1\": {\"job1\": 2, \"job2\": 7 },\n", + " \"cpu2\": {\"job1\": 1, \"job2\": 3 }\n", + "}\n", + "\n", + "prob = LpProblem(\"CPU Assignment\", LpMinimize) \n", + "time_vars = {}\n", + "for c in cpus:\n", + " time_vars[c] = {}\n", + " for j in jobs:\n", + " time_vars[c][j] = LpVariable(\"time_{0}_{1}\".format(j,c), 0, cat=\"Integer\")\n", + "\n", + "print(\"############ TIME COST MATRIX\")\n", + "print(time_values)\n", + "print(\"################## VARIABLES\")\n", + "print(time_vars)\n", + "print(\"#############################\")\n", + "cpu_job_combinations = [(c, j) for c in cpus for j in jobs]\n", + "print(cpu_job_combinations)\n", + "\n", + "prob += (\n", + " lpSum([time_vars[c][j] * time_values[c][j] for (c, j) in cpu_job_combinations]),\n", + " \"Sum_of_Assignment_Costs\",\n", + ")\n", + "\n", + "\n", + "for j in jobs:\n", + " prob+= lpSum(time_vars[c][j] for c in cpus) == 1\n", + "\n", + "for c in cpus:\n", + " prob+= lpSum(time_vars[c][j] for j in jobs) == 1\n", + 
"prob.solve()\n", + "\n", + "for v in prob.variables():\n", + " print(v.name, \"=\", v.varValue)\n", + " \n", + "print(\"Value of Objective Function = \", value(prob.objective))" + ] + }, + { + "cell_type": "markdown", + "id": "ae8280c4-63a0-43d6-b879-89aa85d542ba", + "metadata": {}, + "source": [ + "### Integer LP VS LP Relaxation \n", + "\n", + "I have assumed the following costs for each job, cpu combination:\n", + "\n", + "| | cpu1 | cpu2 |\n", + "|-|------|------|\n", + "|job1|2|1|\n", + "|job2|7|3|\n", + "\n", + "I have set up the following variables in PuLP for ILP & LP Relaxation respectively:\n", + "\n", + "**For ILP**\n", + "\n", + "The variable category is set as Integer \n", + "\n", + "| | cpu1 | cpu2 |\n", + "|-|------|------|\n", + "|job1|x-j1c1-integer|x-j1c2-integer|\n", + "|job2|x-j2c1-integer|x-j2c2-integer|\n", + "\n", + "**For LP Relaxation**\n", + "\n", + "The variable category is set as Continuous\n", + "\n", + "| | cpu1 | cpu2 |\n", + "|-|------|------|\n", + "|job1|x-j1c1-continuous|x-j1c2-continuous|\n", + "|job2|x-j2c1-continuous|x-j2c2-continuous|\n", + "\n", + "I defined two different problems:\n", + "- `prob_integer` for ILP\n", + "- `prob_relaxed` for LP Relaxation \n", + "\n", + "\n", + "### Findings:\n", + "I get the same solution for both ILP and LP Relaxation. 
I could not find an optimal solution for LP Relaxation that is lower than the ILP solution.\n", + "\n", + "Here were my results:\n", + "\n", + "**For ILP**\n", + "\n", + "| | cpu1 | cpu2 |\n", + "|-|------|------|\n", + "|job1|1|0|\n", + "|job2|0|1|\n", + "\n", + "Value of Objective Function: 5.0\n", + "\n", + "**For LP Relaxation**\n", + "\n", + "| | cpu1 | cpu2 |\n", + "|-|------|------|\n", + "|job1|1|0|\n", + "|job2|0|1|\n", + "\n", + "Value of Objective Function: 5.0" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "0fc0adbe-7980-4ae3-b5f0-9b73e64b902b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU_Assignment_Relaxed:\n", + "MINIMIZE\n", + "2.0*job_1_cpu_1_cont + 1.0*job_1_cpu_2_cont + 7.0*job_2_cpu_1_cont + 3.0*job_2_cpu_2_cont + 0.0\n", + "SUBJECT TO\n", + "_C1: job_1_cpu_1_cont >= 0\n", + "\n", + "_C2: job_1_cpu_1_cont <= 1\n", + "\n", + "_C3: job_2_cpu_1_cont >= 0\n", + "\n", + "_C4: job_2_cpu_1_cont <= 1\n", + "\n", + "_C5: job_1_cpu_2_cont >= 0\n", + "\n", + "_C6: job_1_cpu_2_cont <= 1\n", + "\n", + "_C7: job_2_cpu_2_cont >= 0\n", + "\n", + "_C8: job_2_cpu_2_cont <= 1\n", + "\n", + "_C9: job_1_cpu_1_cont + job_1_cpu_2_cont = 1\n", + "\n", + "_C10: job_2_cpu_1_cont + job_2_cpu_2_cont = 1\n", + "\n", + "_C11: job_1_cpu_1_cont + job_2_cpu_1_cont = 1\n", + "\n", + "_C12: job_1_cpu_2_cont + job_2_cpu_2_cont = 1\n", + "\n", + "VARIABLES\n", + "job_1_cpu_1_cont free Continuous\n", + "job_1_cpu_2_cont free Continuous\n", + "job_2_cpu_1_cont free Continuous\n", + "job_2_cpu_2_cont free Continuous\n", + "\n", + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/1ae75808cf8a4b5f8ae6348d6e1229dc-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/1ae75808cf8a4b5f8ae6348d6e1229dc-pulp.sol (default strategy 1)\n", + "At line 2 
NAME MODEL\n", + "At line 3 ROWS\n", + "At line 9 COLUMNS\n", + "At line 30 RHS\n", + "At line 35 BOUNDS\n", + "At line 40 ENDATA\n", + "Problem MODEL has 4 rows, 4 columns and 8 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Problem is unbounded - 0.00 seconds\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/57fe04c5ac14421b9bed79f136159511-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/57fe04c5ac14421b9bed79f136159511-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 17 COLUMNS\n", + "At line 38 RHS\n", + "At line 51 BOUNDS\n", + "At line 56 ENDATA\n", + "Problem MODEL has 12 rows, 4 columns and 16 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Presolve 0 (-12) rows, 0 (-4) columns and 0 (-16) elements\n", + "Empty problem - 0 rows, 0 columns and 0 elements\n", + "Optimal - objective value 5\n", + "After Postsolve, objective 5, infeasibilities - dual 0 (0), primal 0 (0)\n", + "Optimal objective 5 - 0 iterations time 0.002, Presolve 0.00\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.00 (Wallclock seconds): 0.00\n", + "\n", + "\n", + "############### Integer LP #################### \n", + "\n", + "job_1_cpu_1_int = 1.0\n", + "job_1_cpu_2_int = 0.0\n", + "job_2_cpu_1_int = 0.0\n", + "job_2_cpu_2_int = 1.0\n", + "Value of Objective Function (Integer LP) = 5.0\n", + "\n", + "############### LP Relaxation #################### \n", + "\n", + "job_1_cpu_1_cont = 1.0\n", + "job_1_cpu_2_cont = 0.0\n", + "job_2_cpu_1_cont = 0.0\n", + 
"job_2_cpu_2_cont = 1.0\n", + "Value of Objective Function (Relaxed LP) = 5.0\n" + ] + } + ], + "source": [ + "## LP Relaxed\n", + "from pulp import *\n", + "import random\n", + "\n", + "cpus=[\"cpu1\", \"cpu2\"]\n", + "jobs=[\"job1\", \"job2\"]\n", + "\n", + "combinations = [\n", + " (1,1),\n", + " (1,2),\n", + " (2,1),\n", + " (2,2),\n", + "]\n", + "\n", + "# Cost (job, cpu)\n", + "costs = {\n", + " (1,1): 2.0, \n", + " (1,2): 1.0,\n", + " (2,1): 7.0,\n", + " (2,2): 3.0\n", + "}\n", + "variables_integer = {\n", + " (1,1): LpVariable(\"x-j1c1-integer\", cat=\"Integer\"),\n", + " (1,2): LpVariable(\"x-j1c2-integer\", cat=\"Integer\"),\n", + " (2,1): LpVariable(\"x-j1c2-integer\", cat=\"Integer\"),\n", + " (2,2): LpVariable(\"x-j1c2-integer\", cat=\"Integer\")\n", + "}\n", + " \n", + "variables_relaxed = {\n", + " (1,1): LpVariable(\"x-j1c1-continuous\", cat=\"Continuous\"),\n", + " (1,2): LpVariable(\"x-j1c2-continuous\", cat=\"Continuous\"),\n", + " (2,1): LpVariable(\"x-j1c2-continuous\", cat=\"Continuous\"),\n", + " (2,2): LpVariable(\"x-j1c2-continuous\", cat=\"Continuous\")\n", + "}\n", + " \n", + "prob_integer = LpProblem(\"CPU Assignment Integer\", LpMinimize) \n", + "prob_relaxed = LpProblem(\"CPU Assignment Relaxed\", LpMinimize)\n", + "\n", + "prob_integer += (\n", + " lpSum([variables_integer[(j, c)] * costs[(j, c)] for (j, c) in combinations]),\n", + " \"Sum_of_Assignment_Costs (Integer LP)\",\n", + ")\n", + "prob_relaxed += (\n", + " lpSum([variables_relaxed[(j, c)] * costs[(j, c)] for (j, c) in combinations]),\n", + " \"Sum_of_Assignment_Costs (LP Relaxed)\",\n", + ")\n", + "\n", + "prob_integer += lpSum([variables_integer[(1,1)], variables_integer[(1,2)]]) == 1 \n", + "prob_integer += lpSum([variables_integer[(2,1)], variables_integer[(2,2)]]) == 1 \n", + "prob_integer += lpSum([variables_integer[(1,1)], variables_integer[(2,1)]]) == 1 \n", + "prob_integer += lpSum([variables_integer[(1,2)], variables_integer[(2,2)]]) == 1\n", + "\n", + "prob_relaxed 
+= variables_relaxed[(1,1)] >= 0\n", + "prob_relaxed += variables_relaxed[(1,1)] <= 1\n", + "prob_relaxed += variables_relaxed[(2,1)] >= 0\n", + "prob_relaxed += variables_relaxed[(2,1)] <= 1\n", + "prob_relaxed += variables_relaxed[(1,2)] >= 0\n", + "prob_relaxed += variables_relaxed[(1,2)] <= 1\n", + "prob_relaxed += variables_relaxed[(2,2)] >= 0\n", + "prob_relaxed += variables_relaxed[(2,2)] <= 1\n", + "prob_relaxed += lpSum([variables_relaxed[(1,1)], variables_relaxed[(1,2)]]) == 1 \n", + "prob_relaxed += lpSum([variables_relaxed[(2,1)], variables_relaxed[(2,2)]]) == 1 \n", + "prob_relaxed += lpSum([variables_relaxed[(1,1)], variables_relaxed[(2,1)]]) == 1 \n", + "prob_relaxed += lpSum([variables_relaxed[(1,2)], variables_relaxed[(2,2)]]) == 1\n", + "print(prob_relaxed)\n", + "\n", + "prob_integer.solve()\n", + "prob_relaxed.solve()\n", + "\n", + "print(\"\\n############### Integer LP #################### \\n\")\n", + "for v in prob_integer.variables():\n", + " print(v.name, \"=\", v.varValue)\n", + " \n", + "print(\"Value of Objective Function (Integer LP) = \", value(prob_integer.objective))\n", + "\n", + "print(\"\\n############### LP Relaxation #################### \\n\")\n", + "for v in prob_relaxed.variables():\n", + " print(v.name, \"=\", v.varValue)\n", + " \n", + "print(\"Value of Objective Function (Relaxed LP) = \", value(prob_relaxed.objective))" + ] + }, + { + "cell_type": "markdown", + "id": "0ce857f3-4a5b-42b9-b454-7c4a14820320", + "metadata": {}, + "source": [ + "### Problem 9\n", + "\n", + "I set up an arbitrary graph using NetworkX and implemented the constraints that I wrote in the homework sheet. The different colors of nodes in the graph represent the two groups into which the graph is bisected. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "54768339-c33b-4b7f-bedf-d87d2167bfce", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{('v1', 'v2'): edge_v1_v2, ('v1', 'v3'): edge_v1_v3, ('v1', 'v5'): edge_v1_v5, ('v2', 'v5'): edge_v2_v5, ('v3', 'v6'): edge_v3_v6, ('v5', 'v4'): edge_v5_v4, ('v5', 'v6'): edge_v5_v6, ('v4', 'v6'): edge_v4_v6}\n", + "Bisection_problem:\n", + "MINIMIZE\n", + "1*edge_v1_v2 + 1*edge_v1_v3 + 1*edge_v1_v5 + 1*edge_v2_v5 + 1*edge_v3_v6 + 1*edge_v4_v6 + 1*edge_v5_v4 + 1*edge_v5_v6 + 0\n", + "SUBJECT TO\n", + "_C1: partition_v1 + partition_v2 + partition_v3 + partition_v4 + partition_v5\n", + " + partition_v6 = 3\n", + "\n", + "_C2: - edge_v1_v2 + partition_v1 - partition_v2 <= 0\n", + "\n", + "_C3: - edge_v1_v2 - partition_v1 + partition_v2 <= 0\n", + "\n", + "_C4: - edge_v1_v3 + partition_v1 - partition_v3 <= 0\n", + "\n", + "_C5: - edge_v1_v3 - partition_v1 + partition_v3 <= 0\n", + "\n", + "_C6: - edge_v1_v5 + partition_v1 - partition_v5 <= 0\n", + "\n", + "_C7: - edge_v1_v5 - partition_v1 + partition_v5 <= 0\n", + "\n", + "_C8: - edge_v2_v5 + partition_v2 - partition_v5 <= 0\n", + "\n", + "_C9: - edge_v2_v5 - partition_v2 + partition_v5 <= 0\n", + "\n", + "_C10: - edge_v3_v6 + partition_v3 - partition_v6 <= 0\n", + "\n", + "_C11: - edge_v3_v6 - partition_v3 + partition_v6 <= 0\n", + "\n", + "_C12: - edge_v5_v4 - partition_v4 + partition_v5 <= 0\n", + "\n", + "_C13: - edge_v5_v4 + partition_v4 - partition_v5 <= 0\n", + "\n", + "_C14: - edge_v5_v6 + partition_v5 - partition_v6 <= 0\n", + "\n", + "_C15: - edge_v5_v6 - partition_v5 + partition_v6 <= 0\n", + "\n", + "_C16: - edge_v4_v6 + partition_v4 - partition_v6 <= 0\n", + "\n", + "_C17: - edge_v4_v6 - partition_v4 + partition_v6 <= 0\n", + "\n", + "VARIABLES\n", + "edge_v1_v2 free Continuous\n", + "edge_v1_v3 free Continuous\n", + "edge_v1_v5 free Continuous\n", + "edge_v2_v5 free Continuous\n", + "edge_v3_v6 free 
Continuous\n", + "edge_v4_v6 free Continuous\n", + "edge_v5_v4 free Continuous\n", + "edge_v5_v6 free Continuous\n", + "partition_v1 free Integer\n", + "partition_v2 free Integer\n", + "partition_v3 free Integer\n", + "partition_v4 free Integer\n", + "partition_v5 free Integer\n", + "partition_v6 free Integer\n", + "\n", + "Welcome to the CBC MILP Solver \n", + "Version: 2.10.3 \n", + "Build Date: Dec 15 2019 \n", + "\n", + "command line - /opt/conda/lib/python3.9/site-packages/pulp/apis/../solverdir/cbc/linux/64/cbc /tmp/96a3ea460af84356a66114e3afc70b73-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/96a3ea460af84356a66114e3afc70b73-pulp.sol (default strategy 1)\n", + "At line 2 NAME MODEL\n", + "At line 3 ROWS\n", + "At line 22 COLUMNS\n", + "At line 97 RHS\n", + "At line 115 BOUNDS\n", + "At line 130 ENDATA\n", + "Problem MODEL has 17 rows, 14 columns and 54 elements\n", + "Coin0008I MODEL read with 0 errors\n", + "Option for timeMode changed from cpu to elapsed\n", + "Continuous objective value is 0 - 0.00 seconds\n", + "Cgl0003I 0 fixed, 12 tightened bounds, 0 strengthened rows, 0 substitutions\n", + "Cgl0004I processed model has 17 rows, 14 columns (6 integer (0 of which binary)) and 54 elements\n", + "Cbc0012I Integer solution of 3 found by DiveCoefficient after 0 iterations and 0 nodes (0.00 seconds)\n", + "Cbc0031I 10 added rows had average density of 13.7\n", + "Cbc0013I At root node, 10 cuts changed objective from 0 to 2.8785087 in 93 passes\n", + "Cbc0014I Cut generator 0 (Probing) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.003 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 1 (Gomory) - 182 row cuts average 13.4 elements, 0 column cuts (0 active) in 0.007 seconds - new frequency is 1\n", + "Cbc0014I Cut generator 2 (Knapsack) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.001 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 3 (Clique) - 0 row cuts average 0.0 
elements, 0 column cuts (0 active) in 0.000 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 4 (MixedIntegerRounding2) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.002 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 5 (FlowCover) - 0 row cuts average 0.0 elements, 0 column cuts (0 active) in 0.001 seconds - new frequency is -100\n", + "Cbc0014I Cut generator 6 (TwoMirCuts) - 25 row cuts average 11.4 elements, 0 column cuts (0 active) in 0.002 seconds - new frequency is 1\n", + "Cbc0010I After 0 nodes, 1 on tree, 3 best solution, best possible 2.8785087 (0.05 seconds)\n", + "Cbc0001I Search completed - best objective 3, took 711 iterations and 2 nodes (0.05 seconds)\n", + "Cbc0032I Strong branching done 20 times (158 iterations), fathomed 2 nodes and fixed 0 variables\n", + "Cbc0035I Maximum depth 0, 0 variables fixed on reduced cost\n", + "Cuts at root node changed objective from 0 to 2.87851\n", + "Probing was tried 93 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.003 seconds)\n", + "Gomory was tried 103 times and created 200 cuts of which 0 were active after adding rounds of cuts (0.008 seconds)\n", + "Knapsack was tried 93 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.001 seconds)\n", + "Clique was tried 93 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "MixedIntegerRounding2 was tried 93 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.002 seconds)\n", + "FlowCover was tried 93 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.001 seconds)\n", + "TwoMirCuts was tried 103 times and created 46 cuts of which 0 were active after adding rounds of cuts (0.003 seconds)\n", + "ZeroHalf was tried 1 times and created 0 cuts of which 0 were active after adding rounds of cuts (0.000 seconds)\n", + "\n", + "Result - Optimal solution found\n", + "\n", + 
"Objective value: 3.00000000\n", + "Enumerated nodes: 2\n", + "Total iterations: 711\n", + "Time (CPU seconds): 0.05\n", + "Time (Wallclock seconds): 0.05\n", + "\n", + "Option for printingOptions changed from normal to all\n", + "Total time (CPU seconds): 0.05 (Wallclock seconds): 0.06\n", + "\n", + "edge_v1_v2 = 0.0\n", + "edge_v1_v3 = 1.0\n", + "edge_v1_v5 = 0.0\n", + "edge_v2_v5 = 0.0\n", + "edge_v3_v6 = 0.0\n", + "edge_v4_v6 = 0.0\n", + "edge_v5_v4 = 1.0\n", + "edge_v5_v6 = 1.0\n", + "partition_v1 = 1.0\n", + "partition_v2 = 1.0\n", + "partition_v3 = 0.0\n", + "partition_v4 = 0.0\n", + "partition_v5 = 1.0\n", + "partition_v6 = 0.0\n" + ] + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# HW1 Problem 9\n", + "\n", + "from pulp import *\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "\n", + "G = nx.Graph()\n", + "G.add_edge(\"v1\", \"v2\")\n", + "G.add_edge(\"v1\", \"v3\")\n", + "G.add_edge(\"v1\", \"v5\")\n", + "G.add_edge(\"v2\", \"v5\")\n", + "G.add_edge(\"v4\", \"v5\")\n", + "G.add_edge(\"v3\", \"v6\")\n", + "G.add_edge(\"v4\", \"v6\")\n", + "G.add_edge(\"v5\", \"v6\")\n", + "\n", + "partitions = {}\n", + "for node in G.nodes:\n", + " partitions[node] = LpVariable(\"partition_{0}\".format(node), cat=\"Integer\")\n", + "\n", + "\n", + "prob = pulp.LpProblem(\"Bisection problem\", LpMinimize)\n", + "\n", + "edge_vars = {}\n", + "\n", + "for edge in G.edges:\n", + " edge_vars[edge] = LpVariable(\"edge_{0}_{1}\".format(edge[0],edge[1]))\n", + " \n", + "print(edge_vars)\n", + "\n", + "prob+= lpSum([edge_vars[edge] for edge in G.edges]), \"Our objective statement\"\n", + "prob+= lpSum([partitions[node] for node in G.nodes]) == 3\n", + " \n", + "for x, y in G.edges:\n", + " prob += partitions[x] - partitions[y] <= edge_vars[(x,y)]\n", + " prob += partitions[y] - partitions[x] <= edge_vars[(x,y)]\n", + " \n", + "print(prob) \n", + "\n", + "prob.solve()\n", + "for v in prob.variables():\n", + " print(v.name, \"=\", v.varValue)\n", + " \n", + "pos = nx.spring_layout(G, seed=7) # positions for all nodes - seed for reproducibility\n", + "\n", + "# nodes\n", + "\n", + "colors = [\"tab:red\" if partitions[node].varValue > 0 else \"tab:blue\" for node in G.nodes()]\n", + "nx.draw_networkx_nodes(G, pos, node_size=700, node_color=colors)\n", + "\n", + "# edges\n", + "nx.draw_networkx_edges(G, pos, width=6)\n", + "nx.draw_networkx_edges(\n", + " G, pos, width=6, alpha=0.5, edge_color=\"b\", style=\"dashed\"\n", + ")\n", + "\n", + "# node labels\n", + "nx.draw_networkx_labels(G, pos, font_size=20, font_family=\"sans-serif\")\n", + "\n", + "ax = 
plt.gca()\n", + "ax.margins(0.08)\n", + "plt.axis(\"off\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/training/src/.ipynb_checkpoints/MSML602_Midterm_Karki-checkpoint.ipynb b/training/src/.ipynb_checkpoints/MSML602_Midterm_Karki-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..38f4f92cc158bb0f543acd1e1645d1074a3553c1 --- /dev/null +++ b/training/src/.ipynb_checkpoints/MSML602_Midterm_Karki-checkpoint.ipynb @@ -0,0 +1,831 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "code", + "execution_count": 82, + "metadata": { + "cellView": "form", + "id": "AXI2uCSkxx7m" + }, + "outputs": [], + "source": [ + "#@title Setup\n", + "\n", + "%%capture\n", + "!pip install networkx pulp numpy pandas\n", + "\n", + "!rm -rf ./data/\n", + "!mkdir -p ./data/\n", + "!wget -c -O ./data/lastfm_asia.zip \"https://snap.stanford.edu/data/lastfm_asia.zip\"\n", + "!unzip -q ./data/lastfm_asia.zip -d ./data/" + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Problem 3: Linear Programming\n", + "\n", + "\n", + "from pulp import *\n", + "from IPython.display import HTML, display\n", + "\n", + "def display_table(table):\n", + " display(HTML(\n", + " '{}
'.format(\n", + " ''.join(\n", + " '{}'.format(''.join(str(_) for _ in row)) for row in table)\n", + " )\n", + " ))\n", + "\n", + "problem = LpProblem(\"MSML_602_Midterm_Q3\", LpMaximize)\n", + "\n", + "X = LpVariable(\"X\", cat=\"Integer\")\n", + "Y = LpVariable(\"Y\", cat=\"Integer\")\n", + "\n", + "problem += (5 * X) + (3 * Y), \"Objective\"\n", + "problem += X + (2 * Y) <= 14, \"Constraint 1\"\n", + "problem += (3* X) - Y >= 0, \"Constraint 2\"\n", + "problem += X - Y <= 2, \"Constraint 3\"\n", + "\n", + "problem.solve()\n", + "print(\"Solution:\\n\")\n", + "\n", + "data = [[\"Variable\", \"Value\"]] + [[v.name, v.varValue] for v in problem.variables()]\n", + "data += [[\"Max value for objective function: \", problem.objective.value()]]\n", + "display_table(data)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 140 + }, + "cellView": "form", + "id": "ALmlZnbcx-9e", + "outputId": "45e2c507-3265-4b22-e21a-2d6dbd72f05f" + }, + "execution_count": 83, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Solution:\n", + "\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "
VariableValue
X6.0
Y4.0
Max value for objective function: 42.0
" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Problem 5: Graph Metrics\n", + "\n", + "import pandas as pd \n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "\n", + "df = pd.read_csv(\"/content/data/lasftm_asia/lastfm_asia_edges.csv\")\n", + "G = nx.from_pandas_edgelist(df, source=\"node_1\", target=\"node_2\")\n", + "shortest_path = nx.shortest_path_length(G, 0)\n", + "del shortest_path[0]\n", + "num = len(shortest_path)\n", + "total_length = sum([shortest_path[k] for k in shortest_path])\n", + "avg_shortest_path = total_length / num\n", + "print(f\"The average shortest path length from node 0 to all other nodes is: {avg_shortest_path}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "cellView": "form", + "id": "87mMC-B1yJoq", + "outputId": "83ff28ec-7d51-4f6a-ced6-358538a58f83" + }, + "execution_count": 84, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "The average shortest path length from node 0 to all other nodes is: 5.651974288337925\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Problem 6: Extracting Webpage Data" + ], + "metadata": { + "id": "mAiJRhb5iW5O" + } + }, + { + "cell_type": "code", + "source": [ + "#@title Scraping result\n", + "\n", + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import pandas as pd \n", + "import numpy as np\n", + "\n", + "page = requests.get(\"https://www.worldometers.info/coronavirus/#countries\")\n", + "html = page.content\n", + "\n", + "soup = BeautifulSoup(html, 'html.parser')\n", + "table = soup.find(\"table\", {\"id\": \"main_table_countries_today\"})\n", + "\n", + "cols = [\n", + " '#', 'Country', 'TotalCases', 'NewCases', 'TotalDeaths', 'NewDeaths', 'TotalRecovered',\n", + " 'NewRecovered','ActiveCases','Serious,Critical','TotalCases/1M pop','Deaths/1M pop', \n", + " 'TotalTests', 'Tests/1M pop', 'Population', 'Continent', '1 Case every X 
ppl', '1 Death every X ppl',\n", + " '1 Test every X ppl', 'New Cases/1M pop', 'New Deaths/1M pop', 'Active Cases/1M pop'\n", + "]\n", + "\n", + "tbody = table.find(\"tbody\")\n", + "rows = tbody.find_all(\"tr\")\n", + "\n", + "data = []\n", + "for row in rows:\n", + " cells = row.find_all(\"td\")\n", + " values = [c.text for c in cells]\n", + " data.append(values)\n", + "\n", + "def sanitize_country_number(row):\n", + " val = row[\"#\"]\n", + " if not val.strip():\n", + " return np.NaN\n", + " else:\n", + " return val\n", + "\n", + "def fill_active_cases(row):\n", + " val = row[\"ActiveCases\"]\n", + " if not np.isnan(val):\n", + " return val\n", + " active_per_1_mil = row[\"Active Cases/1M pop\"]\n", + " if np.isnan(active_per_1_mil):\n", + " return np.nan\n", + " population = row[\"Population\"]\n", + " return (active_per_1_mil/1000000) * population\n", + "\n", + "def to_float(col):\n", + " def mapper(row):\n", + " if row[col] == \"N/A\":\n", + " return np.NaN\n", + " val = row[col]\n", + " val = val.replace(\",\", \"\").strip()\n", + " if not val:\n", + " return np.NaN\n", + " return float(val)\n", + " return mapper \n", + "\n", + "df = pd.DataFrame(data, columns=cols)\n", + "df.replace(r\"\\n\", \"\", regex=True, inplace=True)\n", + "\n", + "df.head()\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 386 + }, + "cellView": "form", + "id": "Ay-ceRkkzcVg", + "outputId": "3b1e8535-f211-45ba-9b90-85c279e522ec" + }, + "execution_count": 85, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " # Country TotalCases NewCases TotalDeaths NewDeaths \\\n", + "0 North America 118,308,960 +16,354 1,557,219 +76 \n", + "1 Asia 195,343,819 +168,618 1,491,630 +230 \n", + "2 Europe 235,496,414 +41,389 1,948,669 +165 \n", + "3 South America 64,557,158 +10,126 1,333,737 +79 \n", + "4 Oceania 12,691,699 +3,057 21,779 +9 \n", + "\n", + " TotalRecovered NewRecovered ActiveCases Serious,Critical ... 
TotalTests \\\n", + "0 113,762,872 +16,362 2,988,869 7,881 ... \n", + "1 188,186,652 +78,736 5,665,537 9,159 ... \n", + "2 229,427,346 +175,758 4,120,399 7,685 ... \n", + "3 62,884,992 +7,699 338,429 10,119 ... \n", + "4 12,512,305 157,615 97 ... \n", + "\n", + " Tests/1M pop Population Continent 1 Case every X ppl \\\n", + "0 North America \n", + "1 Asia \n", + "2 Europe \n", + "3 South America \n", + "4 Australia/Oceania \n", + "\n", + " 1 Death every X ppl 1 Test every X ppl New Cases/1M pop New Deaths/1M pop \\\n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + " Active Cases/1M pop \n", + "0 \n", + "1 \n", + "2 \n", + "3 \n", + "4 \n", + "\n", + "[5 rows x 22 columns]" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
#CountryTotalCasesNewCasesTotalDeathsNewDeathsTotalRecoveredNewRecoveredActiveCasesSerious,Critical...TotalTestsTests/1M popPopulationContinent1 Case every X ppl1 Death every X ppl1 Test every X pplNew Cases/1M popNew Deaths/1M popActive Cases/1M pop
0North America118,308,960+16,3541,557,219+76113,762,872+16,3622,988,8697,881...North America
1Asia195,343,819+168,6181,491,630+230188,186,652+78,7365,665,5379,159...Asia
2Europe235,496,414+41,3891,948,669+165229,427,346+175,7584,120,3997,685...Europe
3South America64,557,158+10,1261,333,737+7962,884,992+7,699338,42910,119...South America
4Oceania12,691,699+3,05721,779+912,512,305157,61597...Australia/Oceania
\n", + "

5 rows × 22 columns

\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 85 + } + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Data sanitization / generation\n", + "\n", + "#@markdown Some of the countries (actually ships, in this case) did not have any population data, so I excluded those records from the dataset.\n", + "\n", + "#@markdown Some countries didn't have data for exact active cases, but had data for **active cases per 1 million population**. \n", + "#@markdown For these countries, I calculated their active cases by using the active cases per 1 million population data as follows: \n", + "\n", + "#@markdown ```Active Cases = (Active cases per 1 million population / 1,000,000) * Population```\n", + "\n", + "df[\"country_number\"] = df.apply(sanitize_country_number, axis=1)\n", + "\n", + "data_by_country = df[df[\"country_number\"].notna()].copy()\n", + "data_by_country[\"ActiveCases\"] = data_by_country.apply(to_float(\"ActiveCases\"), axis=1)\n", + "data_by_country[\"Active Cases/1M pop\"] = data_by_country.apply(to_float(\"Active Cases/1M pop\"), axis=1)\n", + "data_by_country[\"Population\"] = data_by_country.apply(to_float(\"Population\"), axis=1)\n", + "data_by_country[\"ActiveCases\"] = data_by_country.apply(fill_active_cases, axis=1)\n", + "data_by_country[data_by_country[\"ActiveCases\"] == \"N/A\"].head(20)\n", + "aggregated = data_by_country.groupby(\"Country\").agg({'ActiveCases':'mean', 'Population':'sum'}, as_index=False)\n", + "aggregated.reset_index(inplace=True)\n", + "dropped_countries = aggregated[aggregated[\"Population\"] == 0 ]\n", + "aggregated = aggregated[aggregated[\"Population\"] != 0 ]\n", + "aggregated[\"PercentageInfected\"] = aggregated.apply(lambda x: x[\"ActiveCases\"]/x[\"Population\"], axis=1)\n", + "aggregated.sort_values([\"PercentageInfected\"], ascending=False, inplace=True)\n", + "\n", + "print(\"These were the countries(ships) that didn't have population data:\\n\")\n", + "print(dropped_countries)\n" + ], + 
"metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "cellView": "form", + "id": "Roitzj22-VO5", + "outputId": "5dc61fa2-31c9-4fa4-828c-c9f88fb449e2" + }, + "execution_count": 86, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "These were the countries(ships) that didn't have population data:\n", + "\n", + " Country ActiveCases Population\n", + "56 Diamond Princess 0.0 0.0\n", + "120 MS Zaandam 0.0 0.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "#@title Average active cases & the proportion of the total population affected\n", + "\n", + "from IPython.display import HTML, display\n", + "\n", + "def display_table(table):\n", + " display(HTML(\n", + " '{}
'.format(\n", + " ''.join(\n", + " '{}'.format(''.join(str(_) for _ in row)) for row in table)\n", + " )\n", + " ))\n", + "\n", + "avg_active_cases = aggregated[\"ActiveCases\"].mean()\n", + "\n", + "aggr = aggregated.agg({\"ActiveCases\": \"sum\", \"Population\": \"sum\"}, as_index=False)\n", + "final_df = aggr.to_frame().T\n", + "final_df[\"PercentageInfected\"] = final_df.apply(lambda x: (x[\"ActiveCases\"]/x[\"Population\"]) * 100, axis=1)\n", + "percentage_infected = final_df[\"PercentageInfected\"].to_numpy()[0]\n", + "\n", + "display(HTML(\n", + " \"\"\"\n", + "

Result:

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Average active cases:{0:.2f}
Proportion of total
population currently infected:
{1:.2f}%
\n", + "
\n", + " \"\"\".format(avg_active_cases, percentage_infected))\n", + ")\n", + "\n", + "\n", + "print(\"\"\"\n", + "I was unsure whether the problem wanted the percentage of the population\n", + "affected for each country, so I have included the percentage for each country \n", + "as well, just in case:\n", + "\"\"\")\n", + "aggregated.head()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 439 + }, + "cellView": "form", + "id": "lRtwSfqSAPAY", + "outputId": "a7037d5d-fbd6-48b3-e47b-32090720dfd1" + }, + "execution_count": 87, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "" + ], + "text/html": [ + "\n", + "

Result:

\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Average active cases:60038.20
Proportion of total
population currently infected:
0.17%
\n", + "
\n", + " " + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "I was unsure whether the problem wanted the percentage of the population\n", + "affected for each country, so I have included the percentage for each country \n", + "as well, just in case:\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + " Country ActiveCases Population PercentageInfected\n", + "129 Martinique 222576.901869 374087.0 0.594987\n", + "68 Faeroe Islands 26936.998989 49233.0 0.547133\n", + "195 St. Barth 4854.999825 9945.0 0.488185\n", + "84 Guadeloupe 193026.939904 399794.0 0.482816\n", + "93 Iceland 130899.111498 345393.0 0.378986" + ], + "text/html": [ + "\n", + "
\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CountryActiveCasesPopulationPercentageInfected
129Martinique222576.901869374087.00.594987
68Faeroe Islands26936.99898949233.00.547133
195St. Barth4854.9998259945.00.488185
84Guadeloupe193026.939904399794.00.482816
93Iceland130899.111498345393.00.378986
\n", + "
\n", + " \n", + " \n", + " \n", + "\n", + " \n", + "
\n", + "
\n", + " " + ] + }, + "metadata": {}, + "execution_count": 87 + } + ] + } + ] +} \ No newline at end of file diff --git a/training/MSML-602-Final-Project-Final-Version.ipynb b/training/src/MSML-602-Final-Project-Final-Version.ipynb similarity index 100% rename from training/MSML-602-Final-Project-Final-Version.ipynb rename to training/src/MSML-602-Final-Project-Final-Version.ipynb diff --git a/training/MSML-602-Final-Project.ipynb b/training/src/MSML-602-Final-Project.ipynb similarity index 100% rename from training/MSML-602-Final-Project.ipynb rename to training/src/MSML-602-Final-Project.ipynb diff --git a/training/downloader.py b/training/src/downloader.py similarity index 100% rename from training/downloader.py rename to training/src/downloader.py diff --git a/web/.DS_Store b/web/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..118b7cef83a8c160b967ea3632dd5706d0c96202 Binary files /dev/null and b/web/.DS_Store differ diff --git a/web/.dockerignore b/web/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..85dcc16df69a9860507592c89f31f438c8fe7b41 --- /dev/null +++ b/web/.dockerignore @@ -0,0 +1,2 @@ +.git +node_modules diff --git a/web/.gitignore b/web/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..cdb93cd5ebbb0b464b7cec99bbc254d23aac6815 --- /dev/null +++ b/web/.gitignore @@ -0,0 +1 @@ +.python-version diff --git a/web/Dockerfile.backend b/web/Dockerfile.backend new file mode 100644 index 0000000000000000000000000000000000000000..56d7c3abb0d3191c96928f570bdfb087bac9706c --- /dev/null +++ b/web/Dockerfile.backend @@ -0,0 +1,11 @@ +FROM tensorflow/tensorflow:2.11.0 + +WORKDIR /app + +COPY ./requirements.txt /app/requirements.txt + +RUN pip install -r requirements.txt + +COPY ./src /app + +CMD ["python3", "-m", "flask", "run", "--host=0.0.0.0"] diff --git a/web/Dockerfile.frontend b/web/Dockerfile.frontend new file mode 100644 index 
0000000000000000000000000000000000000000..3cf56759b56f7255cad33f93dc2184cdbf6ebee1 --- /dev/null +++ b/web/Dockerfile.frontend @@ -0,0 +1,13 @@ +FROM node:18-alpine3.15 + +WORKDIR /app + +COPY package.json ./ + +COPY package-lock.json ./ + +RUN npm update && npm install + +COPY . . + +CMD ["npm", "start"] diff --git a/web/Makefile b/web/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..11e44fa440eb4dda3a019b2331a822715c045732 --- /dev/null +++ b/web/Makefile @@ -0,0 +1,19 @@ +.PHONY: build-backend +build-backend: ## Build backend services + docker build -f Dockerfile.backend -t pred-backend ./backend + +.PHONY: build-frontend +build-frontend: ## Build frontend services + docker build -f Dockerfile.frontend -t pred-frontend ./frontend + +.PHONY: up-prod +up-prod: ## Build both images and start the production stack + docker build -f Dockerfile.backend -t pred-backend ./backend + docker build -f Dockerfile.frontend -t pred-frontend ./frontend + docker-compose -f docker-compose.yml -f docker-compose.prod.yml up -d + +.PHONY: up-dev +up-dev: ## Build both images and start the development stack + docker build -f Dockerfile.backend -t pred-backend ./backend + docker build -f Dockerfile.frontend -t pred-frontend ./frontend + docker-compose up -d diff --git a/web/README.md b/web/README.md new file mode 100644 index 0000000000000000000000000000000000000000..101b6c4e2510bc9a88cc7aeb0522db4d81347118 --- /dev/null +++ b/web/README.md @@ -0,0 +1 @@ +MSML 602 Assignments/Projects diff --git a/web/backend/.DS_Store b/web/backend/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..43b0834167ba7ae34323804f8857283a145c62e0 Binary files /dev/null and b/web/backend/.DS_Store differ diff --git a/web/backend/.dockerignore b/web/backend/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..6b8710a711f3b689885aa5c26c6c06bde348e82b --- /dev/null +++ b/web/backend/.dockerignore @@ -0,0 +1 @@ +.git diff --git a/web/backend/.gitignore b/web/backend/.gitignore new 
file mode 100644 index 0000000000000000000000000000000000000000..d35ef6f723159707e9228f1fd58cf9bc7f91595d --- /dev/null +++ b/web/backend/.gitignore @@ -0,0 +1,2 @@ +.python-version +__pycache__ diff --git a/web/backend/README.md b/web/backend/README.md new file mode 100644 index 0000000000000000000000000000000000000000..101b6c4e2510bc9a88cc7aeb0522db4d81347118 --- /dev/null +++ b/web/backend/README.md @@ -0,0 +1 @@ +MSML 602 Assignments/Projects diff --git a/web/backend/requirements.txt b/web/backend/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..76d1d6414d1e33a91ca898a596b3a7f51ff49915 --- /dev/null +++ b/web/backend/requirements.txt @@ -0,0 +1,7 @@ +flask==2.2.2 +flask-cors==3.0.10 +pandas==1.5.0 +numpy==1.23.3 +scipy==1.9.1 +scikit-learn==1.1.2 +requests-cache==0.9.7 diff --git a/web/backend/src/.DS_Store b/web/backend/src/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..1d8f5849d5dca4c4e7d71dc165035cefeec86a14 Binary files /dev/null and b/web/backend/src/.DS_Store differ diff --git a/web/backend/src/app.py b/web/backend/src/app.py new file mode 100644 index 0000000000000000000000000000000000000000..9db5907ccf7313e144fe23c24ac998a3e463b915 --- /dev/null +++ b/web/backend/src/app.py @@ -0,0 +1,244 @@ +from flask import Flask, request +import tensorflow as tf +from datetime import datetime, timedelta +import logging +import requests +import requests_cache +import pandas as pd +import json +import numpy as np +import pickle +import math +import pytz + +from flask_cors import CORS, cross_origin + + +session = requests_cache.CachedSession('requests-cache') + +app = Flask(__name__) +cors = CORS(app) +app.config['CORS_HEADERS'] = 'Content-Type' + +app.logger.setLevel(logging.INFO) +DATE_FORMAT = "%Y-%m-%d %H:%M:%S" +API_KEY = "e1f10a1e78da46f5b10a1e78da96f525" +BASE_URL = 
"https://api.weather.com/v1/location/KDCA:9:US/observations/historical.json?apiKey={api_key}&units=e&startDate={start_date}&endDate={end_date}" +model = tf.keras.models.load_model('/app/model', compile=False) + +scaler = pickle.load(open('./model/scaler.pkl','rb')) +cols_to_scale = ["pressure", "wspd","heat_index","dewPt", "rh", "vis", "wc", "wdir_degree", "clds_ordinal", + "day_sin", "day_cos", "year_sin", "year_cos", "wdir_sin", "wdir_cos"] + +def get_NaN_counts(df): + nan_counts = df.isna().sum() + return pd.concat([nan_counts, ((nan_counts/len(df))*100).round(2)], + axis=1, + keys=["NaN count", "Percentage"]) + +def clds_to_ordinal(row): + mapping = { + "SKC": 0, + "CLR": 0, + "FEW": 1, + "SCT": 2, + "BKN": 3, + "OVC": 4, + "VV": 5 + } + clds = row["clds"] + if pd.isnull(clds): + return np.NaN + return mapping[clds] + +def clean_wspd(row): + if row["wdir_cardinal"] == "CALM": + return 0 + return row["wspd"] + +def restrict_wspd(row): + if row["wspd"] < 0: + return 0 + return row["wspd"] + +def restrict_rh(row): + if row["rh"] < 0: + return 0 + if row["rh"] > 100: + return 100 + return row["rh"] + +def clean_wdir(row): + if row["wdir_cardinal"] == "CALM": + return 0 + return row["wdir"] + +def wdir_cardinal_to_deg(row): + wdir = row["wdir"] + if not pd.isnull(wdir): + return wdir + cardinal_directions = { + 'N': 0, + 'NNE': 22.5, + 'NE': 45, + 'ENE': 67.5, + 'E': 90, + 'ESE': 112.5, + 'SE': 135, + 'SSE': 157.5, + 'S': 180, + 'SSW': 202.5, + 'SW': 225, + 'WSW': 247.5, + 'W': 270, + 'WNW': 292.5, + 'NW': 315, + 'NNW': 337.5, + 'CALM': 0, + 'VAR': -1 + } + wdir_cardinal = row["wdir_cardinal"] + + return cardinal_directions[wdir_cardinal] if wdir_cardinal in cardinal_directions else np.NaN + +def prepare_dataframe(_df, start_timestamp, end_timestamp): + dates_df = pd.DataFrame() + dates_df["obs_timestamp"] = pd.date_range(start_timestamp, end_timestamp, freq="H") + + _df = dates_df.merge(_df, how='left', on='obs_timestamp') + _df = _df.astype( + { + 'temp': 'float', 
+ 'pressure': 'float', + 'wspd': 'float', + 'heat_index': 'float' + }, + ) + + _df["wdir_cardinal"].fillna(method="bfill", inplace=True) + _df["wdir_degree"] = _df.apply(wdir_cardinal_to_deg, axis=1) + _df["clds_ordinal"] = _df.apply(clds_to_ordinal, axis=1) + _df["temp"].interpolate("polynomial", order=2, inplace=True) + _df["pressure"].interpolate("polynomial", order=2, inplace=True) + _df["heat_index"].interpolate("polynomial", order=2, inplace=True) + _df["wdir"].fillna(method="bfill", inplace=True) + _df["wdir"] = _df.apply(clean_wdir, axis=1) + _df["wspd"] = _df.apply(clean_wspd, axis=1) + _df["wspd"].interpolate("polynomial", order=2, inplace=True) + _df["wspd"] = _df.apply(restrict_wspd, axis=1) + _df["clds"].fillna(method="bfill", inplace=True) + _df["clds_ordinal"].interpolate("linear", inplace=True) + _df["dewPt"].interpolate("polynomial", order=2, inplace=True) + _df["rh"].interpolate("polynomial", order=2, inplace=True) + _df["rh"] = _df.apply(restrict_rh, axis=1) + _df["wc"].interpolate("polynomial", order=2, inplace=True) + _df["vis"].fillna(method="bfill", inplace=True) + _df.drop(["wdir", "wdir_cardinal", "clds"], axis=1, inplace=True) + + _df = _df.dropna() + + _df = _df.sort_values(by=['obs_timestamp']) + date_time = _df.pop('obs_timestamp') + timestamp_s = date_time.map(pd.Timestamp.timestamp) + day = 24*60*60 + year = (365.2425)*day + + _df['day_sin'] = np.sin(timestamp_s * (2 * np.pi / day)) + _df['day_cos'] = np.cos(timestamp_s * (2 * np.pi / day)) + _df['year_sin'] = np.sin(timestamp_s * (2 * np.pi / year)) + _df['year_cos'] = np.cos(timestamp_s * (2 * np.pi / year)) + _df['wdir_sin'] = np.sin(_df["wdir_degree"]) + _df['wdir_cos'] = np.cos(_df["wdir_degree"]) + + return _df, date_time + + +def map_data_to_dataframe(data, target_date): + end_timestamp = target_date - timedelta(minutes=8) + start_timestamp = end_timestamp - timedelta(days=8) + timedelta(hours=1) + + df = pd.read_json(json.dumps(data)) + df["obs_timestamp"] = df.apply(lambda x: 
datetime.fromtimestamp(x["valid_time_gmt"]).strftime(DATE_FORMAT), axis=1) + df = df.astype({'obs_timestamp': 'datetime64[ns]'}) + initial_cols = ["temp", "obs_timestamp", "pressure", "wspd", "heat_index", "dewPt", "rh", "vis", "wc", "wdir", "wdir_cardinal", "clds" ] + df = df[initial_cols] + + df, _ = prepare_dataframe(df, start_timestamp.strftime(DATE_FORMAT), end_timestamp.strftime(DATE_FORMAT)) + return df + + +def map_to_timestamp(predictions, target_date): + start = target_date + timedelta(hours=1) + end = start + timedelta(hours=23) + target_hours = [x.to_pydatetime().strftime(DATE_FORMAT) for x in pd.date_range(start, end, freq="H")] + return { h: predictions[idx] for idx, h in enumerate(target_hours)} + +def predict(df): + predict_df = df[-168:] + predict_df_features = predict_df[cols_to_scale] + predict_df_features = scaler.transform(predict_df_features.values) + predict_df[cols_to_scale] = predict_df_features + predictions = model(predict_df.to_numpy().reshape(1, 168, 16)) + return predictions + +def predict_for_date(target_date): + date_format = "%Y%m%d" + start_date = target_date - timedelta(days=9) + res = session.get(BASE_URL.format(api_key=API_KEY, start_date=start_date.strftime(date_format), end_date=target_date.strftime(date_format))) + data = res.json() + df = map_data_to_dataframe(data["observations"], target_date) + predictions = predict(df) + flattened = list(map(lambda x: math.floor(x), predictions.numpy().flatten().tolist())) + return map_to_timestamp(flattened, target_date) + +def get_actual_temperatures(target_date): + date_format = "%Y%m%d" + start_date = target_date - timedelta(days=1) #Because api uses utc + end_date = target_date + timedelta(days=1) + start_date_str = (start_date - timedelta(days=1)).strftime(date_format) + end_date_str = end_date.strftime(date_format) + today = datetime.today().astimezone(pytz.timezone("America/New_York")).date() + req_url = BASE_URL.format(api_key=API_KEY, start_date=start_date_str, 
end_date=end_date_str) + if target_date.date() < today: + res = session.get(req_url) + else: + res = requests.get(req_url) + start_timestamp = target_date + timedelta(minutes=52) + end_timestamp = end_date + timedelta(days=1) - timedelta(minutes=8) + + + data = res.json() + df = pd.read_json(json.dumps(data["observations"])) + df["obs_timestamp"] = df.apply(lambda x: datetime.fromtimestamp(x["valid_time_gmt"]).astimezone(pytz.timezone("America/New_York")).strftime(DATE_FORMAT), axis=1) + df = df.astype({'obs_timestamp': 'datetime64[ns]'}) + initial_cols = ["temp", "obs_timestamp"] + df = df[initial_cols] + dates_df = pd.DataFrame() + dates_df["obs_timestamp"] = pd.date_range(start_timestamp, end_timestamp, freq="H") + df = dates_df.merge(df, how='left', on='obs_timestamp') + + df["obs_timestamp"] = df.apply(lambda x: (x["obs_timestamp"] + timedelta(minutes=8)).strftime(DATE_FORMAT), axis=1) + dicts = df.to_dict("records") + reduced = { k["obs_timestamp"]: k["temp"] for k in dicts} + for k in reduced: + if np.isnan(reduced[k]): + reduced[k] = None + return reduced + +@app.route("/predictions") +@cross_origin() +def get_predictions(): + today = datetime.today().astimezone(pytz.timezone("America/New_York")).date() + target_date = datetime.strptime(request.args["target_date"], "%Y-%m-%d") + app.logger.info(today) + app.logger.info(target_date) + # target_dates = list(filter(lambda x: x < today, [x.to_pydatetime() for x in pd.date_range(start_date, end_date, freq="D").to_list()])) + predictions = predict_for_date(target_date) + actual_temp = get_actual_temperatures(target_date) if target_date.date() <= today else None + + merged = { k: {"predicted": predictions[k], "actual": actual_temp[k] if actual_temp else None} for k in predictions} + response = app.response_class(response=json.dumps(merged), + status=200, + mimetype='application/json') + return response + diff --git a/web/backend/src/model/keras_metadata.pb b/web/backend/src/model/keras_metadata.pb new file mode 
100644 index 0000000000000000000000000000000000000000..3c430558dd2991b086a220a96537f6cc10450c6c --- /dev/null +++ b/web/backend/src/model/keras_metadata.pb @@ -0,0 +1,8 @@ + +�%root"_tf_keras_sequential*�%{"name": "sequential", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 168, 16]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "lstm_input"}}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 12, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 24, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Zeros", "config": {}}, "bias_initializer": {"class_name": "Zeros", "config": {}}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}}, {"class_name": "Reshape", "config": {"name": "reshape", "trainable": 
true, "dtype": "float32", "target_shape": {"class_name": "__tuple__", "items": [24, 1]}}}]}, "shared_object_id": 10, "build_input_shape": {"class_name": "TensorShape", "items": [null, 168, 16]}, "is_graph_network": true, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 168, 16]}, "float32", "lstm_input"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 168, 16]}, "float32", "lstm_input"]}, "keras_version": "2.9.0", "backend": "tensorflow", "model_config": {"class_name": "Sequential", "config": {"name": "sequential", "layers": [{"class_name": "InputLayer", "config": {"batch_input_shape": {"class_name": "__tuple__", "items": [null, 168, 16]}, "dtype": "float32", "sparse": false, "ragged": false, "name": "lstm_input"}, "shared_object_id": 0}, {"class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 12, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5}, {"class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 24, 
"activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8}, {"class_name": "Reshape", "config": {"name": "reshape", "trainable": true, "dtype": "float32", "target_shape": {"class_name": "__tuple__", "items": [24, 1]}}, "shared_object_id": 9}]}}, "training_config": {"loss": {"class_name": "MeanSquaredError", "config": {"reduction": "auto", "name": "mean_squared_error"}, "shared_object_id": 11}, "metrics": null, "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Adam", "config": {"name": "Adam", "learning_rate": 0.0010000000474974513, "decay": 0.0, "beta_1": 0.8999999761581421, "beta_2": 0.9990000128746033, "epsilon": 1e-07, "amsgrad": false}}}}2 +� root.layer_with_weights-0"_tf_keras_rnn_layer*� +{"name": "lstm", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "LSTM", "config": {"name": "lstm", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 12, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, 
"recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 5, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 16]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 12}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 168, 16]}}2 +�root.layer_with_weights-1"_tf_keras_layer*�{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 24, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 8, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 12}}, "shared_object_id": 13}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 12]}}2 +� root.layer-2"_tf_keras_layer*�{"name": "reshape", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": "Reshape", "config": {"name": "reshape", "trainable": true, "dtype": "float32", "target_shape": {"class_name": "__tuple__", "items": [24, 1]}}, "shared_object_id": 9}2 +�root.layer_with_weights-0.cell"_tf_keras_layer*�{"name": "lstm_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "class_name": 
"LSTMCell", "config": {"name": "lstm_cell", "trainable": true, "dtype": "float32", "units": 12, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 1}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 2}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 3}, "unit_forget_bias": true, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2}, "shared_object_id": 4}2 +�^root.keras_api.metrics.0"_tf_keras_metric*�{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 14}2 \ No newline at end of file diff --git a/web/backend/src/model/saved_model.pb b/web/backend/src/model/saved_model.pb new file mode 100644 index 0000000000000000000000000000000000000000..61914fa8cd611145798d1b1f4baa5b9577abb0de Binary files /dev/null and b/web/backend/src/model/saved_model.pb differ diff --git a/web/backend/src/model/scaler.pkl b/web/backend/src/model/scaler.pkl new file mode 100644 index 0000000000000000000000000000000000000000..c3e37aeb139bc6625373d9d05e0985c23fa86fa9 Binary files /dev/null and b/web/backend/src/model/scaler.pkl differ diff --git a/web/backend/src/model/variables/variables.data-00000-of-00001 b/web/backend/src/model/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..14393b7ed4f3ddf02717455e8dc8c3c844bc8c54 Binary files /dev/null and b/web/backend/src/model/variables/variables.data-00000-of-00001 differ diff --git a/web/backend/src/model/variables/variables.index b/web/backend/src/model/variables/variables.index new file mode 100644 index 
0000000000000000000000000000000000000000..9e2b3a5eadf5bc1f8b227ec9c791f21c9113cc6b Binary files /dev/null and b/web/backend/src/model/variables/variables.index differ diff --git a/web/docker-compose.prod.yml b/web/docker-compose.prod.yml new file mode 100644 index 0000000000000000000000000000000000000000..4e621ccbe6ce8d213015a942036aea9242904e48 --- /dev/null +++ b/web/docker-compose.prod.yml @@ -0,0 +1,10 @@ +version: "3.5" +services: + pred-backend: + ports: + - "81:5000" + + pred-frontend: + command: npm start + ports: + - "80:4200" diff --git a/web/docker-compose.yml b/web/docker-compose.yml new file mode 100644 index 0000000000000000000000000000000000000000..199afb5c286940b6c7835a3397b530db20fc518d --- /dev/null +++ b/web/docker-compose.yml @@ -0,0 +1,34 @@ +version: "3.5" +services: + pred-backend: + image: pred-backend:latest + ports: + - "3001:5000" + volumes: + - ./backend/src:/app + networks: + - prediction-project + logging: + driver: "json-file" + options: + max-size: "10m" + + pred-frontend: + image: pred-frontend:latest + command: npm run start-dev + ports: + - "4200:4200" + volumes: + - ./frontend/src:/app/src + networks: + - prediction-project + logging: + driver: "json-file" + options: + max-size: "10m" + +volumes: + db-volume: + +networks: + prediction-project: diff --git a/web/frontend/.browserslistrc b/web/frontend/.browserslistrc new file mode 100644 index 0000000000000000000000000000000000000000..4f9ac26980c156a3d525267010d5f78144b43519 --- /dev/null +++ b/web/frontend/.browserslistrc @@ -0,0 +1,16 @@ +# This file is used by the build system to adjust CSS and JS output to support the specified browsers below. 
+# For additional information regarding the format and rule options, please see: +# https://github.com/browserslist/browserslist#queries + +# For the full list of supported browsers by the Angular framework, please see: +# https://angular.io/guide/browser-support + +# You can see what browsers were selected by your queries by running: +# npx browserslist + +last 1 Chrome version +last 1 Firefox version +last 2 Edge major versions +last 2 Safari major versions +last 2 iOS major versions +Firefox ESR diff --git a/web/frontend/.dockerignore b/web/frontend/.dockerignore new file mode 100644 index 0000000000000000000000000000000000000000..70d5723693800f5cd32dd0db6d4d9d9ebfd4cba3 --- /dev/null +++ b/web/frontend/.dockerignore @@ -0,0 +1,3 @@ +node_modules +.github +.angular diff --git a/web/frontend/.editorconfig b/web/frontend/.editorconfig new file mode 100644 index 0000000000000000000000000000000000000000..59d9a3a3e73ffc640517ef488f6f89d6270195d1 --- /dev/null +++ b/web/frontend/.editorconfig @@ -0,0 +1,16 @@ +# Editor configuration, see https://editorconfig.org +root = true + +[*] +charset = utf-8 +indent_style = space +indent_size = 2 +insert_final_newline = true +trim_trailing_whitespace = true + +[*.ts] +quote_type = single + +[*.md] +max_line_length = off +trim_trailing_whitespace = false diff --git a/web/frontend/.github/CODE_OF_CONDUCT.md b/web/frontend/.github/CODE_OF_CONDUCT.md new file mode 100644 index 0000000000000000000000000000000000000000..64da87772ba970899e7b8ad3ee10be7bb7851b11 --- /dev/null +++ b/web/frontend/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, 
race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at . The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ \ No newline at end of file diff --git a/web/frontend/.github/COMMIT_CONVENTION.md b/web/frontend/.github/COMMIT_CONVENTION.md new file mode 100644 index 0000000000000000000000000000000000000000..261c7ca20e4595661f530350ce622794137f9a33 --- /dev/null +++ b/web/frontend/.github/COMMIT_CONVENTION.md @@ -0,0 +1,83 @@ +## Git Commit Message Convention + +> This is adapted from [Angular's commit convention](https://github.com/conventional-changelog/conventional-changelog/blob/master/packages/conventional-changelog-angular/convention.md). + +#### Examples + +Appears under "Features" header, `compiler` subheader: + +``` +feat(compiler): add 'comments' option +``` + +Appears under "Bug Fixes" header, `sidebar` subheader, with a link to issue #28: + +``` +fix(sidebar): handle events on blur + +close #28 +``` + +Appears under "Performance Improvements" header, and under "Breaking Changes" with the breaking change explanation: + +``` +perf(core): improve vdom diffing by removing 'foo' option + +BREAKING CHANGE: The 'foo' option has been removed. 
+``` + +The following commit and commit `667ecc1` do not appear in the changelog if they are under the same release. If not, the revert commit appears under the "Reverts" header. + +``` +revert: feat(compiler): add 'comments' option + +This reverts commit 667ecc1654a317a13331b17617d973392f415f02. +``` + +### Full Message Format + +A commit message consists of a **header**, **body** and **footer**. The header has a **type**, **scope** and **subject**: + +``` +<type>(<scope>): <subject> +<BLANK LINE> +<body> +<BLANK LINE> +<footer>