Delete MAGInet_final.ipynb
MAGInet_final.ipynb (DELETED, +0 -776)
@@ -1,776 +0,0 @@

The deleted notebook consisted of a single code cell (id e2789c26-d501-41f2-98e1-3ef2940e3f80, execution_count null, no saved outputs). Its Python source is reconstructed below with the notebook-JSON wrapper stripped; the notebook metadata is summarized at the end.
# Final version...
import torch
import torch.nn as nn
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error
import os
import logging
import joblib
from tqdm import tqdm
import tempfile
import json
from math import radians, cos, sin, asin, sqrt, atan2, degrees
import time

# ============================
# Configure Logging
# ============================

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# ============================
# Helper Functions
# ============================

def add_time_decimal_feature(df):
    """
    Add 'time_decimal' feature by combining 'hour' and 'minutes'.

    :param df: DataFrame with 'hour' and 'minutes' columns.
    :return: DataFrame with 'time_decimal' and without 'hour' and 'minutes'.
    """
    if 'hour' in df.columns and 'minutes' in df.columns:
        logging.info("Adding 'time_decimal' feature...")
        df['time_decimal'] = df['hour'] + df['minutes'] / 60.0
        df = df.drop(columns=['hour', 'minutes'])  # Drop 'hour' and 'minutes' after creation
        logging.info("'time_decimal' feature added.")
    else:
        logging.warning("'hour' and/or 'minutes' columns not found. Skipping 'time_decimal' feature addition.")
    return df

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great-circle distance between two points on the Earth.

    :param lon1: Longitude of point 1 (in decimal degrees)
    :param lat1: Latitude of point 1 (in decimal degrees)
    :param lon2: Longitude of point 2 (in decimal degrees)
    :param lat2: Latitude of point 2 (in decimal degrees)
    :return: Distance in kilometers
    """
    # Convert decimal degrees to radians
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = map(np.radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2_rad - lon1_rad
    dlat = lat2_rad - lat1_rad
    a = np.sin(dlat/2)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(dlon/2)**2
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # Radius of Earth in kilometers
    return c * r
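# Illustrative aside (not in the original notebook): haversine() is written
# with NumPy ufuncs, so it accepts scalars or whole arrays. As a sanity check,
# one degree of latitude is about 111.19 km for r = 6371 km.
def _demo_haversine_check():
    print(haversine(0.0, 0.0, 0.0, 1.0))    # ~111.19 km per degree of latitude
    lons = np.array([0.0, 1.0, 2.0])
    lats = np.array([1.0, 0.0, 0.0])
    print(haversine(0.0, 0.0, lons, lats))  # vectorized: three distances at once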
def calculate_bearing(lon1, lat1, lon2, lat2):
    """
    Calculate the initial bearing between two points.

    :param lon1: Longitude of point 1 (in decimal degrees)
    :param lat1: Latitude of point 1 (in decimal degrees)
    :param lon2: Longitude of point 2 (in decimal degrees)
    :param lat2: Latitude of point 2 (in decimal degrees)
    :return: Bearing in degrees, measured clockwise from north, in [0, 360)
    """
    # Convert decimal degrees to radians
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = map(radians, [lon1, lat1, lon2, lat2])

    dlon = lon2_rad - lon1_rad
    x = sin(dlon) * cos(lat2_rad)
    y = cos(lat1_rad) * sin(lat2_rad) - (sin(lat1_rad) * cos(lat2_rad) * cos(dlon))

    # Standard initial-bearing formula:
    # atan2(sin(dlon)*cos(lat2), cos(lat1)*sin(lat2) - sin(lat1)*cos(lat2)*cos(dlon))
    initial_bearing = atan2(x, y)

    # Convert from radians to degrees and normalize
    initial_bearing = degrees(initial_bearing)
    compass_bearing = (initial_bearing + 360) % 360

    return compass_bearing

def angular_divergence(bearing1, bearing2):
    """
    Calculate the smallest angle difference between two bearings.

    :param bearing1: First bearing in degrees
    :param bearing2: Second bearing in degrees
    :return: Angular divergence in degrees, in [0, 180]
    """
    diff = abs(bearing1 - bearing2) % 360
    return min(diff, 360 - diff)
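# Illustrative aside (not in the original notebook): the modulo in
# angular_divergence() handles the 0/360 degree wraparound, so headings on
# either side of due north compare correctly.
def _demo_bearing_examples():
    print(calculate_bearing(0.0, 0.0, 0.0, 1.0))  # 0.0 (due north)
    print(angular_divergence(350.0, 10.0))        # 20.0, not 340.0
    print(angular_divergence(90.0, 270.0))        # 180.0 (opposite headings)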
def denormalize(scaled_lat, scaled_lon, scaler, lat_idx, lon_idx):
    """
    Denormalize latitude and longitude using the scaler's parameters.

    :param scaled_lat: Scaled latitude values (numpy array).
    :param scaled_lon: Scaled longitude values (numpy array).
    :param scaler: The scaler object used for normalization.
    :param lat_idx: Index of 'latitude_degrees' in the scaler's feature list.
    :param lon_idx: Index of 'longitude_degrees' in the scaler's feature list.
    :return: Tuple of (denormalized_lat, denormalized_lon).
    """
    lat_min = scaler.data_min_[lat_idx]
    lat_max = scaler.data_max_[lat_idx]
    lon_min = scaler.data_min_[lon_idx]
    lon_max = scaler.data_max_[lon_idx]

    denorm_lat = scaled_lat * (lat_max - lat_min) + lat_min
    denorm_lon = scaled_lon * (lon_max - lon_min) + lon_min
    return denorm_lat, denorm_lon
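# Illustrative aside (not in the original notebook): denormalize() inverts a
# MinMaxScaler column by column through its data_min_/data_max_ attributes,
# which assumes the scaler's default feature_range of (0, 1). A minimal
# round-trip check on synthetic [lat, lon] data:
def _demo_denormalize_roundtrip():
    from sklearn.preprocessing import MinMaxScaler
    coords = np.array([[40.0, -30.0], [50.0, -20.0], [60.0, -10.0]])  # [lat, lon] rows
    scaler = MinMaxScaler().fit(coords)
    scaled = scaler.transform(coords)
    lat, lon = denormalize(scaled[:, 0], scaled[:, 1], scaler, lat_idx=0, lon_idx=1)
    assert np.allclose(lat, coords[:, 0]) and np.allclose(lon, coords[:, 1])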
def create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale):
    """
    Create input and output sequences grouped by original MMSI.
    Returns scaled last known positions.
    """
    Xs, ys, mmsis = [], [], []
    last_known_positions_scaled = []

    grouped = df_scaled.groupby('original_mmsi')

    for mmsi, group in tqdm(grouped, desc="Creating sequences"):
        if len(group) >= seq_len + forecast_horizon:
            for i in range(len(group) - seq_len - forecast_horizon + 1):
                # Select scaled features for the sequence
                sequence = group.iloc[i:(i + seq_len)][features_to_scale].to_numpy()

                # Future positions to predict (scaled)
                future_positions = group[['latitude_degrees', 'longitude_degrees']].iloc[i + seq_len:i + seq_len + forecast_horizon].to_numpy()

                # Future hour feature
                future_hour = group[['time_decimal']].iloc[i + seq_len].values[0]
                future_hour_feature = np.full((seq_len, 1), future_hour)

                # Combine sequence with future_hour_feature
                sequence_with_future_hour = np.hstack((sequence, future_hour_feature))

                Xs.append(sequence_with_future_hour)
                ys.append(future_positions)
                mmsis.append(mmsi)

                # Store last known positions (scaled)
                last_lat_scaled = group['latitude_degrees'].iloc[i + seq_len - 1]
                last_lon_scaled = group['longitude_degrees'].iloc[i + seq_len - 1]
                last_known_positions_scaled.append((last_lat_scaled, last_lon_scaled))

    return np.array(Xs, dtype=np.float32), np.array(ys, dtype=np.float32), np.array(mmsis), last_known_positions_scaled
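# Illustrative aside (not in the original notebook): per track of length T the
# sliding window yields T - seq_len - forecast_horizon + 1 samples, and each
# input window gains the next step's time_decimal as a broadcast 15th column.
# A shape check on synthetic data (column names as in the notebook):
def _demo_sequence_shapes():
    demo_features = [
        "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
        "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
        "ship_type", "day", "month", "year", "time_decimal"
    ]
    rng = np.random.default_rng(0)
    df_demo = pd.DataFrame(rng.random((30, len(demo_features))), columns=demo_features)
    df_demo['original_mmsi'] = 123456789  # one synthetic track, 30 points
    X, y, _, _ = create_dataset_grouped_by_mmsi(df_demo, 24, 1, demo_features)
    print(X.shape, y.shape)  # (6, 24, 15) and (6, 1, 2): 30 - 24 - 1 + 1 = 6 windows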
# ============================
# Model Definitions
# ============================

class LSTMModelTeacher(nn.Module):
    def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=7, dropout=0.2):
        """
        Teacher LSTM Model.

        :param in_dim: Number of input features.
        :param hidden_dim: Number of hidden units.
        :param forecast_horizon: Number of future steps to predict.
        :param n_layers: Number of LSTM layers.
        :param dropout: Dropout rate.
        """
        super(LSTMModelTeacher, self).__init__()
        self.forecast_horizon = forecast_horizon  # Store as an instance attribute
        self.embedding = nn.Linear(in_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, forecast_horizon * 2)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])  # Use the last timestep for prediction
        x = x.view(-1, self.forecast_horizon, 2)  # Shape: (batch_size, forecast_horizon, 2)
        return x

class LSTMModelStudent(nn.Module):
    def __init__(self, in_dim, hidden_dim, forecast_horizon, n_layers=3, dropout=0.2):
        """
        Student LSTM Model.

        :param in_dim: Number of input features.
        :param hidden_dim: Number of hidden units.
        :param forecast_horizon: Number of future steps to predict.
        :param n_layers: Number of LSTM layers.
        :param dropout: Dropout rate.
        """
        super(LSTMModelStudent, self).__init__()
        self.forecast_horizon = forecast_horizon  # Store as an instance attribute
        self.embedding = nn.Linear(in_dim, hidden_dim)
        self.lstm = nn.LSTM(hidden_dim, hidden_dim, num_layers=n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, forecast_horizon * 2)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = self.fc(x[:, -1, :])  # Use the last timestep for prediction
        x = x.view(-1, self.forecast_horizon, 2)  # Shape: (batch_size, forecast_horizon, 2)
        return x
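# Illustrative aside (not in the original notebook): both classes map a tensor
# of shape (batch, seq_len, in_dim) to (batch, forecast_horizon, 2) lat/lon
# predictions taken from the last LSTM timestep; they differ only in default
# depth (7 layers for the teacher vs 3 for the student). A smoke test with
# random, untrained weights:
def _demo_model_shapes():
    student = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1)
    student.eval()
    with torch.no_grad():
        out = student(torch.randn(32, 24, 15))  # (batch, seq_len, features)
    print(out.shape)  # torch.Size([32, 1, 2])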
# ============================
# Model Loading Functions
# ============================

def load_models(model_paths):
    """
    Load teacher and student models, including submodels for North, Mid, and South areas.

    :param model_paths: Dictionary containing paths to the models.
    :return: Dictionary of loaded models.
    """
    models = {}
    logging.info("Loading Teacher model...")
    # Load Teacher Model (Global)
    teacher = LSTMModelTeacher(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=7, dropout=0.2)  # 15 features including 'future_hour_feature'
    teacher.load_state_dict(torch.load(model_paths['teacher'], map_location=torch.device('cpu')))
    teacher.eval()
    models['Teacher'] = teacher
    logging.info("Teacher model loaded successfully.")

    logging.info("Loading Student North model...")
    # Load Student Models (Sub-areas)
    student_north = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
    student_north.load_state_dict(torch.load(model_paths['student_north'], map_location=torch.device('cpu')))
    student_north.eval()
    models['Student_North'] = student_north
    logging.info("Student North model loaded successfully.")

    logging.info("Loading Student Mid model...")
    student_mid = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
    student_mid.load_state_dict(torch.load(model_paths['student_mid'], map_location=torch.device('cpu')))
    student_mid.eval()
    models['Student_Mid'] = student_mid
    logging.info("Student Mid model loaded successfully.")

    logging.info("Loading Student South model...")
    student_south = LSTMModelStudent(in_dim=15, hidden_dim=200, forecast_horizon=1, n_layers=3, dropout=0.2)
    student_south.load_state_dict(torch.load(model_paths['student_south'], map_location=torch.device('cpu')))
    student_south.eval()
    models['Student_South'] = student_south
    logging.info("Student South model loaded successfully.")

    return models

def load_scalers(scaler_paths):
    """
    Load scalers for each model.

    :param scaler_paths: Dictionary containing paths to the scaler files.
    :return: Dictionary of loaded scalers.
    """
    loaded_scalers = {}
    for model_name, scaler_path in scaler_paths.items():
        if os.path.exists(scaler_path):
            loaded_scalers[model_name] = joblib.load(scaler_path)
            logging.info(f"Loaded scaler for {model_name} from '{scaler_path}'.")
        else:
            logging.error(f"Scaler file for {model_name} not found at '{scaler_path}'.")
            raise FileNotFoundError(f"Scaler file for {model_name} not found at '{scaler_path}'. Please provide the correct path.")
    return loaded_scalers
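# Illustrative aside (not in the original notebook): load_scalers() expects
# joblib-serialized scaler objects, one per model, fitted on the matching
# training split. A sketch of how such a file would be produced (synthetic
# stand-in data; the notebook's real training code is not shown on this page):
def _demo_dump_scaler(path='scaler_train_wholedata.joblib'):
    from sklearn.preprocessing import MinMaxScaler
    synthetic_train = np.random.rand(100, 14)  # stand-in for the 14 input features
    joblib.dump(MinMaxScaler().fit(synthetic_train), path)
    return joblib.load(path)  # load_scalers() does exactly this per model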
# ============================
# Model Selection Logic
# ============================

def determine_subarea(df):
    """
    Determine the sub-area (North, Mid, South) based on latitude and longitude ranges.

    :param df: DataFrame containing 'latitude_degrees' and 'longitude_degrees'.
    :return: String indicating the sub-area.
    """
    # Define sub-area boundaries
    subareas = {
        'North': {'lat_min': 30, 'lat_max': 60, 'lon_min': -80, 'lon_max': -10},
        'Mid': {'lat_min': 0, 'lat_max': 30, 'lon_min': -80, 'lon_max': 10},
        'South': {'lat_min': -80, 'lat_max': 0, 'lon_min': -60, 'lon_max': 20}
    }

    # Count the number of data points in each sub-area
    counts = {}
    for area, bounds in subareas.items():
        count = df[
            (df['latitude_degrees'] >= bounds['lat_min']) & (df['latitude_degrees'] <= bounds['lat_max']) &
            (df['longitude_degrees'] >= bounds['lon_min']) & (df['longitude_degrees'] <= bounds['lon_max'])
        ].shape[0]
        counts[area] = count
        logging.info(f"Sub-area '{area}': {count} records.")

    # Determine the sub-area with the maximum count
    predominant_subarea = max(counts, key=counts.get)
    logging.info(f"Predominant sub-area determined: {predominant_subarea}")

    # If no data points fall into any sub-area, default to Teacher
    if counts[predominant_subarea] == 0:
        logging.warning("No data points found in any sub-area. Defaulting to Teacher model.")
        return 'Teacher'

    return predominant_subarea

def select_model(models, subarea):
    """
    Select the appropriate model based on the sub-area.

    :param models: Dictionary of loaded models.
    :param subarea: String indicating the sub-area.
    :return: Tuple of (selected_model, selected_model_name).
    """
    if subarea in ['North', 'Mid', 'South']:
        selected_model = models.get(f'Student_{subarea}')
        selected_model_name = f'Student_{subarea}'
        logging.info(f"Selected model: {selected_model_name}")
        return selected_model, selected_model_name
    else:
        selected_model = models.get('Teacher')
        selected_model_name = 'Teacher'
        logging.info(f"Selected model: {selected_model_name}")
        return selected_model, selected_model_name
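# Illustrative aside (not in the original notebook): auto-selection counts how
# many rows fall inside each sub-area's bounding box and routes to that student
# model, falling back to the teacher when every count is zero.
def _demo_subarea_routing():
    df_points = pd.DataFrame({
        'latitude_degrees':  [45.0, 48.0, 52.0, 10.0],     # three points in the North box
        'longitude_degrees': [-40.0, -30.0, -20.0, -50.0]  # one point in the Mid box
    })
    print(determine_subarea(df_points))  # 'North' (3 records vs 1)
    # select_model(models, 'North') would then return models['Student_North']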
# ============================
# Evaluation Metrics Calculation
# ============================

def calculate_classic_metrics(y_true, y_pred):
    """
    Calculate MAE, MSE, and RMSE directly on latitude/longitude pairs.

    :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
    :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
    :return: Dictionary containing the classic metrics.
    """
    # Calculate MAE
    mae = mean_absolute_error(y_true, y_pred)

    # Calculate MSE
    mse = mean_squared_error(y_true, y_pred)

    # Calculate RMSE
    rmse = np.sqrt(mse)

    classic_metrics = {
        'MAE (degrees)': mae,
        'MSE (degrees^2)': mse,
        'RMSE (degrees)': rmse
    }

    logging.info(f"Calculated classic metrics: {classic_metrics}")

    return classic_metrics

def calculate_distance_metrics(y_true, y_pred):
    """
    Calculate metrics based on distance (in kilometers).

    :param y_true: Ground truth positions (numpy array of shape (num_samples, 2)).
    :param y_pred: Predicted positions (numpy array of shape (num_samples, 2)).
    :return: Dictionary containing the distance-based metrics.
    """
    # Calculate haversine distance between predicted and true positions
    distances = np.array([
        haversine(y_true[i, 1], y_true[i, 0], y_pred[i, 1], y_pred[i, 0])
        for i in range(len(y_true))
    ])  # Assuming columns are [latitude, longitude]

    # Calculate MAE
    mae = np.mean(np.abs(distances))

    # Calculate MSE
    mse = np.mean(np.square(distances))

    # Calculate RMSE
    rmse = np.sqrt(mse)

    # Calculate RSE (Relative Squared Error)
    variance = np.var(distances)
    rse = mse / variance if variance != 0 else float('inf')

    metrics = {
        'MAE (km)': mae,
        'MSE (km^2)': mse,
        'RMSE (km)': rmse,
        'RSE': rse
    }

    logging.info(f"Calculated distance metrics: {metrics}")

    return metrics
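# Illustrative aside (not in the original notebook): the degree-based metrics
# treat lat/lon as plain numbers, while the distance-based ones first convert
# each (true, predicted) pair into a haversine error in kilometers, so the two
# sets of numbers are not directly comparable.
def _demo_metric_units():
    y_true = np.array([[45.0, -30.0], [46.0, -31.0]])  # [lat, lon] pairs
    y_pred = np.array([[45.1, -30.0], [46.0, -31.2]])
    print(calculate_classic_metrics(y_true, y_pred))   # errors in degrees
    print(calculate_distance_metrics(y_true, y_pred))  # errors in kilometers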
# ============================
# Classical Metrics Prediction
# ============================

def classical_prediction(file, model_choice, min_mmsi, max_mmsi, models, loaded_scalers):
    """
    Preprocess the input CSV and make predictions using the selected model.
    Calculate classical evaluation metrics and include inference time.
    """
    try:
        logging.info("Starting classical prediction...")

        # Load the uploaded CSV file and filter based on MMSI
        logging.info("Loading uploaded CSV file...")
        df = pd.read_csv(file.name, delimiter=',')
        logging.info(f"Uploaded CSV file loaded with {df.shape[0]} records.")

        df = df[(df['mmsi'] >= min_mmsi) & (df['mmsi'] <= max_mmsi)]
        if df.empty:
            error_message = "No data available after applying MMSI filters."
            logging.error(error_message)
            return {"error": error_message}, None, None

        # Check if 'time_decimal' exists
        if 'time_decimal' not in df.columns:
            df = add_time_decimal_feature(df)
        else:
            logging.info("'time_decimal' feature already exists. Skipping creation.")

        expected_columns = [
            "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
            "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
            "ship_type", "day", "month", "year", "time_decimal"
        ]

        if list(df.columns) != expected_columns:
            error_message = (
                f"Input data does not have the correct columns.\n"
                f"Expected columns: {expected_columns}\n"
                f"Got columns: {list(df.columns)}"
            )
            logging.error(error_message)
            return {"error": error_message}, None, None

        logging.info("Input CSV has the correct columns.")

        # Select the appropriate model and scaler
        if model_choice == "Auto-Select":
            temp_df = df.copy()
            subarea = determine_subarea(temp_df)
            selected_model, selected_model_name = select_model(models, subarea)
            scaler = loaded_scalers[selected_model_name]
        else:
            if model_choice in models:
                selected_model = models[model_choice]
                selected_model_name = model_choice
                scaler = loaded_scalers[selected_model_name]
            else:
                error_message = f"Selected model '{model_choice}' is not available."
                logging.error(error_message)
                return {"error": error_message}, None, None

        logging.info(f"Using scaler for model: {selected_model_name}")

        # Normalize the data
        logging.info("Normalizing the data...")
        features_to_scale = [
            "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
            "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
            "ship_type", "day", "month", "year", "time_decimal"
        ]
        X_new = df[features_to_scale]
        X_scaled = scaler.transform(X_new)
        df_scaled = pd.DataFrame(X_scaled, columns=features_to_scale, index=df.index)
        df_scaled['original_mmsi'] = df['mmsi']

        # Create sequences and get last known positions (scaled)
        seq_len = 24
        forecast_horizon = 1
        X, y, mmsi_seq, last_known_positions_scaled = create_dataset_grouped_by_mmsi(df_scaled, seq_len, forecast_horizon, features_to_scale)

        if X.size == 0:
            error_message = "Not enough data to create sequences."
            logging.error(error_message)
            return {"error": error_message}, None, None

        logging.info(f"Created {X.shape[0]} sequences.")

        # Inference
        logging.info("Starting model inference...")
        test_dataset = torch.utils.data.TensorDataset(torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32))
        test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
        all_predictions = []
        all_y_true = []

        start_time = time.time()  # Start inference time tracking

        with torch.no_grad():
            for batch in test_loader:
                X_batch, y_batch = batch
                predictions = selected_model(X_batch).cpu().numpy()
                all_predictions.append(predictions)
                all_y_true.append(y_batch.numpy())

        inference_time = time.time() - start_time  # End inference time tracking

        all_predictions = np.concatenate(all_predictions, axis=0)
        y_true = np.concatenate(all_y_true, axis=0)
        y_pred = all_predictions

        logging.info(f"Inference completed in {inference_time:.2f} seconds.")

        # Denormalize predictions and real values
        lat_idx = features_to_scale.index("latitude_degrees")
        lon_idx = features_to_scale.index("longitude_degrees")
        pred_lat, pred_lon = denormalize(y_pred[:, :, 0], y_pred[:, :, 1], scaler, lat_idx, lon_idx)
        true_lat, true_lon = denormalize(y_true[:, :, 0], y_true[:, :, 1], scaler, lat_idx, lon_idx)

        # Denormalize last known positions
        last_lat_scaled = np.array([pos[0] for pos in last_known_positions_scaled])
        last_lon_scaled = np.array([pos[1] for pos in last_known_positions_scaled])

        last_lat_denorm, last_lon_denorm = denormalize(
            last_lat_scaled, last_lon_scaled, scaler, lat_idx, lon_idx
        )

        # Calculate the classic evaluation metrics
        y_true_pairs = np.column_stack((true_lat.flatten(), true_lon.flatten()))
        y_pred_pairs = np.column_stack((pred_lat.flatten(), pred_lon.flatten()))
        classic_metrics = calculate_classic_metrics(y_true=y_true_pairs, y_pred=y_pred_pairs)
        classic_metrics['Inference Time (seconds)'] = inference_time  # Include inference time

        # Prepare metrics and output CSV
        metrics_df = pd.DataFrame([classic_metrics])
        metrics_json = metrics_df.to_json(orient="records")
        metrics_json = json.loads(metrics_json)[0]

        # Prepare predicted and real positions DataFrame
        predicted_df = pd.DataFrame({
            'MMSI': mmsi_seq[:len(y_pred)].flatten(),
            'Last Known Latitude': last_lat_denorm.flatten(),
            'Last Known Longitude': last_lon_denorm.flatten(),
            'Predicted Latitude': pred_lat.flatten(),
            'Predicted Longitude': pred_lon.flatten(),
            'Real Latitude': true_lat.flatten(),
            'Real Longitude': true_lon.flatten()
        })

        # Save predictions as CSV
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', newline='') as tmp_positions_file:
            predicted_df.to_csv(tmp_positions_file, index=False)
            positions_csv_path = tmp_positions_file.name

        logging.info("Classical prediction completed.")
        return metrics_json, positions_csv_path, inference_time
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return {"error": str(e)}, None, None
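# Illustrative aside (not in the original notebook): classical_prediction()
# rejects any CSV whose columns differ from expected_columns; 'hour'/'minutes'
# in place of 'time_decimal' are combined by add_time_decimal_feature() first.
# A synthetic file that would pass the column check:
def _demo_make_sample_input(path='sample_input.csv'):
    columns = [
        "mmsi", "sog_kt", "latitude_degrees", "longitude_degrees", "cog_degrees",
        "dimension_a_m", "dimension_b_m", "dimension_c_m", "dimension_d_m",
        "ship_type", "day", "month", "year", "time_decimal"
    ]
    rows = 30  # needs at least seq_len + forecast_horizon = 25 rows per MMSI
    df = pd.DataFrame(np.random.rand(rows, len(columns)), columns=columns)
    df['mmsi'] = 123456789
    df.to_csv(path, index=False)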
# ============================
# Abnormal Behavior Detection
# ============================

def abnormal_behavior_detection(prediction_file, alpha=0.5, threshold=10.0):
    """
    Detect abnormal behavior based on angular divergence and distance difference.
    Accepts a CSV file containing real and predicted positions.
    """
    try:
        logging.info("Starting abnormal behavior detection...")

        # Load the CSV file containing real and predicted positions
        logging.info("Loading prediction CSV file...")
        df = pd.read_csv(prediction_file.name)
        logging.info(f"Prediction CSV file loaded with {df.shape[0]} records.")

        # Check if necessary columns exist
        expected_columns = [
            'MMSI', 'Last Known Latitude', 'Last Known Longitude',
            'Predicted Latitude', 'Predicted Longitude',
            'Real Latitude', 'Real Longitude'
        ]

        if not all(col in df.columns for col in expected_columns):
            error_message = (
                f"Input data does not have the correct columns.\n"
                f"Expected columns: {expected_columns}\n"
                f"Got columns: {list(df.columns)}"
            )
            logging.error(error_message)
            return {"error": error_message}

        # Extract necessary data
        mmsi_seq = df['MMSI'].values
        last_lat_flat = df['Last Known Latitude'].values
        last_lon_flat = df['Last Known Longitude'].values
        pred_lat_flat = df['Predicted Latitude'].values
        pred_lon_flat = df['Predicted Longitude'].values
        true_lat_flat = df['Real Latitude'].values
        true_lon_flat = df['Real Longitude'].values

        # Calculate bearings
        logging.info("Calculating bearings for predictions and real values...")
        bearings_pred = [
            calculate_bearing(last_lon_flat[i], last_lat_flat[i], pred_lon_flat[i], pred_lat_flat[i])
            for i in range(len(pred_lat_flat))
        ]
        bearings_true = [
            calculate_bearing(last_lon_flat[i], last_lat_flat[i], true_lon_flat[i], true_lat_flat[i])
            for i in range(len(true_lat_flat))
        ]

        # Calculate angular divergence Δθ
        logging.info("Calculating angular divergence (Δθ)...")
        delta_theta = [
            angular_divergence(bearings_pred[i], bearings_true[i])
            for i in range(len(bearings_pred))
        ]

        # Calculate distance difference Δd
        logging.info("Calculating distance difference (Δd)...")
        delta_d = [
            haversine(last_lon_flat[i], last_lat_flat[i], pred_lon_flat[i], pred_lat_flat[i]) -
            haversine(last_lon_flat[i], last_lat_flat[i], true_lon_flat[i], true_lat_flat[i])
            for i in range(len(pred_lat_flat))
        ]

        # Compute the score
        logging.info("Computing the abnormal behavior score...")
        score = [alpha * abs(dd) + (1 - alpha) * dt for dd, dt in zip(delta_d, delta_theta)]

        # Determine abnormal behavior
        logging.info("Determining abnormal behavior based on the score...")
        abnormal_behavior = [1 if s >= threshold else 0 for s in score]  # 1: Abnormal, 0: Normal

        # Create DataFrame for saving
        abnormal_behavior_df = pd.DataFrame({
            'MMSI': mmsi_seq,
            'Last Known Latitude': last_lat_flat,
            'Last Known Longitude': last_lon_flat,
            'Predicted Latitude': pred_lat_flat,
            'Predicted Longitude': pred_lon_flat,
            'Real Latitude': true_lat_flat,
            'Real Longitude': true_lon_flat,
            'Distance Difference (Δd) [km]': delta_d,
            'Angular Divergence (Δθ) [degrees]': delta_theta,
            'Score (αΔd + (1-α)Δθ)': score,
            'Abnormal Behavior (1=Abnormal, 0=Normal)': abnormal_behavior
        })

        # Save abnormal behavior dataset as CSV
        with tempfile.NamedTemporaryFile(delete=False, suffix='.csv', mode='w', newline='') as tmp_abnormal_file:
            abnormal_behavior_df.to_csv(tmp_abnormal_file, index=False)
            abnormal_csv_path = tmp_abnormal_file.name

        logging.info("Abnormal behavior detection completed.")
        return abnormal_csv_path
    except Exception as e:
        logging.error(f"An error occurred: {str(e)}")
        return {"error": str(e)}
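# Illustrative aside (not in the original notebook): the flagging rule is
# score = alpha * |Δd| + (1 - alpha) * Δθ, where Δd is the kilometer gap and
# Δθ the heading gap between prediction and reality, both measured from the
# last known position. A worked line with made-up numbers:
def _demo_abnormal_score():
    alpha, threshold = 0.5, 10.0
    delta_d, delta_theta = 4.0, 30.0  # 4 km distance gap, 30 degree heading gap
    score = alpha * abs(delta_d) + (1 - alpha) * delta_theta
    print(score, score >= threshold)  # 17.0 True -> flagged as abnormal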
# ============================
# Define Gradio Interface
# ============================

def main():
    # ============================
    # Define Model and Scaler Paths
    # ============================

    model_paths = {
        'teacher': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256/horizon_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_seq_24/run_1/best_model.pth',
        'student_north': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_North_seq_24/run_1/best_model.pth',
        'student_mid': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_Mid_seq_24/run_1/best_model.pth',
        'student_south': 'LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South/horizon1_data_LSTM_whole_atlantic_horizon1_with_time_decimal_input_batch256_KD_South_seq_24/run_1/best_model.pth'
    }

    scaler_paths = {
        'Teacher': 'scaler_train_wholedata.joblib',
        'Student_North': 'scaler_train_North.joblib',
        'Student_Mid': 'scaler_train_Mid.joblib',
        'Student_South': 'scaler_train_South.joblib'
    }

    # ============================
    # Load Models and Scalers
    # ============================

    logging.info("Loading models and scalers...")
    models = load_models(model_paths)
    loaded_scalers = load_scalers(scaler_paths)
    logging.info("All models and scalers loaded successfully.")

    # Define the Gradio components for the classical prediction tab
    classical_tab = gr.Interface(
        fn=lambda file, model_choice, min_mmsi, max_mmsi: classical_prediction(file, model_choice, min_mmsi, max_mmsi, models, loaded_scalers),
        inputs=[
            gr.File(label="Upload CSV File"),
            gr.Dropdown(choices=["Auto-Select", "Teacher", "Student_North", "Student_Mid", "Student_South"], value="Auto-Select", label="Choose Model"),
            gr.Number(label="Min MMSI", value=0),
            gr.Number(label="Max MMSI", value=999999999)
        ],
        outputs=[
            gr.JSON(label="Classical Metrics (Degrees)"),
            gr.File(label="Download Predicted & Real Positions CSV"),
            gr.Number(label="Inference Time (seconds)")
        ],
        title="Classical Prediction & Metrics",
        description="Upload a CSV file and select a model to get classical evaluation metrics such as MAE, MSE, RMSE. The inference time is also provided."
    )

    # Define the Gradio components for the abnormal behavior detection tab
    abnormal_tab = gr.Interface(
        fn=lambda prediction_file, alpha, threshold: abnormal_behavior_detection(prediction_file, alpha, threshold),
        inputs=[
            gr.File(label="Upload Predicted Positions CSV"),
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.5, label="Alpha (α)"),
            gr.Number(label="Threshold", value=10.0)
        ],
        outputs=[
            gr.File(label="Download Abnormal Behavior CSV")
        ],
        title="Abnormal Behavior Detection",
        description=(
            "Upload the CSV file containing real and predicted positions from the Classical Prediction tab. "
            "Adjust the Alpha and Threshold parameters to compute abnormal behavior."
        )
    )

    # Combine the two tabs using the Gradio Tabs component
    with gr.Blocks() as demo:
        gr.Markdown("# Vessel Trajectory Prediction and Abnormal Behavior Detection")
        with gr.Tabs():
            with gr.TabItem("Classical Prediction"):
                classical_tab.render()
            with gr.TabItem("Abnormal Behavior Detection"):
                abnormal_tab.render()

    # Launch the Gradio interface
    logging.info("Launching Gradio interface...")
    demo.launch(share=True)
    logging.info("Gradio interface launched successfully.")

# Run the app
if __name__ == "__main__":
    main()
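# Illustrative aside (not in the original notebook): launch(share=True)
# additionally requests a temporary public *.gradio.live URL. For a local-only
# run, one would instead call, inside main():
#     demo.launch(server_name="127.0.0.1", server_port=7860, share=False)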
Notebook metadata (from the deleted JSON): kernelspec "Python 3 (ipykernel)"; language_info: Python 3.11.8, codemirror_mode ipython (v3), pygments_lexer ipython3, nbconvert_exporter python, file_extension .py; nbformat 4, nbformat_minor 5.