{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# IESO Coincident Peak Prediction — Operational Design & Deployment Architecture\n",
    "\n",
    "This notebook demonstrates a live prediction workflow using the trained model\n",
    "with current IESO demand data and weather forecasts, and documents the\n",
    "deployment architecture for operational use.\n",
    "\n",
    "**Three-phase daily workflow:**\n",
    "1. Morning forecast (6–7 AM): weather-based demand prediction and risk classification\n",
    "2. Intraday monitoring (noon–8 PM): real-time demand trajectory tracking\n",
    "3. Post-day review: accuracy logging and threshold update"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import warnings\n",
    "from datetime import datetime, timedelta, date\n",
    "from pathlib import Path\n",
    "\n",
    "import joblib\n",
    "import matplotlib.patches as mpatches\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "import seaborn as sns\n",
    "\n",
    "# Silence library warnings so the rendered notebook output stays readable.\n",
    "warnings.filterwarnings('ignore')\n",
    "sns.set_theme(style='whitegrid', font_scale=1.1)\n",
    "\n",
    "# The project root defaults to the original development location but can be\n",
    "# overridden with the IESO_PROJECT_ROOT environment variable, so the notebook\n",
    "# runs on another machine without editing this cell.\n",
    "PROJECT_ROOT = Path(os.environ.get('IESO_PROJECT_ROOT',\n",
    "                                   r'C:/wamp64/www/Spec_Driven_Dev_Website'))\n",
    "DATA_DIR = PROJECT_ROOT / 'notebooks' / 'source' / 'data'\n",
    "MODEL_DIR = PROJECT_ROOT / 'notebooks' / 'source' / 'models'\n",
    "\n",
    "print('Libraries loaded successfully')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Trained model bundle: the estimator plus the metadata stored alongside it\n",
    "model_artifact = joblib.load(MODEL_DIR / 'ieso_peak_model.joblib')\n",
    "model, FEATURE_COLS = model_artifact['model'], model_artifact['feature_columns']\n",
    "\n",
    "# Read the three parquet datasets first ...\n",
    "features = pd.read_parquet(DATA_DIR / 'ieso_features_daily.parquet')\n",
    "hourly = pd.read_parquet(DATA_DIR / 'ieso_hourly_with_features.parquet')\n",
    "peaks = pd.read_parquet(DATA_DIR / 'ieso_peak_labels.parquet')\n",
    "\n",
    "# ... then parse their date columns with pd.to_datetime so the .dt accessors\n",
    "# used in later cells are available\n",
    "features['Date'] = pd.to_datetime(features['Date'])\n",
    "hourly['Date'] = pd.to_datetime(hourly['Date'])\n",
    "peaks['date'] = pd.to_datetime(peaks['date'])\n",
    "\n",
    "print(f'Model loaded: test RMSE = {model_artifact[\"test_rmse\"]:.1f} MW')\n",
    "print(f'Features: {FEATURE_COLS}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Live Prediction Demo\n",
    "\n",
    "Fetch the current weather forecast from Open-Meteo and the most recent demand\n",
    "data from the local dataset, then run the trained model to generate a risk\n",
    "assessment for each day of the 7-day forecast."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def fetch_weather_forecast(lat=43.65, lon=-79.38, days=7):\n",
    "    \"\"\"Download an hourly weather forecast from the Open-Meteo API.\n",
    "\n",
    "    Args:\n",
    "        lat: Latitude sent to the API (default 43.65).\n",
    "        lon: Longitude sent to the API (default -79.38).\n",
    "        days: Number of forecast days requested.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame with columns datetime, temperature_c, relative_humidity,\n",
    "        dewpoint_c and date, or None when the request or the parsing of the\n",
    "        response fails (the error is printed rather than raised).\n",
    "    \"\"\"\n",
    "    query = dict(\n",
    "        latitude=lat,\n",
    "        longitude=lon,\n",
    "        hourly='temperature_2m,relative_humidity_2m,dewpoint_2m',\n",
    "        forecast_days=days,\n",
    "        timezone='America/Toronto',\n",
    "    )\n",
    "    try:\n",
    "        reply = requests.get('https://api.open-meteo.com/v1/forecast',\n",
    "                             params=query, timeout=30)\n",
    "        reply.raise_for_status()\n",
    "        series = reply.json()['hourly']\n",
    "        frame = pd.DataFrame({\n",
    "            'datetime': pd.to_datetime(series['time']),\n",
    "            'temperature_c': series['temperature_2m'],\n",
    "            'relative_humidity': series['relative_humidity_2m'],\n",
    "            'dewpoint_c': series['dewpoint_2m'],\n",
    "        })\n",
    "        # Calendar date of each hourly record, used later as the grouping key\n",
    "        return frame.assign(date=frame['datetime'].dt.date)\n",
    "    except Exception as e:\n",
    "        print(f'Weather forecast fetch failed: {e}')\n",
    "        return None\n",
    "\n",
    "def compute_humidex(temp_c, dewpoint_c):\n",
    "    e = 6.11 * np.exp(5417.7530 * (1.0/273.16 - 1.0/(273.15 + dewpoint_c)))\n",
    "    h = temp_c + (5.0/9.0) * (e - 10.0)\n",
    "    return np.where(temp_c >= 20, h, temp_c)\n",
    "\n",
    "# Pull the forecast, then collapse it to one row of weather features per day\n",
    "print('Fetching 7-day weather forecast for Toronto...')\n",
    "forecast = fetch_weather_forecast()\n",
    "\n",
    "if forecast is None:\n",
    "    print('Using most recent historical data as fallback')\n",
    "else:\n",
    "    print(f'Forecast retrieved: {len(forecast)} hourly records')\n",
    "    print(f'Date range: {forecast[\"date\"].min()} to {forecast[\"date\"].max()}')\n",
    "\n",
    "    # Hourly derived series: humidex, and degrees above 18 °C (floored at 0)\n",
    "    forecast['humidex'] = compute_humidex(forecast['temperature_c'].to_numpy(),\n",
    "                                          forecast['dewpoint_c'].to_numpy())\n",
    "    forecast['cdh'] = (forecast['temperature_c'] - 18.0).clip(lower=0)\n",
    "\n",
    "    # Named aggregation produces the final column names directly\n",
    "    daily_forecast = forecast.groupby('date').agg(\n",
    "        daily_max_temp=('temperature_c', 'max'),\n",
    "        daily_mean_temp=('temperature_c', 'mean'),\n",
    "        daily_max_humidex=('humidex', 'max'),\n",
    "        daily_mean_dewpoint=('dewpoint_c', 'mean'),\n",
    "        daily_mean_rh=('relative_humidity', 'mean'),\n",
    "        daily_cdh=('cdh', 'sum'),\n",
    "    ).reset_index()\n",
    "\n",
    "    print('\\n7-Day Forecast Summary:')\n",
    "    print(daily_forecast[['date', 'daily_max_temp', 'daily_max_humidex',\n",
    "                          'daily_cdh']].to_string(index=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_daily_risk(forecast_row, recent_features, model, feature_cols):\n",
    "    \"\"\"Generate risk prediction for a single day.\"\"\"\n",
    "    # Build feature vector from forecast + recent demand history\n",
    "    feature_dict = {}\n",
    "    \n",
    "    # Weather features from forecast\n",
    "    feature_dict['daily_max_temp'] = forecast_row.get('daily_max_temp', 20)\n",
    "    feature_dict['daily_mean_temp'] = forecast_row.get('daily_mean_temp', 15)\n",
    "    feature_dict['daily_max_humidex'] = forecast_row.get('daily_max_humidex', 20)\n",
    "    feature_dict['daily_cdh'] = forecast_row.get('daily_cdh', 0)\n",
    "    feature_dict['daily_mean_rh'] = forecast_row.get('daily_mean_rh', 60)\n",
    "    feature_dict['daily_mean_dewpoint'] = forecast_row.get('daily_mean_dewpoint', 10)\n",
    "    \n",
    "    # Rolling averages (use recent history + forecast)\n",
    "    if recent_features is not None and len(recent_features) >= 3:\n",
    "        recent = recent_features.tail(3)\n",
    "        feature_dict['temp_3day_avg'] = recent['daily_max_temp'].mean()\n",
    "        feature_dict['cdh_3day_avg'] = recent['daily_cdh'].mean()\n",
    "    else:\n",
    "        feature_dict['temp_3day_avg'] = feature_dict['daily_max_temp']\n",
    "        feature_dict['cdh_3day_avg'] = feature_dict['daily_cdh']\n",
    "    \n",
    "    # Calendar features\n",
    "    pred_date = pd.Timestamp(forecast_row['date'])\n",
    "    feature_dict['month'] = pred_date.month\n",
    "    feature_dict['day_of_week'] = pred_date.dayofweek\n",
    "    feature_dict['is_business_day'] = 1 if pred_date.dayofweek < 5 else 0\n",
    "    feature_dict['day_of_year'] = pred_date.dayofyear\n",
    "    \n",
    "    # Demand momentum from recent history\n",
    "    if recent_features is not None and len(recent_features) > 0:\n",
    "        feature_dict['prev_day_max_demand'] = recent_features['daily_max_demand'].iloc[-1]\n",
    "        feature_dict['rolling_7d_max_demand'] = recent_features.tail(7)['daily_max_demand'].max()\n",
    "        feature_dict['rolling_7d_mean_demand'] = recent_features.tail(7)['daily_max_demand'].mean()\n",
    "    else:\n",
    "        feature_dict['prev_day_max_demand'] = 16000\n",
    "        feature_dict['rolling_7d_max_demand'] = 18000\n",
    "        feature_dict['rolling_7d_mean_demand'] = 16000\n",
    "    \n",
    "    # Peak context\n",
    "    if recent_features is not None and 'current_5th_peak' in recent_features.columns:\n",
    "        feature_dict['current_5th_peak'] = recent_features['current_5th_peak'].iloc[-1]\n",
    "        feature_dict['max_demand_so_far'] = recent_features['max_demand_so_far'].iloc[-1]\n",
    "    else:\n",
    "        feature_dict['current_5th_peak'] = 20000\n",
    "        feature_dict['max_demand_so_far'] = 20000\n",
    "    \n",
    "    # Create feature vector\n",
    "    X = pd.DataFrame([feature_dict])[feature_cols]\n",
    "    \n",
    "    # Predict\n",
    "    predicted_demand = model.predict(X)[0]\n",
    "    \n",
    "    # Classify risk\n",
    "    threshold = feature_dict['current_5th_peak']\n",
    "    buffer = 500\n",
    "    diff = predicted_demand - threshold\n",
    "    \n",
    "    if diff > buffer:\n",
    "        risk = 'RED'\n",
    "    elif abs(diff) <= buffer:\n",
    "        risk = 'YELLOW'\n",
    "    else:\n",
    "        risk = 'GREEN'\n",
    "    \n",
    "    # Predict peak window\n",
    "    max_temp = feature_dict['daily_max_temp']\n",
    "    if max_temp > 33:\n",
    "        window = 'HE 17-19 (4-7 PM)'\n",
    "    elif max_temp > 28:\n",
    "        window = 'HE 16-18 (3-6 PM)'\n",
    "    else:\n",
    "        window = 'HE 15-17 (2-5 PM)'\n",
    "    \n",
    "    return {\n",
    "        'date': forecast_row['date'],\n",
    "        'predicted_max_demand': predicted_demand,\n",
    "        'threshold': threshold,\n",
    "        'risk_level': risk,\n",
    "        'predicted_window': window,\n",
    "        'max_temp_forecast': max_temp,\n",
    "    }\n",
    "\n",
    "# Demand and peak context come from the last 30 rows of the feature table\n",
    "recent_features = features.tail(30).copy()\n",
    "\n",
    "if forecast is None:\n",
    "    print('Forecast unavailable — using historical demo')\n",
    "    outlook_df = pd.DataFrame()\n",
    "else:\n",
    "    # One prediction per forecast day, all sharing the same recent history\n",
    "    outlook = [\n",
    "        predict_daily_risk(day.to_dict(), recent_features, model, FEATURE_COLS)\n",
    "        for _, day in daily_forecast.iterrows()\n",
    "    ]\n",
    "    outlook_df = pd.DataFrame(outlook)\n",
    "\n",
    "    # Plain-text outlook table; the bracketed marker grows with the risk level\n",
    "    icons = {'RED': '!!!', 'YELLOW': ' ! ', 'GREEN': '   '}\n",
    "    rule = '='*70\n",
    "    print('\\n' + rule)\n",
    "    print('  7-DAY PEAK RISK OUTLOOK')\n",
    "    print(rule)\n",
    "    for _, row in outlook_df.iterrows():\n",
    "        risk_icon = icons[row['risk_level']]\n",
    "        print(f\"  {row['date']}  [{risk_icon}] {row['risk_level']:6s}  \"\n",
    "              f\"Pred: {row['predicted_max_demand']:,.0f} MW  \"\n",
    "              f\"Temp: {row['max_temp_forecast']:.0f}°C  \"\n",
    "              f\"Window: {row['predicted_window']}\")\n",
    "    print(rule)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Intraday Tracking Visualization\n",
    "\n",
    "Show how accumulating real-time demand data through the day narrows\n",
    "uncertainty by comparing the current trajectory against historical\n",
    "peak-day demand envelopes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build historical peak-day demand profiles\n",
    "top5 = peaks[peaks['rank'] <= 5].copy()\n",
    "peak_dates = top5['date'].dt.date.unique()\n",
    "\n",
    "# Calendar date of every hourly row, computed once and reused below instead\n",
    "# of being recomputed for each peak day\n",
    "hourly_dates = hourly['Date'].dt.date\n",
    "\n",
    "# Get hourly profiles for all peak days\n",
    "peak_profiles = []\n",
    "for pd_date in peak_dates:\n",
    "    day_data = hourly[hourly_dates == pd_date]\n",
    "    if len(day_data) >= 20:  # Need sufficient hours\n",
    "        peak_profiles.append(day_data[['Hour', 'Ontario Demand']].copy())\n",
    "\n",
    "if peak_profiles:\n",
    "    # Compute envelope (percentiles of peak-day demand at each hour)\n",
    "    all_profiles = pd.concat(peak_profiles)\n",
    "    envelope = all_profiles.groupby('Hour')['Ontario Demand'].agg(\n",
    "        p10=lambda x: np.percentile(x, 10),\n",
    "        p25=lambda x: np.percentile(x, 25),\n",
    "        p50='median',\n",
    "        p75=lambda x: np.percentile(x, 75),\n",
    "        p90=lambda x: np.percentile(x, 90),\n",
    "    ).reset_index()\n",
    "\n",
    "# Get a recent day's actual trajectory for comparison\n",
    "recent_day = hourly[hourly['Date'] == hourly['Date'].max()]\n",
    "\n",
    "# Also get a typical non-peak day for contrast\n",
    "non_peak_dates = hourly[\n",
    "    (~hourly_dates.isin(peak_dates)) &\n",
    "    (hourly['Date'].dt.month.isin([6, 7, 8]))\n",
    "]['Date'].unique()\n",
    "# Start from an empty frame so the plotting code never sees an undefined name\n",
    "typical_day = hourly.iloc[0:0]\n",
    "if len(non_peak_dates) > 0:\n",
    "    # 10th date from the end of the list, or the first date when fewer than\n",
    "    # 10 exist (a fixed [-10] raised IndexError on short datasets)\n",
    "    typical_idx = -min(10, len(non_peak_dates))\n",
    "    typical_day = hourly[hourly['Date'] == non_peak_dates[typical_idx]]\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(14, 7))\n",
    "\n",
    "# Peak day envelope\n",
    "if peak_profiles:\n",
    "    ax.fill_between(envelope['Hour'], envelope['p10'], envelope['p90'],\n",
    "                    alpha=0.15, color='#d32f2f', label='Peak days (10th–90th pctl)')\n",
    "    ax.fill_between(envelope['Hour'], envelope['p25'], envelope['p75'],\n",
    "                    alpha=0.25, color='#d32f2f', label='Peak days (25th–75th pctl)')\n",
    "    ax.plot(envelope['Hour'], envelope['p50'], '--', color='#d32f2f',\n",
    "            linewidth=2, label='Peak day median')\n",
    "\n",
    "# Current day trajectory\n",
    "if len(recent_day) > 0:\n",
    "    ax.plot(recent_day['Hour'], recent_day['Ontario Demand'], 'o-',\n",
    "            color='#1565C0', linewidth=2, markersize=4,\n",
    "            label=f'Latest data ({recent_day[\"Date\"].iloc[0].strftime(\"%Y-%m-%d\")})')\n",
    "\n",
    "# Typical non-peak day\n",
    "if len(typical_day) > 0:\n",
    "    ax.plot(typical_day['Hour'], typical_day['Ontario Demand'], '--',\n",
    "            color='#9E9E9E', linewidth=1.5, alpha=0.7,\n",
    "            label='Typical summer day (non-peak)')\n",
    "\n",
    "# Mark the peak window\n",
    "ax.axvspan(15, 19, alpha=0.08, color='orange')\n",
    "ax.text(17, ax.get_ylim()[1] * 0.98, 'Peak\\nWindow',\n",
    "        ha='center', va='top', fontsize=10, color='#FF9800', fontweight='bold')\n",
    "\n",
    "ax.set_xlabel('Hour Ending (EST)')\n",
    "ax.set_ylabel('Ontario Demand (MW)')\n",
    "ax.set_title('Intraday Demand Tracking — Current Day vs. Historical Peak Envelopes')\n",
    "ax.legend(loc='upper left', framealpha=0.9)\n",
    "ax.set_xlim(1, 24)\n",
    "ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:,.0f}'))\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(DATA_DIR / 'intraday_tracking.png', dpi=150, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7-Day Forecast Calendar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "if len(outlook_df) == 0:\n",
    "    print('No forecast data available for calendar display')\n",
    "else:\n",
    "    # (tile fill colour, risk-label text colour) for each risk level\n",
    "    palette = {\n",
    "        'RED': ('#FFCDD2', '#B71C1C'),\n",
    "        'YELLOW': ('#FFF9C4', '#F57F17'),\n",
    "        'GREEN': ('#C8E6C9', '#2E7D32'),\n",
    "    }\n",
    "\n",
    "    fig, ax = plt.subplots(figsize=(14, 3))\n",
    "\n",
    "    # One tile per forecast day, laid out left to right\n",
    "    for slot, (_, day) in enumerate(outlook_df.iterrows()):\n",
    "        fill, ink = palette[day['risk_level']]\n",
    "        ax.add_patch(plt.Rectangle((slot, 0), 0.9, 1, facecolor=fill,\n",
    "                                   edgecolor='white', linewidth=2))\n",
    "\n",
    "        # (y position, text, styling) for the four stacked labels:\n",
    "        # date, risk level, predicted demand, temperature\n",
    "        labels = [\n",
    "            (0.85, str(day['date']),\n",
    "             dict(va='top', fontsize=8, fontweight='bold')),\n",
    "            (0.55, day['risk_level'],\n",
    "             dict(va='center', fontsize=14, fontweight='bold', color=ink)),\n",
    "            (0.3, f\"{day['predicted_max_demand']:,.0f} MW\",\n",
    "             dict(va='center', fontsize=8)),\n",
    "            (0.12, f\"{day['max_temp_forecast']:.0f}°C\",\n",
    "             dict(va='center', fontsize=8, color='#555')),\n",
    "        ]\n",
    "        for y, text, style in labels:\n",
    "            ax.text(slot + 0.45, y, text, ha='center', **style)\n",
    "\n",
    "    ax.set_xlim(-0.2, len(outlook_df) + 0.2)\n",
    "    ax.set_ylim(-0.1, 1.1)\n",
    "    ax.axis('off')\n",
    "    ax.set_title('7-Day Peak Risk Outlook', fontsize=14, fontweight='bold', pad=20)\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.savefig(DATA_DIR / 'forecast_outlook.png', dpi=150, bbox_inches='tight')\n",
    "    plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## API Design Specification\n",
    "\n",
    "REST API endpoints for an operational deployment of the prediction service.\n",
    "\n",
    "| Endpoint | Method | Description | Response |\n",
    "|----------|--------|-------------|----------|\n",
    "| `/predict/today` | GET | Morning forecast with risk level | `{date, predicted_max_mw, risk_level, window, confidence}` |\n",
    "| `/predict/realtime` | GET | Intraday updated probability using actual demand trajectory | `{date, hour, current_demand_mw, projected_max_mw, risk_level, trajectory_percentile}` |\n",
    "| `/predict/outlook` | GET | 7-day forecast outlook | `[{date, risk_level, predicted_max_mw, max_temp_c}]` |\n",
    "| `/status/peaks` | GET | Current top-5 peaks and displacement threshold | `{base_period, peaks: [{rank, date, hour, demand_mw}], threshold_mw}` |\n",
    "| `/status/model` | GET | Model metadata and performance | `{version, trained_on, test_rmse, last_retrained}` |\n",
    "| `/history/backtest` | GET | Walk-forward backtest results | `[{base_period, rmse, precision, recall, f1}]` |\n",
    "| `/history/alerts` | GET | Alert history for current base period | `[{date, alert_level, predicted_mw, actual_mw, was_peak}]` |"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Deployment Architecture\n",
    "\n",
    "```\n",
    "                    IESO Coincident Peak Prediction — System Architecture\n",
    "                    =====================================================\n",
    "\n",
    "    ┌─────────────────────┐     ┌──────────────────────┐\n",
    "    │   IESO Public APIs  │     │  Open-Meteo Weather  │\n",
    "    │                     │     │                      │\n",
    "    │  • Hourly Demand    │     │  • 7-Day Forecast    │\n",
    "    │  • ICI Demand       │     │  • Historical Archive│\n",
    "    │  • Peak Tracker     │     │                      │\n",
    "    └────────┬────────────┘     └──────────┬───────────┘\n",
    "             │                              │\n",
    "             ▼                              ▼\n",
    "    ┌─────────────────────────────────────────────────┐\n",
    "    │              Python Ingestion Layer             │\n",
    "    │                                                 │\n",
    "    │  • Fetch & validate data                        │\n",
    "    │  • Timestamp alignment (HE→datetime)            │\n",
    "    │  • Gap detection & handling                     │\n",
    "    │  • Cron: 6 AM daily + hourly noon–8 PM          │\n",
    "    └────────────────────┬────────────────────────────┘\n",
    "                         │\n",
    "                         ▼\n",
    "    ┌─────────────────────────────────────────────────┐\n",
    "    │               Feature Engine                    │\n",
    "    │                                                 │\n",
    "    │  • Weather → humidex, CDH, rolling averages     │\n",
    "    │  • Demand → lagged, rolling, momentum           │\n",
    "    │  • Peak context → threshold tracker             │\n",
    "    │  • Calendar → holidays, business day flags      │\n",
    "    └────────────────────┬────────────────────────────┘\n",
    "                         │\n",
    "                         ▼\n",
    "    ┌─────────────────────────────────────────────────┐\n",
    "    │           XGBoost Prediction Model              │\n",
    "    │                                                 │\n",
    "    │  • Daily max demand regression                  │\n",
    "    │  • RED / YELLOW / GREEN classification          │\n",
    "    │  • 3-hour peak window estimation                │\n",
    "    │  • Retrained annually (May 1)                   │\n",
    "    └────────────────────┬────────────────────────────┘\n",
    "                         │\n",
    "                         ▼\n",
    "    ┌─────────────────────────────────────────────────┐\n",
    "    │            Flask/FastAPI REST Service           │\n",
    "    │                                                 │\n",
    "    │  GET /predict/today    → morning forecast       │\n",
    "    │  GET /predict/realtime → intraday update        │\n",
    "    │  GET /predict/outlook  → 7-day outlook          │\n",
    "    │  GET /status/peaks     → current threshold      │\n",
    "    └────────────────────┬────────────────────────────┘\n",
    "                         │\n",
    "            ┌────────────┼────────────┐\n",
    "            ▼            ▼            ▼\n",
    "    ┌──────────┐  ┌──────────┐  ┌──────────┐\n",
    "    │  Email   │  │   SMS    │  │  Slack   │\n",
    "    │ (SMTP)   │  │ (Twilio) │  │ Webhook  │\n",
    "    └──────────┘  └──────────┘  └──────────┘\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Model Retraining Schedule\n",
    "\n",
    "The model should be retrained annually to incorporate the latest base period's data.\n",
    "\n",
    "| Step | Timing | Action |\n",
    "|------|--------|--------|\n",
    "| 1 | May 1 | Base period closes. IESO publishes final top-5 peaks. |\n",
    "| 2 | May 1–7 | Download completed base period data. Add to training set. |\n",
    "| 3 | May 7–14 | Retrain XGBoost on full historical dataset (2010–latest). |\n",
    "| 4 | May 14–21 | Validate on previous base period. Compare RMSE and recall to prior model. |\n",
    "| 5 | June 1 | Deploy new model for upcoming peak season. |\n",
    "| 6 | Jun–Sep | Monitor prediction accuracy. Flag drift if RMSE > 1.5× historical average. |\n",
    "| 7 | Ongoing | Track structural demand shifts (EV adoption, new industrial loads, BTM solar). |"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Monitoring & Drift Detection\n",
    "\n",
    "Key metrics to track during the peak season:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate monitoring dashboard with historical data\n",
    "# Show prediction accuracy over the most recent summer\n",
    "bp2024 = features[(features['base_period'] == 2024) &\n",
    "                   features['month'].isin([6, 7, 8])].copy()\n",
    "\n",
    "# Rows with complete model inputs. Computed once, and copied so that the\n",
    "# 'predicted' / 'error' columns are added to an independent frame.\n",
    "bp2024_clean = (bp2024.dropna(subset=FEATURE_COLS).copy()\n",
    "                if len(bp2024) > 0 else bp2024)\n",
    "\n",
    "if len(bp2024_clean) > 0:\n",
    "    bp2024_clean['predicted'] = model.predict(bp2024_clean[FEATURE_COLS])\n",
    "    bp2024_clean['error'] = bp2024_clean['predicted'] - bp2024_clean['daily_max_demand']\n",
    "    # Root-mean-square error over the whole period. Unlike the standard\n",
    "    # deviation of the errors, it also reflects any systematic bias.\n",
    "    overall_rmse = np.sqrt(np.mean(bp2024_clean['error']**2))\n",
    "\n",
    "    fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
    "\n",
    "    # Actual vs Predicted scatter\n",
    "    axes[0, 0].scatter(bp2024_clean['daily_max_demand'], bp2024_clean['predicted'],\n",
    "                       alpha=0.5, s=20, color='#1565C0')\n",
    "    lims = [bp2024_clean[['daily_max_demand', 'predicted']].min().min(),\n",
    "            bp2024_clean[['daily_max_demand', 'predicted']].max().max()]\n",
    "    axes[0, 0].plot(lims, lims, '--', color='gray')  # perfect-prediction diagonal\n",
    "    axes[0, 0].set_xlabel('Actual Max Demand (MW)')\n",
    "    axes[0, 0].set_ylabel('Predicted Max Demand (MW)')\n",
    "    axes[0, 0].set_title('Actual vs. Predicted')\n",
    "\n",
    "    # Error distribution (title reports the true RMSE, not the error std)\n",
    "    axes[0, 1].hist(bp2024_clean['error'], bins=30, color='#4CAF50', alpha=0.7,\n",
    "                    edgecolor='white')\n",
    "    axes[0, 1].axvline(x=0, color='black', linestyle='--')\n",
    "    axes[0, 1].set_xlabel('Prediction Error (MW)')\n",
    "    axes[0, 1].set_ylabel('Count')\n",
    "    axes[0, 1].set_title(f'Error Distribution (RMSE={overall_rmse:.0f} MW)')\n",
    "\n",
    "    # Cumulative error over time\n",
    "    axes[1, 0].plot(bp2024_clean['Date'], bp2024_clean['error'].cumsum(),\n",
    "                    color='#FF9800', linewidth=1.5)\n",
    "    axes[1, 0].axhline(y=0, color='gray', linestyle='--')\n",
    "    axes[1, 0].set_xlabel('Date')\n",
    "    axes[1, 0].set_ylabel('Cumulative Error (MW)')\n",
    "    axes[1, 0].set_title('Cumulative Prediction Bias')\n",
    "    axes[1, 0].tick_params(axis='x', rotation=45)\n",
    "\n",
    "    # Rolling RMSE (7-day window) against the overall RMSE and a 1.5x drift line\n",
    "    rolling_rmse = bp2024_clean['error'].rolling(7).apply(\n",
    "        lambda x: np.sqrt(np.mean(x**2))\n",
    "    )\n",
    "    axes[1, 1].plot(bp2024_clean['Date'], rolling_rmse, color='#d32f2f', linewidth=1.5)\n",
    "    axes[1, 1].axhline(y=overall_rmse, color='gray', linestyle='--',\n",
    "                        label=f'Overall RMSE: {overall_rmse:.0f} MW')\n",
    "    axes[1, 1].axhline(y=overall_rmse * 1.5, color='#d32f2f', linestyle=':',\n",
    "                        label=f'Drift threshold: {overall_rmse*1.5:.0f} MW')\n",
    "    axes[1, 1].set_xlabel('Date')\n",
    "    axes[1, 1].set_ylabel('7-Day Rolling RMSE (MW)')\n",
    "    axes[1, 1].set_title('Model Drift Monitoring')\n",
    "    axes[1, 1].legend(fontsize=9)\n",
    "    axes[1, 1].tick_params(axis='x', rotation=45)\n",
    "\n",
    "    plt.suptitle('Model Performance Dashboard — Summer 2024', fontsize=14, y=1.02)\n",
    "    plt.tight_layout()\n",
    "    plt.savefig(DATA_DIR / 'monitoring_dashboard.png', dpi=150, bbox_inches='tight')\n",
    "    plt.show()\n",
    "else:\n",
    "    print('Insufficient 2024 summer data for monitoring dashboard')\n",
    "\n",
    "print('\\n=== Notebook 5 complete ===')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}