{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# IESO Coincident Peak Prediction — Backtesting & Financial Valuation\n",
    "\n",
    "This notebook simulates operational deployment of the peak prediction model across\n",
    "multiple historical base periods and quantifies the financial value for a\n",
    "hypothetical 10 MW Class A customer.\n",
    "\n",
    "**Key questions:**\n",
    "- How accurately would the model have predicted peak days?\n",
    "- How often would actual peaks fall within the predicted 3-hour window?\n",
    "- What is the net financial value after accounting for false alarm costs?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.patches as mpatches\n",
    "import seaborn as sns\n",
    "import joblib\n",
    "import warnings\n",
    "from pathlib import Path\n",
    "from sklearn.metrics import (confusion_matrix, precision_score, recall_score,\n",
    "                             f1_score, mean_squared_error, r2_score)\n",
    "import xgboost as xgb\n",
    "\n",
    "warnings.filterwarnings('ignore')\n",
    "sns.set_theme(style='whitegrid', font_scale=1.1)\n",
    "\n",
    "PROJECT_ROOT = Path(r'C:/wamp64/www/Spec_Driven_Dev_Website')\n",
    "DATA_DIR = PROJECT_ROOT / 'notebooks' / 'source' / 'data'\n",
    "MODEL_DIR = PROJECT_ROOT / 'notebooks' / 'source' / 'models'\n",
    "\n",
    "print('Libraries loaded successfully')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load data and model\n",
    "features = pd.read_parquet(DATA_DIR / 'ieso_features_daily.parquet')\n",
    "features['Date'] = pd.to_datetime(features['Date'])\n",
    "peaks = pd.read_parquet(DATA_DIR / 'ieso_peak_labels.parquet')\n",
    "peaks['date'] = pd.to_datetime(peaks['date'])\n",
    "\n",
    "model_artifact = joblib.load(MODEL_DIR / 'ieso_peak_model.joblib')\n",
    "FEATURE_COLS = model_artifact['feature_columns']\n",
    "\n",
    "print(f'Features: {len(features)} days')\n",
    "print(f'Model test RMSE: {model_artifact[\"test_rmse\"]:.1f} MW')\n",
    "print(f'Model test R²: {model_artifact[\"test_r2\"]:.4f}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Operational Simulation (2024 Base Period)\n",
    "\n",
    "For each day in the test period, restrict information to what would be\n",
    "available at 7 AM and generate RED/YELLOW/GREEN alerts."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_alerts(model, features_df, feature_cols, buffer_mw=500):\n",
    "    \"\"\"Simulate operational prediction for a set of days.\"\"\"\n",
    "    df = features_df.copy()\n",
    "    X = df[feature_cols].copy()\n",
    "    \n",
    "    # Handle missing features\n",
    "    complete_mask = X.notna().all(axis=1)\n",
    "    df = df[complete_mask].copy()\n",
    "    X = X[complete_mask].copy()\n",
    "    \n",
    "    # Predict\n",
    "    y_pred = model.predict(X)\n",
    "    df['predicted_max_demand'] = y_pred\n",
    "    \n",
    "    # Generate alerts\n",
    "    alerts = []\n",
    "    for idx, row in df.iterrows():\n",
    "        threshold = row['current_5th_peak'] if row['current_5th_peak'] > 0 else 20000\n",
    "        diff = row['predicted_max_demand'] - threshold\n",
    "        if diff > buffer_mw:\n",
    "            alerts.append('RED')\n",
    "        elif abs(diff) <= buffer_mw:\n",
    "            alerts.append('YELLOW')\n",
    "        else:\n",
    "            alerts.append('GREEN')\n",
    "    \n",
    "    df['alert'] = alerts\n",
    "    df['is_alert'] = df['alert'].isin(['RED', 'YELLOW']).astype(int)\n",
    "    \n",
    "    return df\n",
    "\n",
    "# Run simulation on 2024 base period\n",
    "test_data = features[features['base_period'] == 2024].copy()\n",
    "test_results = generate_alerts(model_artifact['model'], test_data, FEATURE_COLS)\n",
    "\n",
    "print(f'Test period: {test_results[\"Date\"].min()} to {test_results[\"Date\"].max()}')\n",
    "print(f'Total days: {len(test_results)}')\n",
    "print(f'\\nAlert distribution:')\n",
    "print(test_results['alert'].value_counts().to_string())\n",
    "print(f'\\nActual peak days: {test_results[\"is_peak_day\"].sum()}')\n",
    "print(f'Alert days: {test_results[\"is_alert\"].sum()}')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Confusion Matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Confusion matrix for peak day detection\n",
    "cm = confusion_matrix(test_results['is_peak_day'], test_results['is_alert'])\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(7, 6))\n",
    "sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax,\n",
    "            xticklabels=['No Alert', 'Alert (RED/YELLOW)'],\n",
    "            yticklabels=['Non-Peak Day', 'Peak Day'],\n",
    "            annot_kws={'fontsize': 16})\n",
    "ax.set_xlabel('Predicted', fontsize=12)\n",
    "ax.set_ylabel('Actual', fontsize=12)\n",
    "ax.set_title('Peak Day Detection — Confusion Matrix (2024 Base Period)', fontsize=13)\n",
    "\n",
    "# Add metrics text\n",
    "prec = precision_score(test_results['is_peak_day'], test_results['is_alert'], zero_division=0)\n",
    "rec = recall_score(test_results['is_peak_day'], test_results['is_alert'], zero_division=0)\n",
    "f1 = f1_score(test_results['is_peak_day'], test_results['is_alert'], zero_division=0)\n",
    "ax.text(0.02, -0.15, f'Precision: {prec:.1%}  |  Recall: {rec:.1%}  |  F1: {f1:.3f}',\n",
    "        transform=ax.transAxes, fontsize=11, fontweight='bold',\n",
    "        bbox=dict(boxstyle='round', facecolor='lightyellow', alpha=0.8))\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(DATA_DIR / 'confusion_matrix_2024.png', dpi=150, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "print(f'Precision: {prec:.1%} — {prec*100:.0f}% of alerts were actual peak days')\n",
    "print(f'Recall: {rec:.1%} — {rec*100:.0f}% of actual peak days received alerts')\n",
    "print(f'False alarms: {cm[0, 1]} days')\n",
    "print(f'Missed peaks: {cm[1, 0]} days')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Alert Calendar Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Calendar heatmap of alerts with actual peaks marked\n",
    "# Focus on summer months (June-August) when peaks concentrate\n",
    "summer = test_results[\n",
    "    (test_results['Date'].dt.month >= 5) & \n",
    "    (test_results['Date'].dt.month <= 9)\n",
    "].copy()\n",
    "\n",
    "alert_colors = {'GREEN': 0, 'YELLOW': 1, 'RED': 2}\n",
    "summer['alert_code'] = summer['alert'].map(alert_colors)\n",
    "summer['week'] = summer['Date'].dt.isocalendar().week.astype(int)\n",
    "summer['dow'] = summer['Date'].dt.dayofweek\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(16, 6))\n",
    "\n",
    "# Plot each day as a colored square\n",
    "from matplotlib.colors import ListedColormap\n",
    "cmap = ListedColormap(['#C8E6C9', '#FFF9C4', '#FFCDD2'])  # green, yellow, red\n",
    "\n",
    "weeks = sorted(summer['week'].unique())\n",
    "for _, row in summer.iterrows():\n",
    "    week_idx = weeks.index(row['week'])\n",
    "    color = ['#C8E6C9', '#FFF9C4', '#FFCDD2'][row['alert_code']]\n",
    "    rect = plt.Rectangle((week_idx, row['dow']), 0.9, 0.9, \n",
    "                          facecolor=color, edgecolor='white', linewidth=1)\n",
    "    ax.add_patch(rect)\n",
    "    \n",
    "    # Mark actual peak days with a star\n",
    "    if row['is_peak_day'] == 1:\n",
    "        ax.plot(week_idx + 0.45, row['dow'] + 0.45, '*', \n",
    "                color='black', markersize=15, zorder=5)\n",
    "    \n",
    "    # Add date label\n",
    "    ax.text(week_idx + 0.45, row['dow'] + 0.45, row['Date'].strftime('%d'),\n",
    "            ha='center', va='center', fontsize=6, color='#555')\n",
    "\n",
    "ax.set_xlim(-0.5, len(weeks) + 0.5)\n",
    "ax.set_ylim(-0.5, 7)\n",
    "ax.set_yticks(np.arange(7) + 0.45)\n",
    "ax.set_yticklabels(['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'])\n",
    "ax.set_xlabel('Week')\n",
    "ax.set_title('Alert Calendar — 2024 Base Period (May–Sep)\\n'\n",
    "             'Stars mark actual top-5 peak days', fontsize=13)\n",
    "ax.invert_yaxis()\n",
    "\n",
    "# Legend\n",
    "legend_patches = [\n",
    "    mpatches.Patch(facecolor='#C8E6C9', edgecolor='gray', label='GREEN'),\n",
    "    mpatches.Patch(facecolor='#FFF9C4', edgecolor='gray', label='YELLOW'),\n",
    "    mpatches.Patch(facecolor='#FFCDD2', edgecolor='gray', label='RED'),\n",
    "    plt.Line2D([0], [0], marker='*', color='black', linestyle='None', \n",
    "               markersize=12, label='Actual Peak'),\n",
    "]\n",
    "ax.legend(handles=legend_patches, loc='upper right', framealpha=0.9)\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.savefig(DATA_DIR / 'alert_calendar_2024.png', dpi=150, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3-Hour Window Evaluation\n",
    "\n",
    "For correctly predicted peak days, determine how often the actual peak hour\n",
    "fell within the predicted 3-hour window."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load hourly data for window evaluation\n",
    "hourly = pd.read_parquet(DATA_DIR / 'ieso_hourly_with_features.parquet')\n",
    "hourly['Date'] = pd.to_datetime(hourly['Date'])\n",
    "\n",
    "# Get the actual peak hours from ground truth\n",
    "top5 = peaks[peaks['rank'] <= 5].copy()\n",
    "bp2024_peaks = top5[top5['base_period'] == 2024]\n",
    "\n",
    "# For each predicted peak day, identify the predicted 3-hour window\n",
    "# based on historical peak hour distribution and temperature profile\n",
    "def predict_peak_window(date, hourly_data):\n",
    "    \"\"\"Predict the most likely 3-hour peak window based on temperature profile.\n",
    "    \n",
    "    Hotter afternoons push peaks later (sustained AC load into evening).\n",
    "    Base window: HE 16-18 (3-6 PM). Shift later for high temps.\n",
    "    \"\"\"\n",
    "    day_data = hourly_data[hourly_data['Date'] == date]\n",
    "    if len(day_data) == 0:\n",
    "        return [16, 17, 18]  # Default window\n",
    "    \n",
    "    max_temp = day_data['temperature_c'].max()\n",
    "    \n",
    "    if max_temp > 33:\n",
    "        return [17, 18, 19]  # Very hot: peaks later\n",
    "    elif max_temp > 28:\n",
    "        return [16, 17, 18]  # Hot: standard window\n",
    "    else:\n",
    "        return [15, 16, 17]  # Moderate: peaks earlier\n",
    "\n",
    "# Evaluate window accuracy on correctly predicted peak days\n",
    "correctly_alerted = test_results[\n",
    "    (test_results['is_peak_day'] == 1) & (test_results['is_alert'] == 1)\n",
    "]\n",
    "\n",
    "window_results = []\n",
    "for _, row in correctly_alerted.iterrows():\n",
    "    predicted_window = predict_peak_window(row['Date'], hourly)\n",
    "    actual_peak = bp2024_peaks[bp2024_peaks['date'].dt.date == row['Date'].date()]\n",
    "    if len(actual_peak) > 0:\n",
    "        actual_hour = actual_peak['hour_ending'].values[0]\n",
    "        in_window = actual_hour in predicted_window\n",
    "        window_results.append({\n",
    "            'date': row['Date'],\n",
    "            'actual_hour': actual_hour,\n",
    "            'predicted_window': f'HE {predicted_window[0]}-{predicted_window[-1]}',\n",
    "            'in_window': in_window\n",
    "        })\n",
    "\n",
    "if window_results:\n",
    "    window_df = pd.DataFrame(window_results)\n",
    "    window_accuracy = window_df['in_window'].mean()\n",
    "    print(f'3-Hour Window Accuracy: {window_accuracy:.1%}')\n",
    "    print(f'\\nWindow evaluation details:')\n",
    "    print(window_df.to_string(index=False))\n",
    "else:\n",
    "    print('No correctly alerted peak days to evaluate window accuracy.')\n",
    "    window_accuracy = 0.0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Walk-Forward Backtest (2019–2024)\n",
    "\n",
    "Retrain model using only data available prior to each base period,\n",
    "then predict that period's peaks. This simulates true operational conditions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def walk_forward_backtest(features_df, feature_cols, test_periods):\n",
    "    \"\"\"Walk-forward validation: retrain before each test period.\"\"\"\n",
    "    results = []\n",
    "    \n",
    "    for bp in test_periods:\n",
    "        # Train on all base periods before this one\n",
    "        train_mask = (features_df['base_period'] < bp) & \\\n",
    "                     features_df[feature_cols + ['daily_max_demand']].notna().all(axis=1)\n",
    "        test_mask = (features_df['base_period'] == bp) & \\\n",
    "                    features_df[feature_cols + ['daily_max_demand']].notna().all(axis=1)\n",
    "        \n",
    "        train_data = features_df[train_mask]\n",
    "        test_data = features_df[test_mask]\n",
    "        \n",
    "        if len(train_data) < 100 or len(test_data) < 30:\n",
    "            continue\n",
    "        \n",
    "        X_tr = train_data[feature_cols]\n",
    "        y_tr = train_data['daily_max_demand']\n",
    "        X_te = test_data[feature_cols]\n",
    "        y_te = test_data['daily_max_demand']\n",
    "        \n",
    "        # Train XGBoost\n",
    "        model = xgb.XGBRegressor(\n",
    "            n_estimators=300, max_depth=6, learning_rate=0.05,\n",
    "            subsample=0.8, colsample_bytree=0.8, min_child_weight=5,\n",
    "            random_state=42, verbosity=0\n",
    "        )\n",
    "        model.fit(X_tr, y_tr)\n",
    "        \n",
    "        # Generate alerts\n",
    "        test_with_alerts = generate_alerts(model, test_data, feature_cols)\n",
    "        \n",
    "        # Metrics\n",
    "        y_pred = model.predict(X_te)\n",
    "        rmse = np.sqrt(mean_squared_error(y_te, y_pred))\n",
    "        prec = precision_score(test_with_alerts['is_peak_day'], \n",
    "                              test_with_alerts['is_alert'], zero_division=0)\n",
    "        rec = recall_score(test_with_alerts['is_peak_day'], \n",
    "                          test_with_alerts['is_alert'], zero_division=0)\n",
    "        \n",
    "        results.append({\n",
    "            'base_period': bp,\n",
    "            'train_years': f'2010-{bp-1}',\n",
    "            'train_days': len(train_data),\n",
    "            'test_days': len(test_data),\n",
    "            'peak_days': test_with_alerts['is_peak_day'].sum(),\n",
    "            'alert_days': test_with_alerts['is_alert'].sum(),\n",
    "            'rmse_mw': rmse,\n",
    "            'precision': prec,\n",
    "            'recall': rec,\n",
    "            'f1': f1_score(test_with_alerts['is_peak_day'], \n",
    "                          test_with_alerts['is_alert'], zero_division=0),\n",
    "        })\n",
    "        print(f'  BP {bp}: RMSE={rmse:.0f} MW, Precision={prec:.1%}, '\n",
    "              f'Recall={rec:.1%}, Alerts={test_with_alerts[\"is_alert\"].sum()}')\n",
    "    \n",
    "    return pd.DataFrame(results)\n",
    "\n",
    "print('Walk-forward backtest (2019-2024)...')\n",
    "backtest_results = walk_forward_backtest(\n",
    "    features, FEATURE_COLS, \n",
    "    test_periods=[2019, 2020, 2021, 2022, 2023, 2024]\n",
    ")\n",
    "\n",
    "print(f'\\n=== Walk-Forward Results ===')\n",
    "print(backtest_results.round(3).to_string(index=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Walk-forward performance chart\n",
    "fig, axes = plt.subplots(1, 3, figsize=(16, 5))\n",
    "\n",
    "# RMSE over time\n",
    "axes[0].bar(backtest_results['base_period'].astype(str), \n",
    "            backtest_results['rmse_mw'], color='#1565C0', alpha=0.8)\n",
    "axes[0].set_xlabel('Base Period')\n",
    "axes[0].set_ylabel('RMSE (MW)')\n",
    "axes[0].set_title('Prediction Error')\n",
    "axes[0].tick_params(axis='x', rotation=45)\n",
    "\n",
    "# Precision and Recall over time\n",
    "x = np.arange(len(backtest_results))\n",
    "axes[1].bar(x - 0.2, backtest_results['precision'], 0.4, \n",
    "            label='Precision', color='#4CAF50', alpha=0.8)\n",
    "axes[1].bar(x + 0.2, backtest_results['recall'], 0.4, \n",
    "            label='Recall', color='#FF9800', alpha=0.8)\n",
    "axes[1].set_xticks(x)\n",
    "axes[1].set_xticklabels(backtest_results['base_period'].astype(str), rotation=45)\n",
    "axes[1].set_xlabel('Base Period')\n",
    "axes[1].set_ylabel('Score')\n",
    "axes[1].set_title('Peak Detection Performance')\n",
    "axes[1].legend()\n",
    "axes[1].set_ylim(0, 1.1)\n",
    "\n",
    "# Alert days vs peak days\n",
    "axes[2].bar(x - 0.2, backtest_results['alert_days'], 0.4, \n",
    "            label='Alert Days', color='#d32f2f', alpha=0.8)\n",
    "axes[2].bar(x + 0.2, backtest_results['peak_days'], 0.4, \n",
    "            label='Actual Peak Days', color='#1565C0', alpha=0.8)\n",
    "axes[2].set_xticks(x)\n",
    "axes[2].set_xticklabels(backtest_results['base_period'].astype(str), rotation=45)\n",
    "axes[2].set_xlabel('Base Period')\n",
    "axes[2].set_ylabel('Days')\n",
    "axes[2].set_title('Alert Volume')\n",
    "axes[2].legend()\n",
    "\n",
    "plt.suptitle('Walk-Forward Backtest Performance (2019–2024)', fontsize=14, y=1.02)\n",
    "plt.tight_layout()\n",
    "plt.savefig(DATA_DIR / 'walkforward_performance.png', dpi=150, bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Financial Valuation\n",
    "\n",
    "Quantify the dollar value of the prediction model for a hypothetical\n",
    "10 MW Class A customer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Financial parameters\n",
    "CUSTOMER_BASELINE_MW = 10.0    # Normal operating demand\n",
    "CURTAILMENT_MW = 2.0           # Demand during curtailment\n",
    "GA_RATE_PER_MWH = 40.0         # Average GA rate ($/MWh)\n",
    "MONTHLY_GA_TOTAL = 1.5e9 / 12  # ~$125M/month total system GA\n",
    "SYSTEM_PEAK_MW = 22000.0       # Approximate system-wide peak demand\n",
    "CURTAILMENT_COST_PER_EVENT = 15000  # Lost production per curtailment day ($)\n",
    "CURTAILMENT_HOURS = 3          # Hours of curtailment per alert day\n",
    "\n",
    "def compute_pdf(customer_mw, system_mw, n_peaks=5):\n",
    "    \"\"\"Compute Peak Demand Factor.\n",
    "    PDF = sum(customer MWh during peaks) / sum(system MWh during peaks)\n",
    "    \"\"\"\n",
    "    customer_total = customer_mw * n_peaks  # MWh (1 hour each)\n",
    "    system_total = system_mw * n_peaks\n",
    "    return customer_total / system_total\n",
    "\n",
    "def annual_ga_cost(pdf, monthly_ga=MONTHLY_GA_TOTAL):\n",
    "    \"\"\"Annual GA cost = PDF * monthly GA * 12.\"\"\"\n",
    "    return pdf * monthly_ga * 12\n",
    "\n",
    "# Scenario A: No curtailment\n",
    "pdf_no_curtail = compute_pdf(CUSTOMER_BASELINE_MW, SYSTEM_PEAK_MW)\n",
    "cost_no_curtail = annual_ga_cost(pdf_no_curtail)\n",
    "\n",
    "# Scenario C: Perfect foresight (curtail during only actual 5 peaks)\n",
    "pdf_perfect = compute_pdf(CURTAILMENT_MW, SYSTEM_PEAK_MW)\n",
    "cost_perfect = annual_ga_cost(pdf_perfect)\n",
    "savings_perfect = cost_no_curtail - cost_perfect\n",
    "curtailment_cost_perfect = 5 * CURTAILMENT_COST_PER_EVENT\n",
    "net_value_perfect = savings_perfect - curtailment_cost_perfect\n",
    "\n",
    "print('=== Financial Parameters ===')\n",
    "print(f'Customer baseline demand: {CUSTOMER_BASELINE_MW} MW')\n",
    "print(f'Curtailment level: {CURTAILMENT_MW} MW')\n",
    "print(f'PDF (no curtailment): {pdf_no_curtail:.6f}')\n",
    "print(f'PDF (perfect curtailment): {pdf_perfect:.6f}')\n",
    "print(f'\\nAnnual GA cost (no curtailment): ${cost_no_curtail:,.0f}')\n",
    "print(f'Annual GA cost (perfect foresight): ${cost_perfect:,.0f}')\n",
    "print(f'Maximum possible savings: ${savings_perfect:,.0f}')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute financial outcomes for each scenario across backtest periods\n",
    "def evaluate_financial(test_results_df, customer_mw=CUSTOMER_BASELINE_MW,\n",
    "                      curtail_mw=CURTAILMENT_MW, system_mw=SYSTEM_PEAK_MW,\n",
    "                      curtail_cost=CURTAILMENT_COST_PER_EVENT):\n",
    "    \"\"\"Evaluate financial impact of the model's alerts.\"\"\"\n",
    "    df = test_results_df.copy()\n",
    "    \n",
    "    # Count outcomes\n",
    "    true_positives = ((df['is_peak_day'] == 1) & (df['is_alert'] == 1)).sum()\n",
    "    false_positives = ((df['is_peak_day'] == 0) & (df['is_alert'] == 1)).sum()\n",
    "    false_negatives = ((df['is_peak_day'] == 1) & (df['is_alert'] == 0)).sum()\n",
    "    total_peak_days = df['is_peak_day'].sum()\n",
    "    total_alert_days = df['is_alert'].sum()\n",
    "    \n",
    "    # Customer's effective demand during peaks:\n",
    "    # - If alerted AND it was a peak: customer at curtailment level\n",
    "    # - If NOT alerted AND it was a peak: customer at baseline\n",
    "    peaks_curtailed = true_positives\n",
    "    peaks_missed = false_negatives\n",
    "    \n",
    "    # Effective customer MWh during peaks\n",
    "    customer_peak_mwh = (peaks_curtailed * curtail_mw + \n",
    "                         peaks_missed * customer_mw)\n",
    "    system_peak_mwh = total_peak_days * system_mw\n",
    "    \n",
    "    pdf = customer_peak_mwh / system_peak_mwh if system_peak_mwh > 0 else 0\n",
    "    ga_cost = annual_ga_cost(pdf)\n",
    "    ga_savings = cost_no_curtail - ga_cost\n",
    "    total_curtail_cost = total_alert_days * curtail_cost\n",
    "    net_value = ga_savings - total_curtail_cost\n",
    "    \n",
    "    return {\n",
    "        'peaks_curtailed': peaks_curtailed,\n",
    "        'peaks_missed': peaks_missed,\n",
    "        'false_alarms': false_positives,\n",
    "        'total_alert_days': total_alert_days,\n",
    "        'pdf': pdf,\n",
    "        'ga_cost': ga_cost,\n",
    "        'ga_savings': ga_savings,\n",
    "        'curtailment_cost': total_curtail_cost,\n",
    "        'net_value': net_value,\n",
    "    }\n",
    "\n",
    "# Scenario B: Model-guided curtailment\n",
    "model_financial = evaluate_financial(test_results)\n",
    "\n",
    "# Scenario D: Naive heuristic\n",
    "test_naive = test_results.copy()\n",
    "test_naive['is_alert'] = (\n",
    "    (test_naive['daily_max_temp'] > 30) & \n",
    "    (test_naive['is_business_day'] == 1) & \n",
    "    (test_naive['month'].isin([6, 7, 8]))\n",
    ").astype(int)\n",
    "naive_financial = evaluate_financial(test_naive)\n",
    "\n",
    "# Summary table\n",
    "scenarios = pd.DataFrame([\n",
    "    {\n",
    "        'Scenario': 'A: No Curtailment',\n",
    "        'Alert Days': 0,\n",
    "        'Peaks Caught': 0,\n",
    "        'Peaks Missed': 5,\n",
    "        'False Alarms': 0,\n",
    "        'GA Cost': cost_no_curtail,\n",
    "        'GA Savings': 0,\n",
    "        'Curtailment Cost': 0,\n",
    "        'Net Value': 0,\n",
    "    },\n",
    "    {\n",
    "        'Scenario': 'B: Model-Guided',\n",
    "        'Alert Days': model_financial['total_alert_days'],\n",
    "        'Peaks Caught': model_financial['peaks_curtailed'],\n",
    "        'Peaks Missed': model_financial['peaks_missed'],\n",
    "        'False Alarms': model_financial['false_alarms'],\n",
    "        'GA Cost': model_financial['ga_cost'],\n",
    "        'GA Savings': model_financial['ga_savings'],\n",
    "        'Curtailment Cost': model_financial['curtailment_cost'],\n",
    "        'Net Value': model_financial['net_value'],\n",
    "    },\n",
    "    {\n",
    "        'Scenario': 'C: Perfect Foresight',\n",
    "        'Alert Days': 5,\n",
    "        'Peaks Caught': 5,\n",
    "        'Peaks Missed': 0,\n",
    "        'False Alarms': 0,\n",
    "        'GA Cost': cost_perfect,\n",
    "        'GA Savings': savings_perfect,\n",
    "        'Curtailment Cost': curtailment_cost_perfect,\n",
    "        'Net Value': net_value_perfect,\n",
    "    },\n",
    "    {\n",
    "        'Scenario': 'D: Naive Heuristic',\n",
    "        'Alert Days': naive_financial['total_alert_days'],\n",
    "        'Peaks Caught': naive_financial['peaks_curtailed'],\n",
    "        'Peaks Missed': naive_financial['peaks_missed'],\n",
    "        'False Alarms': naive_financial['false_alarms'],\n",
    "        'GA Cost': naive_financial['ga_cost'],\n",
    "        'GA Savings': naive_financial['ga_savings'],\n",
    "        'Curtailment Cost': naive_financial['curtailment_cost'],\n",
    "        'Net Value': naive_financial['net_value'],\n",
    "    },\n",
    "])\n",
    "\n",
    "# Format for display\n",
    "print('=== Financial Valuation — 10 MW Class A Customer (2024 Base Period) ===')\n",
    "display_df = scenarios.copy()\n",
    "for col in ['GA Cost', 'GA Savings', 'Curtailment Cost', 'Net Value']:\n",
    "    display_df[col] = display_df[col].apply(lambda x: f'${x:,.0f}')\n",
    "print(display_df.to_string(index=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Financial comparison bar chart\n",
    "fig, axes = plt.subplots(1, 2, figsize=(14, 6))\n",
    "\n",
    "# Net value comparison\n",
    "scenario_names = scenarios['Scenario'].values\n",
    "net_values = scenarios['Net Value'].values\n",
    "colors = ['#9E9E9E', '#4CAF50', '#1565C0', '#FF9800']\n",
    "axes[0].bar(range(4), net_values, color=colors)\n",
    "axes[0].set_xticks(range(4))\n",
    "axes[0].set_xticklabels(['No Curtail', 'Model', 'Perfect', 'Naive'], rotation=0)\n",
    "axes[0].set_ylabel('Net Value ($)')\n",
    "axes[0].set_title('Annual Net Value by Strategy')\n",
    "axes[0].yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'${x:,.0f}'))\n",
    "axes[0].axhline(y=0, color='black', linestyle='-', linewidth=0.5)\n",
    "\n",
    "# Alert days breakdown\n",
    "x_pos = range(4)\n",
    "axes[1].bar(x_pos, scenarios['Alert Days'].values, color=colors, alpha=0.7)\n",
    "axes[1].set_xticks(range(4))\n",
    "axes[1].set_xticklabels(['No Curtail', 'Model', 'Perfect', 'Naive'], rotation=0)\n",
    "axes[1].set_ylabel('Days')\n",
    "axes[1].set_title('Curtailment Days Required')\n",
    "\n",
    "# Annotate with peaks caught\n",
    "for i, (_, row) in enumerate(scenarios.iterrows()):\n",
    "    axes[1].text(i, row['Alert Days'] + 0.5, \n",
    "                f\"{row['Peaks Caught']}/5\", ha='center', fontsize=10, fontweight='bold')\n",
    "\n",
    "plt.suptitle('Financial Valuation — Model vs. Alternative Strategies', fontsize=14, y=1.02)\n",
    "plt.tight_layout()\n",
    "plt.savefig(DATA_DIR / 'financial_valuation.png', dpi=150, bbox_inches='tight')\n",
    "plt.show()\n",
    "\n",
    "# Value proposition summary\n",
    "model_vs_none = model_financial['net_value']\n",
    "model_vs_naive = model_financial['net_value'] - naive_financial['net_value']\n",
    "print(f'\\n=== Value Proposition ===')\n",
    "print(f'Model net value vs. no curtailment: ${model_vs_none:,.0f}')\n",
    "print(f'Model net value vs. naive heuristic: ${model_vs_naive:,.0f}')\n",
    "print(f'False alarm days per year: {model_financial[\"false_alarms\"]}')\n",
    "print(f'Peaks caught: {model_financial[\"peaks_curtailed\"]}/5')\n",
    "\n",
    "# Save results\n",
    "scenarios.to_csv(DATA_DIR / 'financial_valuation.csv', index=False)\n",
    "backtest_results.to_csv(DATA_DIR / 'walkforward_results.csv', index=False)\n",
    "\n",
    "print('\\n=== Notebook 4 complete ===')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.12.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
