GEFCom2014 solar forecasting


[1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import enflow as ef

Load problem

[2]:
ef.list_problems()
[2]:
['gefcom2014-solar:full',
 'gefcom2014-solar:simple',
 'heftcom2024:baseline',
 'solar-battery']
[3]:
dataset, env, obj = ef.load_problem("gefcom2014-solar:simple")
[4]:
portfolio = dataset.collection
portfolio.to_tree(show_type=True)
Portfolio (edm.Portfolio)
β”œβ”€β”€ Site1 (edm.PVSystem)
β”œβ”€β”€ Site2 (edm.PVSystem)
└── Site3 (edm.PVSystem)
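Each asset carries the metadata the predictor below relies on. A quick inspection sketch (hypothetical, but asset.name and asset.location.to_pvlib() are the attributes actually used in the predictor code further down; latitude and longitude are standard pvlib Location attributes):

for asset in portfolio.assets:
    # asset.name and asset.location are assumed from the predictor code below
    loc = asset.location.to_pvlib()
    print(asset.name, loc.latitude, loc.longitude)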
[5]:
env.state_space.sample(n_rows=4)
[5]:
Site1 Site2 Site3
Power U10 U100 V10 V100 Power U10 U100 V10 V100 Power U10 U100 V10 V100
0 0.673862 -1.074163 -0.810795 -0.803215 -0.134093 0.738978 -0.706520 1.534196 0.428941 -1.205406 0.569435 0.317304 -0.734723 0.265068 1.003829
1 0.092037 -0.251177 -0.400874 -1.271738 0.932581 0.164251 -0.771803 0.996086 -1.705840 0.636402 0.920191 0.842620 1.105172 0.327282 1.225291
2 0.719638 1.448037 -0.847195 0.229970 0.383243 0.143874 0.687098 -1.108294 -0.162650 0.269868 0.667683 0.355245 -1.124801 0.138669 1.334336
3 0.956428 1.253171 -0.157800 -0.920692 1.686314 0.205537 1.581575 0.840222 0.004396 0.626344 0.560573 0.125592 -0.964017 0.493432 -0.160845
[6]:
env.action_space.sample(n_rows=4)
[6]:
Site1 Site2 Site3
Quantile_forecast_1 Quantile_forecast_2 Quantile_forecast_3 Quantile_forecast_1 Quantile_forecast_2 Quantile_forecast_3 Quantile_forecast_1 Quantile_forecast_2 Quantile_forecast_3
0 0.278813 0.220506 0.322269 0.834286 0.697384 0.582269 0.549760 0.137990 0.322234
1 0.850037 0.226165 0.604743 0.151117 0.424133 0.663252 0.192985 0.566891 0.285188
2 0.862166 0.255419 0.663567 0.825011 0.143615 0.982136 0.712281 0.752062 0.947551
3 0.588235 0.134082 0.970577 0.562137 0.757812 0.148808 0.821535 0.373762 0.258579
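The actions are quantile forecasts per site, and the objective loaded above (obj) scores them with the pinball loss, as the output of the decision loop below shows. A minimal sketch of the pinball loss for a single quantile q, assuming obj.calculate averages something like this over sites, quantiles, and time steps:

def pinball_loss(y_true, y_pred, q):
    # Under-prediction is weighted by q, over-prediction by (1 - q)
    diff = y_true - y_pred
    return np.mean(np.maximum(q * diff, (q - 1) * diff))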

Create prediction model

[7]:
import lightgbm as lgb
import pandas as pd
from contextlib import nullcontext
import pvpower

class LGBGEFCom2014Predictor(ef.Predictor):
    def __init__(self, portfolio: ef.Portfolio, name=None, quantiles=None):
        """
        Initialize the Predictor class.

        Args:
            quantiles (list): List of quantiles for which to create separate models.
                              Example: [0.1, 0.5, 0.9]
        """

        self.portfolio = portfolio
        self.name = name
        self.models = {}  # Dictionary to hold models for each site and quantile
        self.quantiles = quantiles

    def preprocess(self, df_features: pd.DataFrame) -> pd.DataFrame:

        # Reassign df to a copy only within the function scope
        df_features = df_features.copy()

        # Add lead time (hours ahead of the forecast reference time) as a feature
        ref_datetime = df_features.index.get_level_values(0)
        valid_datetime = df_features.index.get_level_values(1)
        lead_time = (valid_datetime - ref_datetime) / pd.Timedelta('1 hour')
        for asset in self.portfolio.assets:
            df_features.loc[:, (asset.name, 'lead_time')] = lead_time

        # De-accumulate features provided as running totals since the forecast
        # reference time; the first lead time (hour == 1) keeps its raw value
        features_accum = ['VAR169', 'VAR175', 'VAR178', 'VAR228']
        df_accum = df_features.loc[:, (slice(None), features_accum)].diff()
        first_hour = df_accum.index.get_level_values(1).hour == 1
        df_accum[first_hour] = df_features.loc[first_hour, (slice(None), features_accum)]
        df_accum.loc[:, (slice(None), features_accum[:3])] = df_accum.loc[:, (slice(None), features_accum[:3])] / 3600  # Convert from J to Wh/h
        df_features.loc[:, (slice(None), features_accum)] = df_accum

        # Calculate solar position
        index = df_features.index.get_level_values(1)
        for asset in self.portfolio.assets:
            df_solpos = asset.location.to_pvlib().get_solarposition(index)
            df_solpos = df_solpos.loc[:, ['zenith', 'azimuth']]
            df_solpos.index = df_features.index
            for column in df_solpos.columns:
                df_features[(asset.name, column)] = df_solpos[column]

        # Calculate clear-sky power for each asset
        for asset in self.portfolio.assets:
            clearsky_power = pvpower.clearsky_power(asset, index)
            df_features[(asset.name, "clearsky_power")] = clearsky_power

        # Calculate physical (PV model) power from irradiance (VAR169)
        for asset in self.portfolio.assets:
            physical_power = pvpower.physical_power(asset, df_features[(asset.name, "VAR169")].droplevel(0))
            df_features[(asset.name, "physical_power")] = physical_power.values

        return df_features

    def train(self, df_features: pd.DataFrame, target: pd.DataFrame, show_progress=True, **kwargs):
        """
        Train separate LightGBM models for each site and quantile.

        Args:
            features (pd.DataFrame): Multi-indexed dataframe where the top-level index corresponds to sites.
            target (pd.DataFrame): The target dataframe (y), also multi-indexed by site.
            kwargs: Additional parameters to pass to the LightGBMRegressor model.
        """

        df_features = self.preprocess(df_features)

        # Get the list of unique sites from the multi-index (top level)
        sites = df_features.columns.get_level_values(0).unique()
        feature_names = df_features.columns.get_level_values(1).unique()

        pbar = tqdm(total=len(sites)*len(self.quantiles), mininterval=0, desc=f"Training {self.name}") if show_progress else nullcontext()

        with pbar as progress:
            # Loop over each site
            for site in sites:
                # Extract the features and target for the current site; the model is
                # trained on the residual between measured and physical (PV model) power
                site_features = df_features.xs(site, axis=1, level=0)
                site_target = target.xs(site, axis=1, level=0)
                site_target = site_target["Power"] - site_features["physical_power"]

                # Loop over each quantile
                for quantile in self.quantiles:
                    # Initialize a LightGBM model for this quantile
                    params = {'objective': 'quantile', 'alpha': quantile, "verbose": -1}
                    params.update(kwargs)  # Add any additional LightGBM parameters

                    model = lgb.LGBMRegressor(**params)

                    # Train the model on the site's data
                    model.fit(site_features, site_target)

                    # Store the trained model with a key (site, quantile)
                    self.models[(site, quantile)] = model

                    if show_progress:
                        progress.update(1)

    def predict(self, df_features: pd.DataFrame) -> pd.DataFrame:
        """
        Make predictions for a specific site and quantile using the trained model.

        Args:
            df_features (pd.DataFrame): The feature dataframe (X), multi-indexed column by site.

        Returns:
            pd.DataFrame: Predictions from the model.
        """

        df_features = self.preprocess(df_features)

        # Dictionary keyed by (site, quantile); converted to MultiIndex columns below
        predictions = {}

        # Get the list of unique sites from the column multi-index
        sites = df_features.columns.get_level_values(0).unique()

        # Loop over each site and quantile
        for site in sites:
            # Extract the features for the current site
            site_features = df_features.xs(site, axis=1, level=0)

            for quantile in self.quantiles:
                # Check if the model for the given site and quantile exists
                if (site, quantile) not in self.models:
                    raise ValueError(f"No trained model for site '{site}' and quantile '{quantile}'.")

                # Predict with the stored model and add the physical power back
                model = self.models[(site, quantile)]
                site_predictions = model.predict(site_features)
                site_predictions = site_predictions + site_features["physical_power"].values

                # Store the predictions under the quantile for the current site
                predictions[(site, f"quantile_{round(100*quantile)}")] = site_predictions

        # Convert the tuple-keyed dictionary to a DataFrame with multi-index columns
        df_predictions = pd.DataFrame.from_dict(predictions)
        df_predictions.index = df_features.index

        return df_predictions
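Two design choices in the class deserve a note. First, train fits LightGBM on the residual Power - physical_power and predict adds physical_power back, so the statistical model only corrects the physical PV model. Second, the de-accumulation step in preprocess assumes the listed variables are running totals since the forecast reference time; a toy illustration of that logic on made-up values:

idx = pd.MultiIndex.from_product(
    [pd.to_datetime(['2014-04-01 00:00']),
     pd.date_range('2014-04-01 01:00', periods=4, freq='h')],
    names=['ref_datetime', 'valid_datetime'],
)
accum = pd.Series([3.6e6, 7.2e6, 10.8e6, 14.4e6], index=idx)  # J, accumulated
hourly = accum.diff()
first_hour = hourly.index.get_level_values(1).hour == 1
hourly[first_hour] = accum[first_hour]  # first lead time keeps its raw value
print(hourly / 3600)  # 1000.0 Wh/h in every hour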

Run the sequential decision loop

[8]:
initial_data, next_input = env.reset()
[9]:
training_input = initial_data["input"]
training_target = initial_data["target"]
predictor1 = LGBGEFCom2014Predictor(portfolio=portfolio, name="predictor1", quantiles=[0.1, 0.5, 0.9])
predictor1.train(df_features=training_input, target=training_target)

predictor2 = predictor1.copy(name="predictor2")
Training predictor1: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.67it/s]
[10]:
losses = {predictor1.name: [], predictor2.name: []}
predictions = []
for i in range(env.n_steps):
    prediction1 = predictor1.predict(df_features=next_input)
    prediction2 = predictor2.predict(df_features=next_input)
    predictions.append(prediction1)

    # Reveal the outcome for the period just predicted and extend the training data
    training_input = pd.concat([training_input, next_input])
    next_input, next_target, done = env.step()
    training_target = pd.concat([training_target, next_target])

    loss1 = obj.calculate(next_target, prediction1)
    loss2 = obj.calculate(next_target, prediction2)

    losses[predictor1.name].append(loss1)
    losses[predictor2.name].append(loss2)

    # predictor2 is retrained on all data seen so far; predictor1 stays fixed
    predictor2.train(df_features=training_input, target=training_target)

    print(f"{predictor1.name} {obj.name} for step {i+1}: {loss1}")
    print(f"{predictor2.name} {obj.name} for step {i+1}: {loss2}")
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:03<00:00,  2.36it/s]
predictor1 PinballLoss for step 1: 0.011204054803155625
predictor2 PinballLoss for step 1: 0.011204054803155625
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.76it/s]
predictor1 PinballLoss for step 2: 0.009240390300022897
predictor2 PinballLoss for step 2: 0.009063758759589114
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.35it/s]
predictor1 PinballLoss for step 3: 0.010753606907901054
predictor2 PinballLoss for step 3: 0.010443964712996245
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.56it/s]
predictor1 PinballLoss for step 4: 0.012257128079863674
predictor2 PinballLoss for step 4: 0.012103222813525286
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.57it/s]
predictor1 PinballLoss for step 5: 0.015709185957303754
predictor2 PinballLoss for step 5: 0.015349049969093274
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.53it/s]
predictor1 PinballLoss for step 6: 0.010714876092921388
predictor2 PinballLoss for step 6: 0.010428383539273101
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.33it/s]
predictor1 PinballLoss for step 7: 0.01060918593113412
predictor2 PinballLoss for step 7: 0.010258284122606633
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.29it/s]
predictor1 PinballLoss for step 8: 0.011098529790581903
predictor2 PinballLoss for step 8: 0.010757249681650975
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.33it/s]
predictor1 PinballLoss for step 9: 0.011700506431809785
predictor2 PinballLoss for step 9: 0.011300162463063212
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.19it/s]
predictor1 PinballLoss for step 10: 0.011686086915546115
predictor2 PinballLoss for step 10: 0.011458028889962996
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:03<00:00,  2.75it/s]
predictor1 PinballLoss for step 11: 0.01105819817215981
predictor2 PinballLoss for step 11: 0.010466426422608343
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.20it/s]
predictor1 PinballLoss for step 12: 0.01254798074217749
predictor2 PinballLoss for step 12: 0.012111680115991199
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.05it/s]
predictor1 PinballLoss for step 13: 0.010533883821107594
predictor2 PinballLoss for step 13: 0.01007272056513573
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:02<00:00,  3.15it/s]
predictor1 PinballLoss for step 14: 0.008110719272860376
predictor2 PinballLoss for step 14: 0.0074295384959401
Training predictor2: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 9/9 [00:03<00:00,  2.99it/s]
predictor1 PinballLoss for step 15: 0.012833636041290689
predictor2 PinballLoss for step 15: 0.01174686713140884
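A quick way to summarize the run, using the scalar losses collected above: retraining after every step (predictor2) should give a slightly lower average pinball loss than the static predictor1.

for name, step_losses in losses.items():
    print(f'{name}: mean {obj.name} = {np.mean(step_losses):.6f}')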

[11]:
df_predictions = pd.concat(predictions)
env.plot_forecasts(training_target, df_predictions, site="Site1")

(Interactive Plotly figure: measured power and quantile forecasts for Site1.)

[12]:
env.plot_overall_results(losses,
                         drop_tasks=["Task1", "Task2", "Task3", "Task4"]);
(Figure: overall results for predictor1 and predictor2.)
[13]:
env.plot_results(losses,
                 drop_tasks=["Task1", "Task2", "Task3", "Task4"],
                 n_top_teams=3,
                 xlim=0.05);
(Figure: per-task results for predictor1 and predictor2 alongside the top 3 teams.)