Forecasting solar power for GEFCom2014 with enflow
[1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import enflow as ef
Load problem
[2]:
ef.list_problems()
[2]:
['gefcom2014-solar:full',
'gefcom2014-solar:simple',
'heftcom2024:baseline',
'solar-battery']
[3]:
dataset, env, obj = ef.load_problem("gefcom2014-solar:simple")
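`load_problem` returns three objects: the historical `dataset`, an environment `env` that steps through the forecast tasks one at a time, and an objective `obj` used to score each submission. A quick way to inspect them (a sketch; `obj.name` and `env.n_steps` are the only attributes assumed here, and both are used later in this notebook):

print(type(dataset), type(env), type(obj))
print(obj.name)     # scoring objective, e.g. PinballLoss
print(env.n_steps)  # number of sequential forecast tasks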
[4]:
portfolio = dataset.collection
portfolio.to_tree(show_type=True)
Portfolio (edm.Portfolio)
├── Site1 (edm.PVSystem)
├── Site2 (edm.PVSystem)
└── Site3 (edm.PVSystem)
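The same structure is accessible programmatically through the `assets` attribute, which the predictor below relies on. For example (a sketch using only attributes that appear in the predictor code):

for asset in portfolio.assets:
    print(asset.name)  # Site1, Site2, Site3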
[5]:
env.state_space.sample(n_rows=4)
[5]:
Each site's state combines the normalized `Power` output with weather forecast features (`U10`/`V10` and `U100`/`V100` are the 10 m and 100 m wind components of the NWP input); the rows below are random draws from the state space, not real data.

| | Site1 Power | Site1 U10 | Site1 U100 | Site1 V10 | Site1 V100 | Site2 Power | Site2 U10 | Site2 U100 | Site2 V10 | Site2 V100 | Site3 Power | Site3 U10 | Site3 U100 | Site3 V10 | Site3 V100 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.673862 | -1.074163 | -0.810795 | -0.803215 | -0.134093 | 0.738978 | -0.706520 | 1.534196 | 0.428941 | -1.205406 | 0.569435 | 0.317304 | -0.734723 | 0.265068 | 1.003829 |
| 1 | 0.092037 | -0.251177 | -0.400874 | -1.271738 | 0.932581 | 0.164251 | -0.771803 | 0.996086 | -1.705840 | 0.636402 | 0.920191 | 0.842620 | 1.105172 | 0.327282 | 1.225291 |
| 2 | 0.719638 | 1.448037 | -0.847195 | 0.229970 | 0.383243 | 0.143874 | 0.687098 | -1.108294 | -0.162650 | 0.269868 | 0.667683 | 0.355245 | -1.124801 | 0.138669 | 1.334336 |
| 3 | 0.956428 | 1.253171 | -0.157800 | -0.920692 | 1.686314 | 0.205537 | 1.581575 | 0.840222 | 0.004396 | 0.626344 | 0.560573 | 0.125592 | -0.964017 | 0.493432 | -0.160845 |
[6]:
env.action_space.sample(n_rows=4)
[6]:
| | Site1 Quantile_forecast_1 | Site1 Quantile_forecast_2 | Site1 Quantile_forecast_3 | Site2 Quantile_forecast_1 | Site2 Quantile_forecast_2 | Site2 Quantile_forecast_3 | Site3 Quantile_forecast_1 | Site3 Quantile_forecast_2 | Site3 Quantile_forecast_3 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.278813 | 0.220506 | 0.322269 | 0.834286 | 0.697384 | 0.582269 | 0.549760 | 0.137990 | 0.322234 |
| 1 | 0.850037 | 0.226165 | 0.604743 | 0.151117 | 0.424133 | 0.663252 | 0.192985 | 0.566891 | 0.285188 |
| 2 | 0.862166 | 0.255419 | 0.663567 | 0.825011 | 0.143615 | 0.982136 | 0.712281 | 0.752062 | 0.947551 |
| 3 | 0.588235 | 0.134082 | 0.970577 | 0.562137 | 0.757812 | 0.148808 | 0.821535 | 0.373762 | 0.258579 |
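An action is one forecast value per site and quantile level, matching the three quantiles the predictor will be trained on. The expected column layout can be checked directly (a sketch reusing the `sample` call from above; the printed list is abbreviated):

df_action = env.action_space.sample(n_rows=1)
print(df_action.columns.tolist())
# [('Site1', 'Quantile_forecast_1'), ..., ('Site3', 'Quantile_forecast_3')]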
Create prediction model

The predictor below fits one LightGBM quantile regressor per (site, quantile) pair. Rather than predicting power directly, each model learns the residual between observed power and a physical PV estimate from `pvpower`; the physical baseline is added back at prediction time.
[7]:
import lightgbm as lgb
from contextlib import nullcontext
import pvpower

class LGBGEFCom2014Predictor(ef.Predictor):
    def __init__(self, portfolio: ef.Portfolio, name=None, quantiles=None):
        """
        Initialize the predictor.

        Args:
            portfolio (ef.Portfolio): Portfolio of PV assets to forecast.
            name (str): Name used in progress bars and result tables.
            quantiles (list): Quantile levels to fit one model each for.
                Example: [0.1, 0.5, 0.9]
        """
        self.portfolio = portfolio
        self.name = name
        self.models = {}  # One trained model per (site, quantile) key
        self.quantiles = quantiles

    def preprocess(self, df_features: pd.DataFrame) -> pd.DataFrame:
        # Work on a copy so the caller's dataframe is left untouched
        df_features = df_features.copy()

        # Add lead time (hours between forecast reference time and valid time)
        ref_datetime = df_features.index.get_level_values(0)
        valid_datetime = df_features.index.get_level_values(1)
        lead_time = (valid_datetime - ref_datetime) / pd.Timedelta("1 hour")
        for asset in self.portfolio.assets:
            df_features.loc[:, (asset.name, "lead_time")] = lead_time

        # De-accumulate features that are cumulative since forecast start:
        # take hourly differences, keeping the raw value at hour 1 where the
        # accumulation restarts
        features_accum = ["VAR169", "VAR175", "VAR178", "VAR228"]
        df_accum = df_features.loc[:, (slice(None), features_accum)]
        df_accum = df_accum.diff()
        is_first_hour = df_accum.index.get_level_values(1).hour == 1
        df_accum[is_first_hour] = df_features.loc[is_first_hour, (slice(None), features_accum)]
        # Convert the radiation features from hourly-accumulated J/m^2 to average W/m^2
        df_accum.loc[:, (slice(None), features_accum[:3])] = df_accum.loc[:, (slice(None), features_accum[:3])] / 3600
        df_features.loc[:, (slice(None), features_accum)] = df_accum

        # Calculate solar position
        index = df_features.index.get_level_values(1)
        for asset in self.portfolio.assets:
            df_solpos = asset.location.to_pvlib().get_solarposition(index)
            df_solpos = df_solpos.loc[:, ["zenith", "azimuth"]]
            df_solpos.index = df_features.index
            for column in df_solpos.columns:
                df_features[(asset.name, column)] = df_solpos[column]

        # Calculate clear-sky power
        for asset in self.portfolio.assets:
            clearsky_power = pvpower.clearsky_power(asset, index)
            df_features[(asset.name, "clearsky_power")] = clearsky_power

        # Calculate physical power from the irradiance forecast
        for asset in self.portfolio.assets:
            physical_power = pvpower.physical_power(asset, df_features[(asset.name, "VAR169")].droplevel(0))
            df_features[(asset.name, "physical_power")] = physical_power.values

        return df_features

    def train(self, df_features: pd.DataFrame, target: pd.DataFrame, show_progress=True, **kwargs):
        """
        Train separate LightGBM models for each site and quantile.

        Args:
            df_features (pd.DataFrame): Feature dataframe (X) with site as the top column level.
            target (pd.DataFrame): Target dataframe (y), also with site as the top column level.
            kwargs: Additional parameters passed to lgb.LGBMRegressor.
        """
        df_features = self.preprocess(df_features)

        # Get the list of unique sites from the top column level
        sites = df_features.columns.get_level_values(0).unique()

        pbar = tqdm(total=len(sites) * len(self.quantiles), mininterval=0, desc=f"Training {self.name}") if show_progress else nullcontext()
        with pbar as progress:
            # Loop over each site
            for site in sites:
                # Extract the features and target for the current site; the
                # models learn the residual relative to the physical PV power
                site_features = df_features.xs(site, axis=1, level=0)
                site_target = target.xs(site, axis=1, level=0)
                site_target = site_target["Power"] - site_features["physical_power"]

                # Loop over each quantile
                for quantile in self.quantiles:
                    # Initialize a LightGBM quantile regressor for this quantile
                    params = {"objective": "quantile", "alpha": quantile, "verbose": -1}
                    params.update(kwargs)  # Add any additional LightGBM parameters
                    model = lgb.LGBMRegressor(**params)
                    # Train the model on the site's data
                    model.fit(site_features, site_target)
                    # Store the trained model under the key (site, quantile)
                    self.models[(site, quantile)] = model
                    if show_progress:
                        progress.update(1)

    def predict(self, df_features: pd.DataFrame) -> pd.DataFrame:
        """
        Predict all quantiles for all sites using the trained models.

        Args:
            df_features (pd.DataFrame): Feature dataframe (X) with site as the top column level.

        Returns:
            pd.DataFrame: Predictions with (site, quantile) multi-index columns.
        """
        df_features = self.preprocess(df_features)

        predictions = {}
        sites = df_features.columns.get_level_values(0).unique()
        # Loop over each site and quantile
        for site in sites:
            # Extract the features for the current site
            site_features = df_features.xs(site, axis=1, level=0)
            for quantile in self.quantiles:
                # Check that a model exists for this site and quantile
                if (site, quantile) not in self.models:
                    raise ValueError(f"No trained model for site '{site}' and quantile '{quantile}'.")
                # Predict the residual and add the physical baseline back
                model = self.models[(site, quantile)]
                site_predictions = model.predict(site_features)
                site_predictions = site_predictions + site_features["physical_power"].values
                # Store the predictions under the quantile for the current site
                predictions[(site, f"quantile_{round(100 * quantile)}")] = site_predictions

        # Convert the dictionary to a dataframe with multi-index columns
        df_predictions = pd.DataFrame.from_dict(predictions)
        df_predictions.index = df_features.index
        return df_predictions
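Any extra keyword arguments to `train` are forwarded to `lgb.LGBMRegressor`, so hyperparameters can be tuned without touching the class. A sketch with hypothetical values (`training_input` and `training_target` are created in the cells below):

predictor = LGBGEFCom2014Predictor(portfolio=portfolio, name="tuned", quantiles=[0.1, 0.5, 0.9])
predictor.train(df_features=training_input, target=training_target,
                n_estimators=200, learning_rate=0.05, show_progress=False)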
Run the sequential decision loop
[8]:
initial_data, next_input = env.reset()
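`reset` returns the historical warm-up data together with the input for the first forecast task. A quick check (a sketch; only the keys accessed below are assumed):

print(initial_data.keys())   # expect: 'input' and 'target'
print(next_input.index.nlevels)  # two-level (reference time, valid time) index, per preprocess() above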
[9]:
training_input = initial_data["input"]
training_target = initial_data["target"]
predictor1 = LGBGEFCom2014Predictor(portfolio=portfolio, name="predictor1", quantiles=[0.1, 0.5, 0.9])
predictor1.train(df_features=training_input, target=training_target)
predictor2 = predictor1.copy(name="predictor2")
Training predictor1: 100%|██████████| 9/9 [00:02<00:00, 3.67it/s]
[10]:
losses = {predictor1.name: [], predictor2.name: []}
predictions = []
for i in range(env.n_steps):
    prediction1 = predictor1.predict(df_features=next_input)
    prediction2 = predictor2.predict(df_features=next_input)
    predictions.append(prediction1)
    training_input = pd.concat([training_input, next_input])
    next_input, next_target, done = env.step()
    training_target = pd.concat([training_target, next_target])
    loss1 = obj.calculate(next_target, prediction1)
    loss2 = obj.calculate(next_target, prediction2)
    losses[predictor1.name].append(loss1)
    losses[predictor2.name].append(loss2)
    predictor2.train(df_features=training_input, target=training_target)
    print(f"{predictor1.name} {obj.name} for step {i+1}: {loss1}")
    print(f"{predictor2.name} {obj.name} for step {i+1}: {loss2}")
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.36it/s]
predictor1 PinballLoss for step 1: 0.011204054803155625
predictor2 PinballLoss for step 1: 0.011204054803155625
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.76it/s]
predictor1 PinballLoss for step 2: 0.009240390300022897
predictor2 PinballLoss for step 2: 0.009063758759589114
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.35it/s]
predictor1 PinballLoss for step 3: 0.010753606907901054
predictor2 PinballLoss for step 3: 0.010443964712996245
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.56it/s]
predictor1 PinballLoss for step 4: 0.012257128079863674
predictor2 PinballLoss for step 4: 0.012103222813525286
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.57it/s]
predictor1 PinballLoss for step 5: 0.015709185957303754
predictor2 PinballLoss for step 5: 0.015349049969093274
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.53it/s]
predictor1 PinballLoss for step 6: 0.010714876092921388
predictor2 PinballLoss for step 6: 0.010428383539273101
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.33it/s]
predictor1 PinballLoss for step 7: 0.01060918593113412
predictor2 PinballLoss for step 7: 0.010258284122606633
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.29it/s]
predictor1 PinballLoss for step 8: 0.011098529790581903
predictor2 PinballLoss for step 8: 0.010757249681650975
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.33it/s]
predictor1 PinballLoss for step 9: 0.011700506431809785
predictor2 PinballLoss for step 9: 0.011300162463063212
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.19it/s]
predictor1 PinballLoss for step 10: 0.011686086915546115
predictor2 PinballLoss for step 10: 0.011458028889962996
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.75it/s]
predictor1 PinballLoss for step 11: 0.01105819817215981
predictor2 PinballLoss for step 11: 0.010466426422608343
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.20it/s]
predictor1 PinballLoss for step 12: 0.01254798074217749
predictor2 PinballLoss for step 12: 0.012111680115991199
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.05it/s]
predictor1 PinballLoss for step 13: 0.010533883821107594
predictor2 PinballLoss for step 13: 0.01007272056513573
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.15it/s]
predictor1 PinballLoss for step 14: 0.008110719272860376
predictor2 PinballLoss for step 14: 0.0074295384959401
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.99it/s]
predictor1 PinballLoss for step 15: 0.012833636041290689
predictor2 PinballLoss for step 15: 0.01174686713140884
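The printed metric is the pinball (quantile) loss. For an observation $y$ and a forecast $\hat{y}_q$ at quantile level $q$:

$$L_q(y, \hat{y}_q) = \max\bigl(q\,(y - \hat{y}_q),\;(q-1)\,(y - \hat{y}_q)\bigr)$$

Each step's score is this loss aggregated over sites, lead times, and the three quantile levels (the exact aggregation inside `obj.calculate` is assumed here to be a mean). The retrained `predictor2` edges out the static `predictor1` on nearly every task; a quick aggregate check (a sketch over the `losses` collected above):

for name, vals in losses.items():
    print(f"{name}: mean {obj.name} = {np.mean(vals):.5f}")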
[11]:
df_predictions = pd.concat(predictions)
env.plot_forecasts(training_target, df_predictions, site="Site1")
[Interactive Plotly figure: quantile forecasts and observed power for Site1]
[12]:
env.plot_overall_results(losses,
                         drop_tasks=["Task1", "Task2", "Task3", "Task4"]);
[figure: overall results, tasks 1-4 dropped]
[13]:
env.plot_results(losses,
                 drop_tasks=["Task1", "Task2", "Task3", "Task4"],
                 n_top_teams=3,
                 xlim=0.05);
[figure: per-task results against the top 3 teams]