Forecasting solar power for GEFCom2014 with enflow
[1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import enflow as ef
Load problem
[2]:
ef.list_problems()
[2]:
['gefcom2014-solar:full',
'gefcom2014-solar:simple',
'heftcom2024:baseline',
'solar-battery']
[3]:
dataset, env, obj = ef.load_problem("gefcom2014-solar:simple")
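`load_problem` returns three objects: the historical `dataset`, an environment `env` that steps through the forecast tasks one at a time, and an objective `obj` used to score each submission. A quick way to inspect them (a sketch; `obj.name` and `env.n_steps` are the only attributes assumed here, and both are used later in this notebook):

print(type(dataset), type(env), type(obj))
print(obj.name)     # scoring objective, e.g. PinballLoss
print(env.n_steps)  # number of sequential forecast tasks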
[4]:
portfolio = dataset.collection
portfolio.to_tree(show_type=True)
Portfolio (edm.Portfolio)
├── Site1 (edm.PVSystem)
├── Site2 (edm.PVSystem)
└── Site3 (edm.PVSystem)
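The same structure is accessible programmatically through the `assets` attribute, which the predictor below relies on. For example (a sketch using only attributes that appear in the predictor code):

for asset in portfolio.assets:
    print(asset.name)  # Site1, Site2, Site3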
[5]:
env.state_space.sample(n_rows=4)
[5]:
Each site's state combines the normalized `Power` output with weather forecast features (`U10`/`V10` and `U100`/`V100` are the 10 m and 100 m wind components of the NWP input); the rows below are random draws from the state space, not real data.

| | Site1 Power | Site1 U10 | Site1 U100 | Site1 V10 | Site1 V100 | Site2 Power | Site2 U10 | Site2 U100 | Site2 V10 | Site2 V100 | Site3 Power | Site3 U10 | Site3 U100 | Site3 V10 | Site3 V100 |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.673862 | -1.074163 | -0.810795 | -0.803215 | -0.134093 | 0.738978 | -0.706520 | 1.534196 | 0.428941 | -1.205406 | 0.569435 | 0.317304 | -0.734723 | 0.265068 | 1.003829 |
| 1 | 0.092037 | -0.251177 | -0.400874 | -1.271738 | 0.932581 | 0.164251 | -0.771803 | 0.996086 | -1.705840 | 0.636402 | 0.920191 | 0.842620 | 1.105172 | 0.327282 | 1.225291 |
| 2 | 0.719638 | 1.448037 | -0.847195 | 0.229970 | 0.383243 | 0.143874 | 0.687098 | -1.108294 | -0.162650 | 0.269868 | 0.667683 | 0.355245 | -1.124801 | 0.138669 | 1.334336 |
| 3 | 0.956428 | 1.253171 | -0.157800 | -0.920692 | 1.686314 | 0.205537 | 1.581575 | 0.840222 | 0.004396 | 0.626344 | 0.560573 | 0.125592 | -0.964017 | 0.493432 | -0.160845 |
[6]:
env.action_space.sample(n_rows=4)
[6]:
| | Site1 Quantile_forecast_1 | Site1 Quantile_forecast_2 | Site1 Quantile_forecast_3 | Site2 Quantile_forecast_1 | Site2 Quantile_forecast_2 | Site2 Quantile_forecast_3 | Site3 Quantile_forecast_1 | Site3 Quantile_forecast_2 | Site3 Quantile_forecast_3 |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.278813 | 0.220506 | 0.322269 | 0.834286 | 0.697384 | 0.582269 | 0.549760 | 0.137990 | 0.322234 |
| 1 | 0.850037 | 0.226165 | 0.604743 | 0.151117 | 0.424133 | 0.663252 | 0.192985 | 0.566891 | 0.285188 |
| 2 | 0.862166 | 0.255419 | 0.663567 | 0.825011 | 0.143615 | 0.982136 | 0.712281 | 0.752062 | 0.947551 |
| 3 | 0.588235 | 0.134082 | 0.970577 | 0.562137 | 0.757812 | 0.148808 | 0.821535 | 0.373762 | 0.258579 |
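An action is one forecast value per site and quantile level, matching the three quantiles the predictor will be trained on. The expected column layout can be checked directly (a sketch reusing the `sample` call from above; the printed list is abbreviated):

df_action = env.action_space.sample(n_rows=1)
print(df_action.columns.tolist())
# [('Site1', 'Quantile_forecast_1'), ..., ('Site3', 'Quantile_forecast_3')]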
Create prediction model

The predictor below fits one LightGBM quantile regressor per (site, quantile) pair. Rather than predicting power directly, each model learns the residual between observed power and a physical PV estimate from `pvpower`; the physical baseline is added back at prediction time.
[7]:
import lightgbm as lgb
from contextlib import nullcontext
import pvpower

class LGBGEFCom2014Predictor(ef.Predictor):
    def __init__(self, portfolio: ef.Portfolio, name=None, quantiles=None):
        """
        Initialize the predictor.

        Args:
            portfolio (ef.Portfolio): Portfolio of PV assets to forecast.
            name (str): Name used in progress bars and result tables.
            quantiles (list): Quantile levels to fit one model each for.
                Example: [0.1, 0.5, 0.9]
        """
        self.portfolio = portfolio
        self.name = name
        self.models = {}  # One trained model per (site, quantile) key
        self.quantiles = quantiles

    def preprocess(self, df_features: pd.DataFrame) -> pd.DataFrame:
        # Work on a copy so the caller's dataframe is left untouched
        df_features = df_features.copy()

        # Add lead time (hours between forecast reference time and valid time)
        ref_datetime = df_features.index.get_level_values(0)
        valid_datetime = df_features.index.get_level_values(1)
        lead_time = (valid_datetime - ref_datetime) / pd.Timedelta("1 hour")
        for asset in self.portfolio.assets:
            df_features.loc[:, (asset.name, "lead_time")] = lead_time

        # De-accumulate features that are cumulative since forecast start:
        # take hourly differences, keeping the raw value at hour 1 where the
        # accumulation restarts
        features_accum = ["VAR169", "VAR175", "VAR178", "VAR228"]
        df_accum = df_features.loc[:, (slice(None), features_accum)]
        df_accum = df_accum.diff()
        is_first_hour = df_accum.index.get_level_values(1).hour == 1
        df_accum[is_first_hour] = df_features.loc[is_first_hour, (slice(None), features_accum)]
        # Convert the radiation features from hourly-accumulated J/m^2 to average W/m^2
        df_accum.loc[:, (slice(None), features_accum[:3])] = df_accum.loc[:, (slice(None), features_accum[:3])] / 3600
        df_features.loc[:, (slice(None), features_accum)] = df_accum

        # Calculate solar position
        index = df_features.index.get_level_values(1)
        for asset in self.portfolio.assets:
            df_solpos = asset.location.to_pvlib().get_solarposition(index)
            df_solpos = df_solpos.loc[:, ["zenith", "azimuth"]]
            df_solpos.index = df_features.index
            for column in df_solpos.columns:
                df_features[(asset.name, column)] = df_solpos[column]

        # Calculate clear-sky power
        for asset in self.portfolio.assets:
            clearsky_power = pvpower.clearsky_power(asset, index)
            df_features[(asset.name, "clearsky_power")] = clearsky_power

        # Calculate physical power from the irradiance forecast
        for asset in self.portfolio.assets:
            physical_power = pvpower.physical_power(asset, df_features[(asset.name, "VAR169")].droplevel(0))
            df_features[(asset.name, "physical_power")] = physical_power.values

        return df_features

    def train(self, df_features: pd.DataFrame, target: pd.DataFrame, show_progress=True, **kwargs):
        """
        Train separate LightGBM models for each site and quantile.

        Args:
            df_features (pd.DataFrame): Feature dataframe (X) with site as the top column level.
            target (pd.DataFrame): Target dataframe (y), also with site as the top column level.
            kwargs: Additional parameters passed to lgb.LGBMRegressor.
        """
        df_features = self.preprocess(df_features)

        # Get the list of unique sites from the top column level
        sites = df_features.columns.get_level_values(0).unique()

        pbar = tqdm(total=len(sites) * len(self.quantiles), mininterval=0, desc=f"Training {self.name}") if show_progress else nullcontext()
        with pbar as progress:
            # Loop over each site
            for site in sites:
                # Extract the features and target for the current site; the
                # models learn the residual relative to the physical PV power
                site_features = df_features.xs(site, axis=1, level=0)
                site_target = target.xs(site, axis=1, level=0)
                site_target = site_target["Power"] - site_features["physical_power"]

                # Loop over each quantile
                for quantile in self.quantiles:
                    # Initialize a LightGBM quantile regressor for this quantile
                    params = {"objective": "quantile", "alpha": quantile, "verbose": -1}
                    params.update(kwargs)  # Add any additional LightGBM parameters
                    model = lgb.LGBMRegressor(**params)
                    # Train the model on the site's data
                    model.fit(site_features, site_target)
                    # Store the trained model under the key (site, quantile)
                    self.models[(site, quantile)] = model
                    if show_progress:
                        progress.update(1)

    def predict(self, df_features: pd.DataFrame) -> pd.DataFrame:
        """
        Predict all quantiles for all sites using the trained models.

        Args:
            df_features (pd.DataFrame): Feature dataframe (X) with site as the top column level.

        Returns:
            pd.DataFrame: Predictions with (site, quantile) multi-index columns.
        """
        df_features = self.preprocess(df_features)

        predictions = {}
        sites = df_features.columns.get_level_values(0).unique()
        # Loop over each site and quantile
        for site in sites:
            # Extract the features for the current site
            site_features = df_features.xs(site, axis=1, level=0)
            for quantile in self.quantiles:
                # Check that a model exists for this site and quantile
                if (site, quantile) not in self.models:
                    raise ValueError(f"No trained model for site '{site}' and quantile '{quantile}'.")
                # Predict the residual and add the physical baseline back
                model = self.models[(site, quantile)]
                site_predictions = model.predict(site_features)
                site_predictions = site_predictions + site_features["physical_power"].values
                # Store the predictions under the quantile for the current site
                predictions[(site, f"quantile_{round(100 * quantile)}")] = site_predictions

        # Convert the dictionary to a dataframe with multi-index columns
        df_predictions = pd.DataFrame.from_dict(predictions)
        df_predictions.index = df_features.index
        return df_predictions
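Any extra keyword arguments to `train` are forwarded to `lgb.LGBMRegressor`, so hyperparameters can be tuned without touching the class. A sketch with hypothetical values (`training_input` and `training_target` are created in the cells below):

predictor = LGBGEFCom2014Predictor(portfolio=portfolio, name="tuned", quantiles=[0.1, 0.5, 0.9])
predictor.train(df_features=training_input, target=training_target,
                n_estimators=200, learning_rate=0.05, show_progress=False)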
Run the sequential decision loop
[8]:
initial_data, next_input = env.reset()
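`reset` returns the historical warm-up data together with the input for the first forecast task. A quick check (a sketch; only the keys accessed below are assumed):

print(initial_data.keys())   # expect: 'input' and 'target'
print(next_input.index.nlevels)  # two-level (reference time, valid time) index, per preprocess() above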
[9]:
training_input = initial_data["input"]
training_target = initial_data["target"]
predictor1 = LGBGEFCom2014Predictor(portfolio=portfolio, name="predictor1", quantiles=[0.1, 0.5, 0.9])
predictor1.train(df_features=training_input, target=training_target)
predictor2 = predictor1.copy(name="predictor2")
Training predictor1: 100%|██████████| 9/9 [00:02<00:00, 3.67it/s]
[10]:
losses = {predictor1.name: [], predictor2.name: []}
predictions = []
for i in range(env.n_steps):
    prediction1 = predictor1.predict(df_features=next_input)
    prediction2 = predictor2.predict(df_features=next_input)
    predictions.append(prediction1)
    training_input = pd.concat([training_input, next_input])
    next_input, next_target, done = env.step()
    training_target = pd.concat([training_target, next_target])
    loss1 = obj.calculate(next_target, prediction1)
    loss2 = obj.calculate(next_target, prediction2)
    losses[predictor1.name].append(loss1)
    losses[predictor2.name].append(loss2)
    predictor2.train(df_features=training_input, target=training_target)
    print(f"{predictor1.name} {obj.name} for step {i+1}: {loss1}")
    print(f"{predictor2.name} {obj.name} for step {i+1}: {loss2}")
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.36it/s]
predictor1 PinballLoss for step 1: 0.011204054803155625
predictor2 PinballLoss for step 1: 0.011204054803155625
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.76it/s]
predictor1 PinballLoss for step 2: 0.009240390300022897
predictor2 PinballLoss for step 2: 0.009063758759589114
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.35it/s]
predictor1 PinballLoss for step 3: 0.010753606907901054
predictor2 PinballLoss for step 3: 0.010443964712996245
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.56it/s]
predictor1 PinballLoss for step 4: 0.012257128079863674
predictor2 PinballLoss for step 4: 0.012103222813525286
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.57it/s]
predictor1 PinballLoss for step 5: 0.015709185957303754
predictor2 PinballLoss for step 5: 0.015349049969093274
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.53it/s]
predictor1 PinballLoss for step 6: 0.010714876092921388
predictor2 PinballLoss for step 6: 0.010428383539273101
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.33it/s]
predictor1 PinballLoss for step 7: 0.01060918593113412
predictor2 PinballLoss for step 7: 0.010258284122606633
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.29it/s]
predictor1 PinballLoss for step 8: 0.011098529790581903
predictor2 PinballLoss for step 8: 0.010757249681650975
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.33it/s]
predictor1 PinballLoss for step 9: 0.011700506431809785
predictor2 PinballLoss for step 9: 0.011300162463063212
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.19it/s]
predictor1 PinballLoss for step 10: 0.011686086915546115
predictor2 PinballLoss for step 10: 0.011458028889962996
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.75it/s]
predictor1 PinballLoss for step 11: 0.01105819817215981
predictor2 PinballLoss for step 11: 0.010466426422608343
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.20it/s]
predictor1 PinballLoss for step 12: 0.01254798074217749
predictor2 PinballLoss for step 12: 0.012111680115991199
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.05it/s]
predictor1 PinballLoss for step 13: 0.010533883821107594
predictor2 PinballLoss for step 13: 0.01007272056513573
Training predictor2: 100%|██████████| 9/9 [00:02<00:00, 3.15it/s]
predictor1 PinballLoss for step 14: 0.008110719272860376
predictor2 PinballLoss for step 14: 0.0074295384959401
Training predictor2: 100%|██████████| 9/9 [00:03<00:00, 2.99it/s]
predictor1 PinballLoss for step 15: 0.012833636041290689
predictor2 PinballLoss for step 15: 0.01174686713140884
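The printed metric is the pinball (quantile) loss. For an observation $y$ and a forecast $\hat{y}_q$ at quantile level $q$:

$$L_q(y, \hat{y}_q) = \max\bigl(q\,(y - \hat{y}_q),\;(q-1)\,(y - \hat{y}_q)\bigr)$$

Each step's score is this loss aggregated over sites, lead times, and the three quantile levels (the exact aggregation inside `obj.calculate` is assumed here to be a mean). The retrained `predictor2` edges out the static `predictor1` on nearly every task; a quick aggregate check (a sketch over the `losses` collected above):

for name, vals in losses.items():
    print(f"{name}: mean {obj.name} = {np.mean(vals):.5f}")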
[11]:
df_predictions = pd.concat(predictions)
env.plot_forecasts(training_target, df_predictions, site="Site1")
[Interactive Plotly figure: quantile forecasts and observed power for Site1]
[12]:
env.plot_overall_results(losses,
                         drop_tasks=["Task1", "Task2", "Task3", "Task4"]);
[figure: overall results, tasks 1-4 dropped]
[13]:
env.plot_results(losses,
                 drop_tasks=["Task1", "Task2", "Task3", "Task4"],
                 n_top_teams=3,
                 xlim=0.05);
[figure: per-task results against the top 3 teams]