HEFTCom2024

[1]:
import typing as t
import gymnasium as gym
import enflow as ef

import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from statsmodels.iolib.smpickle import load_pickle

import folium
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.renderers.default = 'notebook'

Load data

[2]:
df_heftcom = pd.read_csv("data/data_heftcom2024.csv", index_col=[0, 1], parse_dates=["ref_datetime", "valid_datetime", "market_day"])
df_coords_hornsea1 = pd.read_csv("data/dwd_coords_hornsea1.csv")
df_coords_pes10 = pd.read_csv("data/dwd_coords_pes10.csv")
area_pes10 = ef.GeoMultiPolygon.from_geojson("data/pes10.geojson")

Create and plot energy system

[3]:
windfarm = ef.WindFarm(name="Hornsea 1", longitude=1.79, latitude=53.90, capacity=1218)
solar_pes10 = ef.SolarPowerArea(geopolygon=area_pes10)
portfolio = ef.Portfolio(assets=[solar_pes10, windfarm])
[4]:
m = folium.Map(location=[53.5, 0], zoom_start=6.4)

feature_group1 = folium.FeatureGroup(name='Wind Farm Hornsea 1')
folium.CircleMarker(location=[windfarm.latitude, windfarm.longitude], radius=10, color='blue').add_to(feature_group1)
feature_group1.add_to(m)

feature_group2 = folium.FeatureGroup(name='Solar PES10')
folium.GeoJson(solar_pes10.geopolygon.multipolygon, name='geopolygon').add_to(feature_group2)
feature_group2.add_to(m)

feature_group3 = folium.FeatureGroup(name='DWD grid points')
for lat in df_coords_hornsea1.latitude.values:
    for lon in df_coords_hornsea1.longitude.values:
        folium.CircleMarker(location=[lat, lon], radius=0.1, color='red').add_to(feature_group3)
for index, row in df_coords_pes10.iterrows():
    folium.CircleMarker(location=[row.latitude, row.longitude], radius=0.1, color='red').add_to(feature_group3)
feature_group3.add_to(m)

folium.LayerControl().add_to(m)

m
[4]:
Make this Notebook Trusted to load map: File -> Trust Notebook

Create Environment

[5]:
dataset = ef.Dataset(name="heftcom2024",
                     description="All data provided by the organisers of HEFTCom2024. Participants are free to use additional external data.",
                     energy_system=portfolio,
                     data={"data_heftcom2024": df_heftcom, "dwd_coords_hornsea1": df_coords_hornsea1, "dwd_coords_pes10": df_coords_pes10})
[6]:
time_space = gym.spaces.Text(max_length=10, min_length=10)

state_space = gym.spaces.Dict(
    {
        "solar_power": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
        "solar_installed": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
        "solar_available": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
        "solar_forecast": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
        "wind_power": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
        "wind_forecast": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
    }
)

exogeneous_space = gym.spaces.Dict(
    {
        "dayahead_price": gym.spaces.Box(low=-np.inf, high=np.inf, shape=(48,1)),
        "imbalance_price": gym.spaces.Box(low=-np.inf, high=np.inf, shape=(48,1)),
    }
)

action_space = gym.spaces.Dict(
    {
        "power_quantile_forecast": gym.spaces.Box(low=0, high=np.inf, shape=(48,9)),
        "power_bid": gym.spaces.Box(low=0, high=np.inf, shape=(48,1)),
    }
)
[7]:
class HEFTCom2024Environment(gym.Env):

    def __init__(self,
                 dataset: ef.Dataset,
                 train_end: t.Optional[pd.Timestamp] = None,
                 time_lags: t.Optional[t.Dict[str, t.Union[int, None]]] = None,):

        self.time_space = time_space
        self.state_space = state_space
        self.exogeneous_space = exogeneous_space
        self.action_space = action_space

        self.data = dataset.data["data_heftcom2024"]
        self.portfolio = dataset.energy_system
        self.train_end = train_end if train_end is not None else self.data["market_day"].max()
        self.market_days = self.data['market_day'].unique()
        self.n_market_days = len(self.market_days)
        self.idx_first_eval_day = list(self.market_days > self.train_end).index(True)
        self.idx_counter = 0

    def reset(self, return_dataframe=False):
        self.idx_counter = 0
        initial_dataframe = self.data[self.data["market_day"] <= self.train_end]
        ref_times, states, exogenous = self._create_states(self.market_days[:self.idx_first_eval_day])

        if return_dataframe:
            return ref_times, states, exogenous, initial_dataframe
        else:
            return ref_times, states, exogenous

    def _create_state(self, idx_market_day):
        market_day_previous = self.market_days[idx_market_day-1]
        market_day = self.market_days[idx_market_day]

        market_day_start_previous = self.data[self.data["market_day"] == market_day_previous].index.get_level_values(1)[0]
        market_day_start = self.data[self.data["market_day"] == market_day].index.get_level_values(1)[0]

        ref_time = market_day.strftime('%Y-%m-%d')

        next_state = {
            "solar_power": self.data.loc[market_day_start_previous:market_day_start, ["Solar_MWh_credit"]].values,
            "solar_installed": self.data.loc[market_day_start_previous:market_day_start, ["Solar_installedcapacity_mwp"]].values,
            "solar_available": self.data.loc[market_day_start_previous:market_day_start, ["Solar_capacity_mwp"]].values,
            "solar_forecast": self.data[self.data["market_day"] == market_day][["SolarDownwardRadiation"]].values,
            "wind_power": self.data.loc[market_day_start_previous:market_day_start, ["Wind_MWh_credit"]].values,
            "wind_forecast": self.data[self.data["market_day"] == market_day][["WindSpeed"]].values,
            "dayahead_price": self.data.loc[market_day_start_previous:market_day_start, ["DA_Price"]].values,
            "imbalance_price": self.data.loc[market_day_start_previous:market_day_start, ["SS_Price"]].values
        }

        exogenous = {
            "dayahead_price": self.data[self.data["market_day"] == market_day][["DA_Price"]].values,
            "imbalance_price": self.data[self.data["market_day"] == market_day][["SS_Price"]].values,
        }

        return ref_time, next_state, exogenous

    def _create_states(self, market_days):
        ref_times = []
        states = []
        exogenous = []
        for idx in range(1, len(market_days)):
            ref_time, state, exog = self._create_state(idx)
            ref_times.append(ref_time)
            states.append(state)
            exogenous.append(exog)

        return ref_times, states, exogenous

    def step(self, action=None):
        idx_market_day = self.idx_first_eval_day+self.idx_counter
        ref_time, next_state, exogenous = self._create_state(idx_market_day)
        done = True if idx_market_day+1 == self.n_market_days else False
        self.idx_counter += 1

        return ref_time, next_state, exogenous, done
[8]:
env = HEFTCom2024Environment(dataset=dataset,
                             train_end="2023-10-15")
ref_times, states, exogenous, initial_df = env.reset(return_dataframe=True)

Create Objective

[9]:
from enflow.problems.objectives import PinballLoss
from enflow.problems.objectives import BaseScore

class MarketRevenue(BaseScore):

    def calculate(self, bid, production, day_ahead_price, single_system_price):
        revenue = bid * day_ahead_price + (production - bid) * (single_system_price - 0.07 * (production - bid))

        return revenue

Create Predictor

[10]:
class Predictor(ef.Predictor):
        def __init__(self):
            self.model_name = "quantreg"
            self.quantiles = range(10,100,10)
            self.models = dict()

        def save_model(self, path: str):
            for quantile in self.quantiles:
                self.models[f"q{quantile}"].save(f"{path}/{self.model_name}_q{quantile}.pickle")

        def load_model(self, path: str):
            for quantile in self.quantiles:
                self.models[f"q{quantile}"] = load_pickle(f"{path}/{self.model_name}_q{quantile}.pickle")

        def train(self, X, y):
            train_data = {**X, **y}
            train_data = pd.DataFrame({k: v.squeeze() for k, v in train_data.items()})
            mod = smf.quantreg('total_generation_MWh ~ bs(solar_forecast,df=5) + bs(wind_forecast,df=8)',
                               data=train_data)

            for quantile in range(10,100,10):
                print(quantile)
                self.models[f"q{quantile}"] = mod.fit(q=quantile/100, max_iter=2500)

        def predict(self, X: dict) -> dict:
            X = pd.DataFrame({k: v.squeeze() for k, v in X.items()})

            prediction = pd.DataFrame()
            for quantile in range(10,100,10):
                prediction[f"q{quantile}"] = self.models[f"q{quantile}"].predict(X)
                prediction.loc[prediction[f"q{quantile}"] < 0, f"q{quantile}"] = 0

            return prediction.values
[11]:
train_data = initial_df[["WindSpeed", "SolarDownwardRadiation", "total_generation_MWh"]].dropna()
X = {
    "solar_forecast": train_data["SolarDownwardRadiation"].values,
    "wind_forecast": train_data["WindSpeed"].values
}
y = {
    "total_generation_MWh": train_data["total_generation_MWh"].values
}
[12]:
predictor = Predictor()
#predictor.train(X, y)
#predictor.save_model("models")
predictor.load_model("models")

[13]:
y_true = y["total_generation_MWh"].reshape(-1,1)
y_preds = predictor.predict(X)

pinball = PinballLoss(quantiles=np.arange(0.1,1,0.1))
scores = pinball.score(y_true, y_preds)
scores
[13]:
32.092224429059414

Create Agent

[14]:
from enflow.problems.objectives import BaseScore

class MarketRevenue(BaseScore):

    def calculate(self, bid, production, day_ahead_price, single_system_price):
        revenue = bid * day_ahead_price + (production - bid) * (single_system_price - 0.07 * (production - bid))

        return revenue
[15]:
class Optimizer(ef.Optimizer):

    def optimize(self, prediction):
        bid = prediction[:,4].reshape(-1,1)

        return bid

optimizer = Optimizer()
[16]:
class Agent(ef.Agent):

    def __init__(self, predictor, optimizer):
        self.predictor = predictor
        self.optimizer = optimizer

    def load_predictor(self, path: str):
        self.predictor.load_model(path)

    def train(self, X, y):
        self.predictor.train(X, y)

    def act(self, state):
        prediction = self.predictor.predict(state)
        bid = self.optimizer.optimize(prediction)
        action = {"power_quantile_forecast": prediction, "power_bid": bid}

        return action
[17]:
initial_states = {
    "solar_forecast": initial_df["SolarDownwardRadiation"].values,
    "wind_forecast": initial_df["WindSpeed"].values
}
agent = Agent(predictor=predictor, optimizer=optimizer)
action = agent.act(initial_states)
[18]:
scorer = MarketRevenue()
revenue = scorer.calculate(bid=action["power_bid"],
                           production=initial_df[["total_generation_MWh"]].values,
                           day_ahead_price=initial_df[["DA_Price"]].values,
                           single_system_price=initial_df[["SS_Price"]].values)
np.nansum(revenue)
[18]:
2534482077.9752154
[29]:
for i in range(9):
    initial_df.loc[:, f"Predictions_q{10*(i+1)}"] = action["power_quantile_forecast"][:,[i]]
initial_df.loc[:, "Bids"] = action["power_bid"]
initial_df.loc[:, "Revenue"] = revenue
initial_df.index = initial_df.index.droplevel(0)
[32]:
initial_df
[32]:
WindSpeed SolarDownwardRadiation MIP Solar_MW Solar_capacity_mwp Solar_installedcapacity_mwp Wind_MW SS_Price boa_MWh DA_Price ... Predictions_q20 Predictions_q30 Predictions_q40 Predictions_q50 Predictions_q60 Predictions_q70 Predictions_q80 Predictions_q90 Bids Revenue
valid_datetime
2020-09-20 23:00:00+00:00 5.825196 -0.001624 26.76 0.0 2108.444662 2206.067896 131.008 16.00000 0.0 33.30 ... 37.189591 52.602142 66.968993 80.419721 95.607469 113.514992 138.769492 185.531116 80.419721 2423.751669
2020-09-20 23:30:00+00:00 5.601748 -0.004990 24.87 0.0 2108.444662 2206.067896 113.630 24.87000 0.0 33.30 ... 31.212647 45.440970 58.946516 71.895678 86.142042 102.906343 126.432197 171.951934 71.895678 2003.149738
2020-09-21 00:00:00+00:00 5.378300 -0.008356 26.20 0.0 2108.444662 2206.067896 95.558 47.30000 0.0 31.02 ... 25.626327 38.636333 51.282054 63.798042 77.194467 92.910992 114.863054 159.342024 63.798042 1203.351890
2020-09-21 00:30:00+00:00 5.161918 0.000241 29.27 0.0 2108.444662 2206.067896 75.574 29.27000 0.0 31.02 ... 20.655625 32.460931 44.278910 56.398887 69.080706 83.845629 104.449515 147.932062 56.398887 1180.475379
2020-09-21 01:00:00+00:00 4.945536 0.008838 29.66 0.0 2108.444662 2206.067896 49.818 29.66000 0.0 30.03 ... 16.180092 26.774801 37.749784 49.468942 61.474223 75.382278 94.782600 137.453701 49.468942 714.881095
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2023-10-15 20:30:00+00:00 10.973990 -0.006017 127.97 0.0 2332.037710 2487.180474 594.422 35.00000 0.0 105.46 ... 336.709802 384.475883 419.608531 447.791603 473.342823 496.363486 522.909066 553.146818 447.791603 40366.565067
2023-10-15 21:00:00+00:00 10.713284 -0.012358 131.99 0.0 2332.037710 2487.180474 578.362 188.33001 0.0 105.25 ... 314.445034 361.188391 396.432284 425.918531 453.224644 478.242510 508.093521 543.801611 425.918531 17767.344153
2023-10-15 21:30:00+00:00 10.453822 -0.006461 99.17 0.0 2332.037710 2487.180474 590.122 170.00000 0.0 105.25 ... 291.533937 336.649465 371.658430 402.265137 431.315782 458.298024 491.614456 532.930841 402.265137 23309.211453
2023-10-15 22:00:00+00:00 10.194361 -0.000563 110.82 0.0 2332.037710 2487.180474 524.642 195.00000 0.0 103.25 ... 268.265598 311.287853 345.746718 377.272451 407.921540 436.819809 473.585098 520.728865 377.272451 15612.879041
2023-10-15 22:30:00+00:00 9.863417 0.000624 102.93 0.0 2332.037710 2487.180474 468.762 195.00000 0.0 103.25 ... 238.685050 278.568551 311.947690 344.293837 376.674832 407.816720 448.733842 503.187818 344.293837 13269.677237

53040 rows Γ— 25 columns

[51]:
fig = make_subplots(rows=4, cols=1, shared_xaxes=True, vertical_spacing=0.02)

# Add traces
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["total_generation_MWh"], mode='lines', name='Total Generation'), row=1, col=1)
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["Predictions_q50"], mode='lines', name='Prediction q50'), row=1, col=1)
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["DA_Price"], mode='lines', name='DA Prices'), row=2, col=1)
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["SS_Price"], mode='lines', name='IM Prices'), row=2, col=1)
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["Revenue"], mode='lines', name='Revenue'), row=3, col=1)
fig.add_trace(go.Scatter(x=initial_df.index, y=initial_df["Revenue"].cumsum()/initial_df["total_generation_MWh"].cumsum(), mode='lines', name='Cumulative Revenue'), row=4, col=1)

fig.update_layout(
    xaxis4=dict(
        rangeslider=dict(visible=True),
        type='date'
    ),
    height=500,
    legend=dict(orientation='h', x=0, y=1.1, xanchor='left', yanchor='bottom')
)
fig.show()