Skip to content

Commit

Permalink
Merge pull request #254 from openclimatefix/pvnet_multiple_nwp
Browse files: browse the repository at this point in the history
Allow use of multiple NWPs
  • Loading branch information
dfulu authored Dec 12, 2023
2 parents 6b597d1 + ddb485e commit 4fa9d66
Show file tree
Hide file tree
Showing 95 changed files with 2,364 additions and 2,529 deletions.
1 change: 0 additions & 1 deletion ocf_datapipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import ocf_datapipes.batch
import ocf_datapipes.convert
import ocf_datapipes.experimental
import ocf_datapipes.fake
import ocf_datapipes.load
import ocf_datapipes.select
import ocf_datapipes.transform
Expand Down
8 changes: 8 additions & 0 deletions ocf_datapipes/batch/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
"""Datapipes for batching together data"""
from .merge_numpy_examples_to_batch import (
MergeNumpyBatchIterDataPipe as MergeNumpyBatch,
)
from .merge_numpy_examples_to_batch import (
MergeNumpyExamplesToBatchIterDataPipe as MergeNumpyExamplesToBatch,
)
from .merge_numpy_examples_to_batch import (
stack_np_examples_into_batch,
unstack_np_batch_into_examples,
)
from .merge_numpy_modalities import MergeNumpyModalitiesIterDataPipe as MergeNumpyModalities
from .merge_numpy_modalities import MergeNWPNumpyModalitiesIterDataPipe as MergeNWPNumpyModalities
30 changes: 21 additions & 9 deletions ocf_datapipes/batch/fake/fake_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,17 @@
from ocf_datapipes.utils.utils import datetime64_to_float


def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = False) -> dict:
def make_fake_batch(
configuration: Configuration,
batch_size: int = 8,
to_torch: Optional[bool] = False,
) -> dict:
"""
Make a random fake batch, this is useful for models that use this object
Args:
configuration: a configuration file
batch_size: the batch size
to_torch: optional if we return the batch with torch.Tensor
Returns: dictionary containing the batch
Expand All @@ -35,24 +40,30 @@ def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = Fal
t0_datetime_utc = t0_datetime_utc.replace(microsecond=0)

# make fake PV data
batch_pv = make_fake_pv_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
batch_pv = make_fake_pv_data(configuration, t0_datetime_utc, batch_size)

# make NWP data
batch_nwp = make_fake_nwp_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
batch_nwp = make_fake_nwp_data(configuration, t0_datetime_utc, batch_size)

# make GSP data
batch_gsp = make_fake_gsp_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
batch_gsp = make_fake_gsp_data(configuration, t0_datetime_utc, batch_size)

# make hrv and normal satellite data
batch_satellite = make_fake_satellite_data(
configuration=configuration, t0_datetime_utc=t0_datetime_utc, is_hrv=False
configuration,
t0_datetime_utc,
is_hrv=False,
batch_size=batch_size,
)
batch_hrv_satellite = make_fake_satellite_data(
configuration=configuration, t0_datetime_utc=t0_datetime_utc, is_hrv=True
configuration,
t0_datetime_utc,
is_hrv=True,
batch_size=batch_size,
)

# make sun features
batch_sun = make_fake_sun_data(configuration=configuration)
batch_sun = make_fake_sun_data(configuration, batch_size)

batch = {
**batch_pv,
Expand All @@ -76,19 +87,20 @@ def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = Fal
return batch


def fake_data_pipeline(configuration: Union[str, Configuration]):
def fake_data_pipeline(configuration: Union[str, Configuration], batch_size: int = 8):
"""
Make a fake data pipeline
Args:
configuration: a configuration file
batch_size: Integer batch size to create
"""

if isinstance(configuration, str):
configuration = load_yaml_configuration(configuration)

batch = make_fake_batch(configuration=configuration, to_torch=True)
batch = make_fake_batch(configuration=configuration, to_torch=True, batch_size=batch_size)

def fake_iter():
while True:
Expand Down
6 changes: 4 additions & 2 deletions ocf_datapipes/batch/fake/gsp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
from ocf_datapipes.utils.consts import BatchKey


def make_fake_gsp_data(configuration: Configuration, t0_datetime_utc: datetime) -> dict:
def make_fake_gsp_data(
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
) -> dict:
"""
Make Fake GSP data ready for ML model
Args:
configuration: configuration object
t0_datetime_utc: one datetime for when t0 is
batch_size: Integer batch size to create
Returns: dictionary of gsp items
"""
Expand All @@ -24,7 +27,6 @@ def make_fake_gsp_data(configuration: Configuration, t0_datetime_utc: datetime)
if gsp_config is None:
return {}

batch_size = configuration.process.batch_size
n_gsps = gsp_config.n_gsp_per_example
n_fourier_features = 8

Expand Down
108 changes: 63 additions & 45 deletions ocf_datapipes/batch/fake/nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,64 +5,82 @@

from ocf_datapipes.batch.fake.utils import get_n_time_steps_from_config, make_time_utc
from ocf_datapipes.config.model import Configuration
from ocf_datapipes.utils.consts import BatchKey
from ocf_datapipes.utils.consts import BatchKey, NWPBatchKey


def make_fake_nwp_data(configuration: Configuration, t0_datetime_utc: datetime):
def make_fake_nwp_data(
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
) -> dict:
"""
Make Fake NWP data ready for ML model
Args:
configuration: configuration object
t0_datetime_utc: one datetime for when t0 is
batch_size: Integer batch size to create
Returns: dictionary of nwp items
"""

nwp_config = configuration.input_data.nwp
if nwp_config is None:

if configuration.input_data.nwp is None:
return {}

batch_size = configuration.process.batch_size
n_channels = len(nwp_config.nwp_channels)
n_y_osgb = nwp_config.nwp_image_size_pixels_height
n_x_osgb = nwp_config.nwp_image_size_pixels_width
n_fourier_features = 8

# make time matrix
time_utc = make_time_utc(
batch_size=batch_size,
history_minutes=nwp_config.history_minutes,
forecast_minutes=nwp_config.forecast_minutes,
t0_datetime_utc=t0_datetime_utc,
time_resolution_minutes=nwp_config.time_resolution_minutes,
)
n_times = time_utc.shape[1]

# main nwp components
batch = {}
batch[BatchKey.nwp_init_time_utc] = time_utc # Seconds since UNIX epoch (1970-01-01).
batch[BatchKey.nwp_target_time_utc] = time_utc # Seconds since UNIX epoch (1970-01-01).
batch[BatchKey.nwp] = np.random.random((batch_size, n_times, n_channels, n_y_osgb, n_x_osgb))
batch[BatchKey.nwp_t0_idx] = get_n_time_steps_from_config(
input_data_configuration=nwp_config, include_forecast=False
)

batch[BatchKey.nwp_step] = np.random.randint(0, 100, (batch_size, n_times))
batch[BatchKey.nwp_y_osgb] = np.random.randint(0, 100, (batch_size, n_y_osgb))
batch[BatchKey.nwp_x_osgb] = np.random.randint(0, 100, (batch_size, n_x_osgb))
batch[BatchKey.nwp_channel_names] = np.random.randint(0, 100, (n_channels,))

# fourier components
batch[BatchKey.nwp_x_osgb_fourier] = np.random.random(
(batch_size, n_x_osgb, n_fourier_features)
)
batch[BatchKey.nwp_y_osgb_fourier] = np.random.random(
(batch_size, n_y_osgb, n_fourier_features)
)
batch[BatchKey.nwp_target_time_utc] = np.random.random(
(batch_size, n_times, n_fourier_features)
)
batch[BatchKey.nwp_init_time_utc] = np.random.random((batch_size, n_times, n_fourier_features))

return batch

for nwp_source, nwp_config in configuration.input_data.nwp.items():
source_batch = {}

n_channels = len(nwp_config.nwp_channels)
n_y_osgb = nwp_config.nwp_image_size_pixels_height
n_x_osgb = nwp_config.nwp_image_size_pixels_width
n_fourier_features = 8

# make time matrix
time_utc = make_time_utc(
batch_size=batch_size,
history_minutes=nwp_config.history_minutes,
forecast_minutes=nwp_config.forecast_minutes,
t0_datetime_utc=t0_datetime_utc,
time_resolution_minutes=nwp_config.time_resolution_minutes,
)
n_times = time_utc.shape[1]

# main nwp components

source_batch[
NWPBatchKey.nwp_init_time_utc
] = time_utc # Seconds since UNIX epoch (1970-01-01).
source_batch[
NWPBatchKey.nwp_target_time_utc
] = time_utc # Seconds since UNIX epoch (1970-01-01).
source_batch[NWPBatchKey.nwp] = np.random.random(
(batch_size, n_times, n_channels, n_y_osgb, n_x_osgb)
)
source_batch[NWPBatchKey.nwp_t0_idx] = get_n_time_steps_from_config(
input_data_configuration=nwp_config, include_forecast=False
)

source_batch[NWPBatchKey.nwp_step] = np.random.randint(0, 100, (batch_size, n_times))
source_batch[NWPBatchKey.nwp_y_osgb] = np.random.randint(0, 100, (batch_size, n_y_osgb))
source_batch[NWPBatchKey.nwp_x_osgb] = np.random.randint(0, 100, (batch_size, n_x_osgb))
source_batch[NWPBatchKey.nwp_channel_names] = np.random.randint(0, 100, (n_channels,))

# fourier components
source_batch[NWPBatchKey.nwp_x_osgb_fourier] = np.random.random(
(batch_size, n_x_osgb, n_fourier_features)
)
source_batch[NWPBatchKey.nwp_y_osgb_fourier] = np.random.random(
(batch_size, n_y_osgb, n_fourier_features)
)
source_batch[NWPBatchKey.nwp_target_time_utc] = np.random.random(
(batch_size, n_times, n_fourier_features)
)
source_batch[NWPBatchKey.nwp_init_time_utc] = np.random.random(
(batch_size, n_times, n_fourier_features)
)

batch[nwp_source] = source_batch

return {BatchKey.nwp: batch}
6 changes: 4 additions & 2 deletions ocf_datapipes/batch/fake/pv.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
from ocf_datapipes.utils.consts import BatchKey


def make_fake_pv_data(configuration: Configuration, t0_datetime_utc: datetime):
def make_fake_pv_data(
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
) -> dict:
"""
Make Fake PV data ready for ML model
Args:
configuration: configuration object
t0_datetime_utc: one datetime for when t0 is
batch_size: Integer batch size to create
Returns: dictionary of pv items
"""
Expand All @@ -23,7 +26,6 @@ def make_fake_pv_data(configuration: Configuration, t0_datetime_utc: datetime):
if pv_config is None:
return {}

batch_size = configuration.process.batch_size
n_pv_systems = pv_config.n_pv_systems_per_example
n_fourier_features = 8

Expand Down
9 changes: 6 additions & 3 deletions ocf_datapipes/batch/fake/satellite.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,19 @@


def make_fake_satellite_data(
configuration: Configuration, t0_datetime_utc: datetime, is_hrv: bool = False
):
configuration: Configuration,
t0_datetime_utc: datetime,
is_hrv: bool = False,
batch_size: int = 8,
) -> dict:
"""
Make Fake Satellite data ready for ML model. This makes data across all different data inputs
Args:
configuration: configuration object
t0_datetime_utc: one datetime for when t0 is
is_hrv: option if its hrv or not
batch_size: Integer batch size to create
Returns: dictionary of satellite items
"""
Expand All @@ -34,7 +38,6 @@ def make_fake_satellite_data(
if satellite_config is None:
return {}

batch_size = configuration.process.batch_size
n_channels = len(getattr(satellite_config, f"{variable}_channels"))
height = getattr(satellite_config, f"{variable}_image_size_pixels_height")
width = getattr(satellite_config, f"{variable}_image_size_pixels_width")
Expand Down
25 changes: 15 additions & 10 deletions ocf_datapipes/batch/fake/sun.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,22 @@

from ocf_datapipes.batch.fake.utils import get_n_time_steps_from_config
from ocf_datapipes.config.model import Configuration
from ocf_datapipes.utils.consts import BatchKey
from ocf_datapipes.utils.consts import BatchKey, NWPBatchKey


def make_fake_sun_data(configuration: Configuration):
def make_fake_sun_data(configuration: Configuration, batch_size: int = 8):
"""
Make Fake Sun data ready for ML model. This makes data across all different data inputs
Args:
configuration: configuration object
t0_datetime_utc: one datetime for when t0 is
batch_size: Integer batch size to create
Returns: dictionary of pv items
"""

batch = {}
batch_size = configuration.process.batch_size

# HRV Satellite
if configuration.input_data.hrvsatellite is not None:
Expand Down Expand Up @@ -56,12 +56,17 @@ def make_fake_sun_data(configuration: Configuration):

# NWP
if configuration.input_data.nwp is not None:
n_nwp_timesteps = get_n_time_steps_from_config(configuration.input_data.nwp)
batch[BatchKey.nwp_target_time_solar_azimuth] = np.random.random(
(batch_size, n_nwp_timesteps)
)
batch[BatchKey.nwp_target_time_solar_elevation] = np.random.random(
(batch_size, n_nwp_timesteps)
)
batch[BatchKey.nwp] = {}

for nwp_source, nwp_config in configuration.input_data.nwp.items():
batch[BatchKey.nwp][nwp_source] = {}

n_nwp_timesteps = get_n_time_steps_from_config(configuration.input_data.nwp[nwp_source])
batch[BatchKey.nwp][nwp_source][
NWPBatchKey.nwp_target_time_solar_azimuth
] = np.random.random((batch_size, n_nwp_timesteps))
batch[BatchKey.nwp][nwp_source][
NWPBatchKey.nwp_target_time_solar_elevation
] = np.random.random((batch_size, n_nwp_timesteps))

return batch
1 change: 0 additions & 1 deletion ocf_datapipes/batch/merge_batchml.py

This file was deleted.

Loading

0 comments on commit 4fa9d66

Please sign in to comment.