Skip to content
This repository was archived by the owner on Jun 2, 2025. It is now read-only.

Commit 4fa9d66

Browse files
authored
Merge pull request #254 from openclimatefix/pvnet_multiple_nwp
Allow use of multiple NWPs
2 parents 6b597d1 + ddb485e commit 4fa9d66

File tree

95 files changed

+2364
-2529
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

95 files changed

+2364
-2529
lines changed

ocf_datapipes/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import ocf_datapipes.batch
33
import ocf_datapipes.convert
44
import ocf_datapipes.experimental
5-
import ocf_datapipes.fake
65
import ocf_datapipes.load
76
import ocf_datapipes.select
87
import ocf_datapipes.transform

ocf_datapipes/batch/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
"""Datapipes for batching together data"""
2+
from .merge_numpy_examples_to_batch import (
3+
MergeNumpyBatchIterDataPipe as MergeNumpyBatch,
4+
)
25
from .merge_numpy_examples_to_batch import (
36
MergeNumpyExamplesToBatchIterDataPipe as MergeNumpyExamplesToBatch,
47
)
8+
from .merge_numpy_examples_to_batch import (
9+
stack_np_examples_into_batch,
10+
unstack_np_batch_into_examples,
11+
)
512
from .merge_numpy_modalities import MergeNumpyModalitiesIterDataPipe as MergeNumpyModalities
13+
from .merge_numpy_modalities import MergeNWPNumpyModalitiesIterDataPipe as MergeNWPNumpyModalities

ocf_datapipes/batch/fake/fake_batch.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,17 @@
1616
from ocf_datapipes.utils.utils import datetime64_to_float
1717

1818

19-
def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = False) -> dict:
19+
def make_fake_batch(
20+
configuration: Configuration,
21+
batch_size: int = 8,
22+
to_torch: Optional[bool] = False,
23+
) -> dict:
2024
"""
2125
Make a random fake batch, this is useful for models that use this object
2226
2327
Args:
2428
configuration: a configuration file
29+
batch_size: the batch size
2530
to_torch: optional if we return the batch with torch.Tensor
2631
2732
Returns: dictionary containing the batch
@@ -35,24 +40,30 @@ def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = Fal
3540
t0_datetime_utc = t0_datetime_utc.replace(microsecond=0)
3641

3742
# make fake PV data
38-
batch_pv = make_fake_pv_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
43+
batch_pv = make_fake_pv_data(configuration, t0_datetime_utc, batch_size)
3944

4045
# make NWP data
41-
batch_nwp = make_fake_nwp_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
46+
batch_nwp = make_fake_nwp_data(configuration, t0_datetime_utc, batch_size)
4247

4348
# make GSP data
44-
batch_gsp = make_fake_gsp_data(configuration=configuration, t0_datetime_utc=t0_datetime_utc)
49+
batch_gsp = make_fake_gsp_data(configuration, t0_datetime_utc, batch_size)
4550

4651
# make hrv and normal satellite data
4752
batch_satellite = make_fake_satellite_data(
48-
configuration=configuration, t0_datetime_utc=t0_datetime_utc, is_hrv=False
53+
configuration,
54+
t0_datetime_utc,
55+
is_hrv=False,
56+
batch_size=batch_size,
4957
)
5058
batch_hrv_satellite = make_fake_satellite_data(
51-
configuration=configuration, t0_datetime_utc=t0_datetime_utc, is_hrv=True
59+
configuration,
60+
t0_datetime_utc,
61+
is_hrv=True,
62+
batch_size=batch_size,
5263
)
5364

5465
# make sun features
55-
batch_sun = make_fake_sun_data(configuration=configuration)
66+
batch_sun = make_fake_sun_data(configuration, batch_size)
5667

5768
batch = {
5869
**batch_pv,
@@ -76,19 +87,20 @@ def make_fake_batch(configuration: Configuration, to_torch: Optional[bool] = Fal
7687
return batch
7788

7889

79-
def fake_data_pipeline(configuration: Union[str, Configuration]):
90+
def fake_data_pipeline(configuration: Union[str, Configuration], batch_size: int = 8):
8091
"""
8192
Make a fake data pipeline
8293
8394
Args:
8495
configuration: a configuration file
96+
batch_size: Integer batch size to create
8597
8698
"""
8799

88100
if isinstance(configuration, str):
89101
configuration = load_yaml_configuration(configuration)
90102

91-
batch = make_fake_batch(configuration=configuration, to_torch=True)
103+
batch = make_fake_batch(configuration=configuration, to_torch=True, batch_size=batch_size)
92104

93105
def fake_iter():
94106
while True:

ocf_datapipes/batch/fake/gsp.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
from ocf_datapipes.utils.consts import BatchKey
99

1010

11-
def make_fake_gsp_data(configuration: Configuration, t0_datetime_utc: datetime) -> dict:
11+
def make_fake_gsp_data(
12+
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
13+
) -> dict:
1214
"""
1315
Make Fake GSP data ready for ML model
1416
1517
Args:
1618
configuration: configuration object
1719
t0_datetime_utc: one datetime for when t0 is
20+
batch_size: Integer batch size to create
1821
1922
Returns: dictionary of gsp items
2023
"""
@@ -24,7 +27,6 @@ def make_fake_gsp_data(configuration: Configuration, t0_datetime_utc: datetime)
2427
if gsp_config is None:
2528
return {}
2629

27-
batch_size = configuration.process.batch_size
2830
n_gsps = gsp_config.n_gsp_per_example
2931
n_fourier_features = 8
3032

ocf_datapipes/batch/fake/nwp.py

Lines changed: 63 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -5,64 +5,82 @@
55

66
from ocf_datapipes.batch.fake.utils import get_n_time_steps_from_config, make_time_utc
77
from ocf_datapipes.config.model import Configuration
8-
from ocf_datapipes.utils.consts import BatchKey
8+
from ocf_datapipes.utils.consts import BatchKey, NWPBatchKey
99

1010

11-
def make_fake_nwp_data(configuration: Configuration, t0_datetime_utc: datetime):
11+
def make_fake_nwp_data(
12+
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
13+
) -> dict:
1214
"""
1315
Make Fake NWP data ready for ML model
1416
1517
Args:
1618
configuration: configuration object
1719
t0_datetime_utc: one datetime for when t0 is
20+
batch_size: Integer batch size to create
1821
1922
Returns: dictionary of nwp items
2023
"""
2124

2225
nwp_config = configuration.input_data.nwp
23-
if nwp_config is None:
26+
27+
if configuration.input_data.nwp is None:
2428
return {}
2529

26-
batch_size = configuration.process.batch_size
27-
n_channels = len(nwp_config.nwp_channels)
28-
n_y_osgb = nwp_config.nwp_image_size_pixels_height
29-
n_x_osgb = nwp_config.nwp_image_size_pixels_width
30-
n_fourier_features = 8
31-
32-
# make time matrix
33-
time_utc = make_time_utc(
34-
batch_size=batch_size,
35-
history_minutes=nwp_config.history_minutes,
36-
forecast_minutes=nwp_config.forecast_minutes,
37-
t0_datetime_utc=t0_datetime_utc,
38-
time_resolution_minutes=nwp_config.time_resolution_minutes,
39-
)
40-
n_times = time_utc.shape[1]
41-
42-
# main nwp components
4330
batch = {}
44-
batch[BatchKey.nwp_init_time_utc] = time_utc # Seconds since UNIX epoch (1970-01-01).
45-
batch[BatchKey.nwp_target_time_utc] = time_utc # Seconds since UNIX epoch (1970-01-01).
46-
batch[BatchKey.nwp] = np.random.random((batch_size, n_times, n_channels, n_y_osgb, n_x_osgb))
47-
batch[BatchKey.nwp_t0_idx] = get_n_time_steps_from_config(
48-
input_data_configuration=nwp_config, include_forecast=False
49-
)
50-
51-
batch[BatchKey.nwp_step] = np.random.randint(0, 100, (batch_size, n_times))
52-
batch[BatchKey.nwp_y_osgb] = np.random.randint(0, 100, (batch_size, n_y_osgb))
53-
batch[BatchKey.nwp_x_osgb] = np.random.randint(0, 100, (batch_size, n_x_osgb))
54-
batch[BatchKey.nwp_channel_names] = np.random.randint(0, 100, (n_channels,))
55-
56-
# fourier components
57-
batch[BatchKey.nwp_x_osgb_fourier] = np.random.random(
58-
(batch_size, n_x_osgb, n_fourier_features)
59-
)
60-
batch[BatchKey.nwp_y_osgb_fourier] = np.random.random(
61-
(batch_size, n_y_osgb, n_fourier_features)
62-
)
63-
batch[BatchKey.nwp_target_time_utc] = np.random.random(
64-
(batch_size, n_times, n_fourier_features)
65-
)
66-
batch[BatchKey.nwp_init_time_utc] = np.random.random((batch_size, n_times, n_fourier_features))
67-
68-
return batch
31+
32+
for nwp_source, nwp_config in configuration.input_data.nwp.items():
33+
source_batch = {}
34+
35+
n_channels = len(nwp_config.nwp_channels)
36+
n_y_osgb = nwp_config.nwp_image_size_pixels_height
37+
n_x_osgb = nwp_config.nwp_image_size_pixels_width
38+
n_fourier_features = 8
39+
40+
# make time matrix
41+
time_utc = make_time_utc(
42+
batch_size=batch_size,
43+
history_minutes=nwp_config.history_minutes,
44+
forecast_minutes=nwp_config.forecast_minutes,
45+
t0_datetime_utc=t0_datetime_utc,
46+
time_resolution_minutes=nwp_config.time_resolution_minutes,
47+
)
48+
n_times = time_utc.shape[1]
49+
50+
# main nwp components
51+
52+
source_batch[
53+
NWPBatchKey.nwp_init_time_utc
54+
] = time_utc # Seconds since UNIX epoch (1970-01-01).
55+
source_batch[
56+
NWPBatchKey.nwp_target_time_utc
57+
] = time_utc # Seconds since UNIX epoch (1970-01-01).
58+
source_batch[NWPBatchKey.nwp] = np.random.random(
59+
(batch_size, n_times, n_channels, n_y_osgb, n_x_osgb)
60+
)
61+
source_batch[NWPBatchKey.nwp_t0_idx] = get_n_time_steps_from_config(
62+
input_data_configuration=nwp_config, include_forecast=False
63+
)
64+
65+
source_batch[NWPBatchKey.nwp_step] = np.random.randint(0, 100, (batch_size, n_times))
66+
source_batch[NWPBatchKey.nwp_y_osgb] = np.random.randint(0, 100, (batch_size, n_y_osgb))
67+
source_batch[NWPBatchKey.nwp_x_osgb] = np.random.randint(0, 100, (batch_size, n_x_osgb))
68+
source_batch[NWPBatchKey.nwp_channel_names] = np.random.randint(0, 100, (n_channels,))
69+
70+
# fourier components
71+
source_batch[NWPBatchKey.nwp_x_osgb_fourier] = np.random.random(
72+
(batch_size, n_x_osgb, n_fourier_features)
73+
)
74+
source_batch[NWPBatchKey.nwp_y_osgb_fourier] = np.random.random(
75+
(batch_size, n_y_osgb, n_fourier_features)
76+
)
77+
source_batch[NWPBatchKey.nwp_target_time_utc] = np.random.random(
78+
(batch_size, n_times, n_fourier_features)
79+
)
80+
source_batch[NWPBatchKey.nwp_init_time_utc] = np.random.random(
81+
(batch_size, n_times, n_fourier_features)
82+
)
83+
84+
batch[nwp_source] = source_batch
85+
86+
return {BatchKey.nwp: batch}

ocf_datapipes/batch/fake/pv.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,16 @@
88
from ocf_datapipes.utils.consts import BatchKey
99

1010

11-
def make_fake_pv_data(configuration: Configuration, t0_datetime_utc: datetime):
11+
def make_fake_pv_data(
12+
configuration: Configuration, t0_datetime_utc: datetime, batch_size: int = 8
13+
) -> dict:
1214
"""
1315
Make Fake PV data ready for ML model
1416
1517
Args:
1618
configuration: configuration object
1719
t0_datetime_utc: one datetime for when t0 is
20+
batch_size: Integer batch size to create
1821
1922
Returns: dictionary of pv items
2023
"""
@@ -23,7 +26,6 @@ def make_fake_pv_data(configuration: Configuration, t0_datetime_utc: datetime):
2326
if pv_config is None:
2427
return {}
2528

26-
batch_size = configuration.process.batch_size
2729
n_pv_systems = pv_config.n_pv_systems_per_example
2830
n_fourier_features = 8
2931

ocf_datapipes/batch/fake/satellite.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,19 @@
99

1010

1111
def make_fake_satellite_data(
12-
configuration: Configuration, t0_datetime_utc: datetime, is_hrv: bool = False
13-
):
12+
configuration: Configuration,
13+
t0_datetime_utc: datetime,
14+
is_hrv: bool = False,
15+
batch_size: int = 8,
16+
) -> dict:
1417
"""
1518
Make Fake Satellite data ready for ML model. This makes data across all different data inputs
1619
1720
Args:
1821
configuration: configuration object
1922
t0_datetime_utc: one datetime for when t0 is
2023
is_hrv: option if its hrv or not
24+
batch_size: Integer batch size to create
2125
2226
Returns: dictionary of satellite items
2327
"""
@@ -34,7 +38,6 @@ def make_fake_satellite_data(
3438
if satellite_config is None:
3539
return {}
3640

37-
batch_size = configuration.process.batch_size
3841
n_channels = len(getattr(satellite_config, f"{variable}_channels"))
3942
height = getattr(satellite_config, f"{variable}_image_size_pixels_height")
4043
width = getattr(satellite_config, f"{variable}_image_size_pixels_width")

ocf_datapipes/batch/fake/sun.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,22 @@
33

44
from ocf_datapipes.batch.fake.utils import get_n_time_steps_from_config
55
from ocf_datapipes.config.model import Configuration
6-
from ocf_datapipes.utils.consts import BatchKey
6+
from ocf_datapipes.utils.consts import BatchKey, NWPBatchKey
77

88

9-
def make_fake_sun_data(configuration: Configuration):
9+
def make_fake_sun_data(configuration: Configuration, batch_size: int = 8):
1010
"""
1111
Make Fake Sun data ready for ML model. This makes data across all different data inputs
1212
1313
Args:
1414
configuration: configuration object
1515
t0_datetime_utc: one datetime for when t0 is
16+
batch_size: Integer batch size to create
1617
1718
Returns: dictionary of pv items
1819
"""
1920

2021
batch = {}
21-
batch_size = configuration.process.batch_size
2222

2323
# HRV Satellite
2424
if configuration.input_data.hrvsatellite is not None:
@@ -56,12 +56,17 @@ def make_fake_sun_data(configuration: Configuration):
5656

5757
# NWP
5858
if configuration.input_data.nwp is not None:
59-
n_nwp_timesteps = get_n_time_steps_from_config(configuration.input_data.nwp)
60-
batch[BatchKey.nwp_target_time_solar_azimuth] = np.random.random(
61-
(batch_size, n_nwp_timesteps)
62-
)
63-
batch[BatchKey.nwp_target_time_solar_elevation] = np.random.random(
64-
(batch_size, n_nwp_timesteps)
65-
)
59+
batch[BatchKey.nwp] = {}
60+
61+
for nwp_source, nwp_config in configuration.input_data.nwp.items():
62+
batch[BatchKey.nwp][nwp_source] = {}
63+
64+
n_nwp_timesteps = get_n_time_steps_from_config(configuration.input_data.nwp[nwp_source])
65+
batch[BatchKey.nwp][nwp_source][
66+
NWPBatchKey.nwp_target_time_solar_azimuth
67+
] = np.random.random((batch_size, n_nwp_timesteps))
68+
batch[BatchKey.nwp][nwp_source][
69+
NWPBatchKey.nwp_target_time_solar_elevation
70+
] = np.random.random((batch_size, n_nwp_timesteps))
6671

6772
return batch

ocf_datapipes/batch/merge_batchml.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

0 commit comments

Comments
 (0)