openclimatefix
diff --git a/notebooks/2021-08/2021-08-25/video.py b/notebooks/2021-08/2021-08-25/video.py
-2
diff --git a/notebooks/2021-08/2021-08-26/video.py b/notebooks/2021-08/2021-08-26/video.py
-3
diff --git a/notebooks/2021-09/2021-09-06/gsp.py b/notebooks/2021-09/2021-09-06/gsp.py
+1-1
diff --git a/notebooks/2021-09/2021-09-07/gsp.py b/notebooks/2021-09/2021-09-07/gsp.py
+1-1
diff --git a/notebooks/2021-09/2021-09-08/dan.py b/notebooks/2021-09/2021-09-08/dan.py
-1
diff --git a/notebooks/2021-09/2021-09-13/remove_hash.py b/notebooks/2021-09/2021-09-13/remove_hash.py
+1-2
diff --git a/notebooks/2021-09/2021-09-14/gsp_centroid.py b/notebooks/2021-09/2021-09-14/gsp_centroid.py
-2
diff --git a/notebooks/2021-09/2021-09-14/gsp_duplicated.py b/notebooks/2021-09/2021-09-14/gsp_duplicated.py
-2
diff --git a/notebooks/2021-09/2021-09-28/get_raw_sun_data.py b/notebooks/2021-09/2021-09-28/get_raw_sun_data.py
-1
diff --git a/notebooks/2021-09/2021-09-29/gsp_duplicated.py b/notebooks/2021-09/2021-09-29/gsp_duplicated.py
-2
diff --git a/notebooks/2021-09/2021-09-29/video.py b/notebooks/2021-09/2021-09-29/video.py
+3-5
diff --git a/notebooks/2021-10/2021-10-01/pydantic.py b/notebooks/2021-10/2021-10-01/pydantic.py
+1-3
diff --git a/notebooks/2021-10/2021-10-08/xr_compression.py b/notebooks/2021-10/2021-10-08/xr_compression.py
+3-4
diff --git a/notebooks/2021-10/2021-10-08/xr_pydantic.py b/notebooks/2021-10/2021-10-08/xr_pydantic.py
+1-4
diff --git a/nowcasting_dataset/data_sources/data_source.py b/nowcasting_dataset/data_sources/data_source.py
-1
diff --git a/nowcasting_dataset/data_sources/fake/batch.py b/nowcasting_dataset/data_sources/fake/batch.py
-1
diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py
-1
diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py
-5
diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py
-1
diff --git a/nowcasting_dataset/data_sources/metadata/metadata_model.py b/nowcasting_dataset/data_sources/metadata/metadata_model.py
-1
diff --git a/nowcasting_dataset/data_sources/pv/live.py b/nowcasting_dataset/data_sources/pv/live.py
-2
diff --git a/nowcasting_dataset/data_sources/pv/pv_data_source.py b/nowcasting_dataset/data_sources/pv/pv_data_source.py
+1-2
diff --git a/nowcasting_dataset/data_sources/sun/raw_data_load_save.py b/nowcasting_dataset/data_sources/sun/raw_data_load_save.py
-3
diff --git a/nowcasting_dataset/data_sources/sun/sun_data_source.py b/nowcasting_dataset/data_sources/sun/sun_data_source.py
-3
@@ -3,7 +3,6 @@
 import cv2
 import numpy as np
 import pandas as pd
-import plotly
 import plotly.graph_objects as go
 
 from nowcasting_dataset.dataset import SAT_MEAN, SAT_STD, NetCDFDataset
@@ -69,7 +68,6 @@
 channel_indexes = [1, 8, 9]
 satellite_data = []
 for channel_index in channel_indexes:
-
     # renormalize
     satellite_data.append(
         data["sat_data"][batch_index, :, :, :, channel_index] * SAT_STD.values[channel_index]
 
@@ -3,8 +3,6 @@
 import cv2
 import numpy as np
 import pandas as pd
-import plotly
-import plotly.express as px
 import plotly.graph_objects as go
 
 # get satellite image, currently from https://github.com/openclimatefix/py-staticmaps
@@ -90,7 +88,6 @@
 channel_indexes = [1, 9, 8]
 satellite_data = []
 for channel_index in channel_indexes:
-
     # renormalize
     satellite_data.append(
         data["sat_data"][batch_index, :, :, :, channel_index] * SAT_STD.values[channel_index]
 
@@ -36,7 +36,7 @@
 
 
 # find out if point is in gsp
-from shapely.geometry import Point, Polygon
+from shapely.geometry import Point
 
 _pnts = [Point(3, 3), Point(8, 8), Point(0, 51.38)]
 pnts = gpd.GeoDataFrame(geometry=_pnts, index=["A", "B", "C"])
 
@@ -38,7 +38,7 @@
 
 
 # find out if point is in gsp
-from shapely.geometry import Point, Polygon
+from shapely.geometry import Point
 
 _pnts = [Point(3, 3), Point(8, 8), Point(0, 51.38)]
 pnts = gpd.GeoDataFrame(geometry=_pnts, index=["A", "B", "C"])
 
@@ -1,4 +1,3 @@
-import gcsfs
 import xarray as xr
 
 filename = "gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr/"
 
@@ -20,7 +20,6 @@
 
 for filenames in [train_filenames, validation_filenames]:
     for file in train_filenames:
-
         print(file)
 
         filename = file.split("/")[-1]
@@ -30,7 +29,7 @@
 
             try:
                 rename_file(remote_file=file, new_filename=new_filename)
-            except Exception as e:
+            except Exception:
                 pass
         else:
             print(f"Skipping {filename}")
@@ -4,7 +4,6 @@
 
 from nowcasting_dataset.data_sources.gsp.eso import (
     get_gsp_metadata_from_eso,
-    get_gsp_shape_from_eso,
 )
 from nowcasting_dataset.geospatial import WGS84_CRS
 
@@ -16,7 +15,6 @@
 
 # for index in range(0, len(shape_data_raw)):
 for index in range(140, 150):
-
     # just select the first one
     shape_data = shape_data_raw.iloc[index : index + 1]
     shapes_dict = json.loads(shape_data["geometry"].to_json())
 
@@ -3,7 +3,6 @@
 import plotly.graph_objects as go
 
 from nowcasting_dataset.data_sources.gsp.eso import (
-    get_gsp_metadata_from_eso,
     get_gsp_shape_from_eso,
 )
 
@@ -13,7 +12,6 @@
 duplicated_raw["Amount"] = range(0, len(duplicated_raw))
 
 for i in range(0, 8, 2):
-
     # just select the first one
     duplicated = duplicated_raw.iloc[i : i + 2]
     shapes_dict = json.loads(duplicated["geometry"].to_json())
 
@@ -23,7 +23,6 @@
 from nowcasting_dataset.data_sources.gsp.eso import get_gsp_metadata_from_eso
 from nowcasting_dataset.data_sources.sun.raw_data_load_save import (
     get_azimuth_and_elevation,
-    save_to_zarr,
 )
 
 # set up
 
@@ -3,7 +3,6 @@
 import plotly.graph_objects as go
 
 from nowcasting_dataset.data_sources.gsp.eso import (
-    get_gsp_metadata_from_eso,
     get_gsp_shape_from_eso,
 )
 from nowcasting_dataset.geospatial import WGS84_CRS
@@ -15,7 +14,6 @@
 duplicated_raw["Amount"] = range(0, len(duplicated_raw))
 
 for i in range(0, 8, 2):
-
     # just select the first one
     duplicated = duplicated_raw.iloc[i : i + 2]
     shapes_dict = json.loads(duplicated["geometry"].to_json())
 
@@ -41,7 +41,6 @@
 
 
 def get_trace(dt):
-
     # plot to check it looks right
     return go.Choroplethmapbox(
         geojson=shapes_dict,
@@ -54,7 +53,6 @@ def get_trace(dt):
 
 
 def get_frame(dt):
-
     # plot to check it looks right
     return go.Choroplethmapbox(
         z=gps_data[dt],
@@ -71,8 +69,8 @@ def get_frame(dt):
 fig.update_layout(title="Midday")
 
 # fig.show(renderer="browser")
-fig.write_html(f"midday_fix.html")
-fig.write_image(f"midday_fix.png")
+fig.write_html("midday_fix.html")
+fig.write_image("midday_fix.png")
 
 # make annimation
 frames = []
@@ -147,4 +145,4 @@ def get_frame(dt):
 )
 
 fig.show(renderer="browser")
-fig.write_html(f"video.html")
+fig.write_html("video.html")
@@ -3,15 +3,14 @@
 import numpy as np
 import torch
 import xarray as xr
-from pydantic import BaseModel, Field, validator
+from pydantic import BaseModel, Field
 
 from nowcasting_dataset.config.model import Configuration
 
 Array = Union[xr.DataArray, np.ndarray, torch.Tensor]
 
 
 class Satellite(BaseModel):
-
     # width: int = Field(..., g=0, description="The width of the satellite image")
     # height: int = Field(..., g=0, description="The width of the satellite image")
     # num_channels: int = Field(..., g=0, description="The width of the satellite image")
@@ -49,7 +48,6 @@ class Config:
 
 
 class Batch(BaseModel):
-
     batch_size: int = Field(
         ...,
         g=0,
 
@@ -9,7 +9,6 @@
 def get_satellite_xrarray_data_array(
     batch_size, seq_length_5, satellite_image_size_pixels, number_sat_channels=10
 ):
-
     r = np.random.randn(
         # self.batch_size,
         seq_length_5,
@@ -53,9 +52,9 @@ def sat_data_array_to_dataset(sat_xr):
         ds = coord_to_range(ds, dim, prefix="sat")
     ds = ds.rename(
         {
-            "channels": f"sat_channels",
-            "x": f"sat_x",
-            "y": f"sat_y",
+            "channels": "sat_channels",
+            "x": "sat_x",
+            "y": "sat_y",
         }
     )
 
 
@@ -1,12 +1,10 @@
-from typing import List, Union
+from typing import Union
 
 import numpy as np
 import torch
 import xarray as xr
 from pydantic import BaseModel, Field, validator
 
-from nowcasting_dataset.config.model import Configuration
-
 Array = Union[xr.DataArray, np.ndarray, torch.Tensor]
 
 
@@ -27,7 +25,6 @@ def v_image_data(cls, v):
 
 
 class Batch(BaseModel):
-
     batch_size: int = 0
     satellite: Satellite
 
 
@@ -82,7 +82,6 @@ def __post_init__(self):
     def _get_start_dt(
         self, t0_datetime_utc: Union[pd.Timestamp, pd.DatetimeIndex]
     ) -> Union[pd.Timestamp, pd.DatetimeIndex]:
-
         return t0_datetime_utc - self.history_duration
 
     def _get_end_dt(
 
@@ -504,7 +504,6 @@ def topographic_fake(
     # make batch of arrays
     xr_arrays = []
     for i in range(batch_size):
-
         x, y = make_image_coords_osgb(
             size_x=image_size_pixels_width,
             size_y=image_size_pixels_height,
 
@@ -164,7 +164,6 @@ def get_gsp_shape_from_eso(
             shape_gpd["RegionID"] = range(1, len(shape_gpd) + 1)
 
     if save_local_file:
-
         # rename the columns to less than 10 characters
         shape_gpd_to_save = shape_gpd.copy()
         shape_gpd_to_save.rename(columns=rename_save_columns, inplace=True)
 
@@ -173,7 +173,6 @@ def get_all_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTim
         if total_gsp_nan_count > 0:
             assert Exception("There are nans in the GSP data. Can't get locations for all GSPs")
         else:
-
             t0_datetimes_utc.name = "t0_datetime_utc"
 
             # get all locations
@@ -236,7 +235,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
 
         total_gsp_nan_count = self.gsp_power.isna().sum().sum()
         if total_gsp_nan_count == 0:
-
             # get random GSP metadata
             indexes = sorted(
                 list(self.rng.integers(low=0, high=len(self.metadata), size=len(t0_datetimes_utc)))
@@ -249,7 +247,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
             ids = list(metadata.index)
 
         else:
-
             logger.warning(
                 "There are some nans in the gsp data, "
                 "so to get x,y locations we have to do a big loop"
@@ -262,7 +259,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
             ids = []
 
             for t0_dt in t0_datetimes_utc:
-
                 # Choose start and end times
                 start_dt = self._get_start_dt(t0_dt)
                 end_dt = self._get_end_dt(t0_dt)
@@ -290,7 +286,6 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
 
         locations = []
         for i in range(len(x_centers_osgb)):
-
             locations.append(
                 SpaceTimeLocation(
                     t0_datetime_utc=t0_datetimes_utc[i],
 
@@ -89,7 +89,6 @@ def load_pv_gsp_raw_data_from_pvlive(
     future_tasks = []
     with futures.ThreadPoolExecutor(max_workers=4) as executor:
         for gsp_id in gsp_ids:
-
             # set the first chunk start and end times
             start_chunk = first_start_chunk
             end_chunk = first_end_chunk
 
@@ -114,7 +114,6 @@ def save_to_csv(self, path):
             metadata_df = pd.DataFrame(metadata_dict)
 
         else:
-
             metadata_df = pd.read_csv(filename)
 
             metadata_df_extra = pd.DataFrame(metadata_dict)
 
@@ -43,7 +43,6 @@ def get_metadata_from_database(providers: List[str] = None) -> pd.DataFrame:
 
     pv_system_all_df = []
     for provider in providers:
-
         logger.debug(f"Get PV systems from database for {provider}")
 
         with db_connection.get_session() as session:
@@ -132,7 +131,6 @@ def get_pv_power_from_database(
         logger.debug(f"Found {len(pv_yields_df)} pv yields")
 
     if len(pv_yields_df) == 0:
-
         data = create_empty_pv_data(end_utc=now, providers=providers, start_utc=start_utc)
 
         return data
 
@@ -98,7 +98,6 @@ def get_data_model_for_batch():
         return PV
 
     def _load_metadata(self):
-
         logger.debug(f"Loading PV metadata from {self.files_groups}")
 
         # collect all metadata together
@@ -155,7 +154,6 @@ def _load_metadata(self):
         logger.debug(f"Found {len(pv_metadata)} pv systems")
 
     def _load_pv_power(self):
-
         logger.debug(f"Loading PV Power data from {self.files_groups}")
 
         if not self.is_live:
@@ -452,6 +450,7 @@ def get_locations(self, t0_datetimes_utc: pd.DatetimeIndex) -> List[SpaceTimeLoc
         Returns:  x_locations, y_locations. Each has one entry per t0_datetime.
             Locations are in OSGB coordinates.
         """
+
         # Set this up as a separate function, so we can cache the result!
         @functools.cache  # functools.cache requires Python >= 3.9
         def _get_pv_system_ids(t0_datetime: pd.Timestamp) -> pd.Int64Index:
 
@@ -49,16 +49,13 @@ def get_azimuth_and_elevation(
     names = []
     # loop over locations and find azimuth and elevation angles,
     with futures.ThreadPoolExecutor() as executor:
-
         logger.debug("Setting up jobs")
 
         # Submit tasks to the executor.
         future_azimuth_and_elevation_per_location = []
         for i in tqdm(range(len(x_centers))):
-
             name = x_y_to_name(x_centers[i], y_centers[i])
             if name not in names:
-
                 lat, lon = geospatial.osgb_to_lat_lon(x=x_centers[i], y=y_centers[i])
 
                 future_azimuth_and_elevation = executor.submit(
 
@@ -69,7 +69,6 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
         end_dt = self._get_end_dt(t0_datetime_utc)
 
         if not self.load_live:
-
             # The names of the columns get truncated when saving, therefore we need to look for the
             # name of the columns near the location we are looking for
             locations = np.array(
@@ -96,7 +95,6 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
             elevation = self.elevation.loc[start_dt:end_dt][name]
 
         else:
-
             latitude, longitude = osgb_to_lat_lon(x=x_center_osgb, y=y_center_osgb)
 
             datestamps = pd.date_range(start=start_dt, end=end_dt, freq="5T").tolist()
@@ -115,7 +113,6 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
         return sun
 
     def _load(self):
-
         logger.info(f"Loading Sun data from {self.zarr_path}")
 
         if not self.load_live:
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,3 @@`
`1`		`-import gcsfs`
`2`	`1`	`import xarray as xr`
`3`	`2`
`4`	`3`	`filename = "gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr/"`
Original file line number	Diff line number	Diff line change
`@@ -23,7 +23,6 @@`
`23`	`23`	`from nowcasting_dataset.data_sources.gsp.eso import get_gsp_metadata_from_eso`
`24`	`24`	`from nowcasting_dataset.data_sources.sun.raw_data_load_save import (`
`25`	`25`	`get_azimuth_and_elevation,`
`26`		`- save_to_zarr,`
`27`	`26`	`)`
`28`	`27`
`29`	`28`	`# set up`
Original file line number	Diff line number	Diff line change
`@@ -9,7 +9,6 @@`
`9`	`9`	`def get_satellite_xrarray_data_array(`
`10`	`10`	`batch_size, seq_length_5, satellite_image_size_pixels, number_sat_channels=10`
`11`	`11`	`):`
`12`		`-`
`13`	`12`	`r = np.random.randn(`
`14`	`13`	`# self.batch_size,`
`15`	`14`	`seq_length_5,`
`@@ -53,9 +52,9 @@ def sat_data_array_to_dataset(sat_xr):`
`53`	`52`	`ds = coord_to_range(ds, dim, prefix="sat")`
`54`	`53`	`ds = ds.rename(`
`55`	`54`	`{`
`56`		`- "channels": f"sat_channels",`
`57`		`- "x": f"sat_x",`
`58`		`- "y": f"sat_y",`
	`55`	`+ "channels": "sat_channels",`
	`56`	`+ "x": "sat_x",`
	`57`	`+ "y": "sat_y",`
`59`	`58`	`}`
`60`	`59`	`)`
`61`	`60`