From 6962c1e64652e09e970fbf4be45ad3dce4980c91 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 12:24:19 +0000 Subject: [PATCH 1/6] update .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index b6e4761..c330f85 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ +pv-solar-farm-forecasting.code-workspace +tests/data/009967.csv +.vscode/extensions.json +tests/data/test.csv From ebeff5a005bcc70183530867357e7a33d8adeb73 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 12:40:11 +0000 Subject: [PATCH 2/6] adding metadata download test --- .flake8 | 2 + .github/workflows/linters.yaml | 2 +- .gitignore | 7 ++- .pre-commit-config.yaml | 59 +++++++++++++++++++ conftest.py | 0 environment.yml | 30 ++++++++++ pydoc-markdown.yml | 20 +++++++ requirements.txt | 26 +++++++++ tests/scripts/test_download_data.py | 22 ++++++++ ukpn/__init__.py | 2 + ukpn/scripts/__init__.py | 2 + ukpn/scripts/download_data.py | 87 +++++++++++++++++++++++++++++ 12 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml create mode 100644 conftest.py create mode 100644 environment.yml create mode 100644 pydoc-markdown.yml create mode 100644 requirements.txt create mode 100644 tests/scripts/test_download_data.py create mode 100644 ukpn/scripts/__init__.py create mode 100644 ukpn/scripts/download_data.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..7da1f96 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 100 diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index 54ee3c9..2df1630 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -6,4 +6,4 @@ jobs: call-run-python-linters: uses: openclimatefix/.github/.github/workflows/python-lint.yml@main with: - folder: "ocf_datapipes" + folder: "ukpn" diff --git a/.gitignore 
b/.gitignore index c330f85..e0554f0 100644 --- a/.gitignore +++ b/.gitignore @@ -127,7 +127,8 @@ dmypy.json # Pyre type checker .pyre/ +tests/scripts/data +tests/data pv-solar-farm-forecasting.code-workspace -tests/data/009967.csv -.vscode/extensions.json -tests/data/test.csv +.vscode +.gitattributes diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3c7542b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,59 @@ +default_language_version: + python: python3 + +ci: + skip: [pydocstyle, flake8] + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + # list of supported hooks: https://pre-commit.com/hooks.html + - id: trailing-whitespace + - id: end-of-file-fixer + - id: debug-statements + - id: detect-private-key + + # python code formatting/linting + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle + args: + [ + --convention=google, + "--add-ignore=D200,D202,D210,D212,D415,D105", + "ukpn", + ] + files: ^ukpn/ + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: + [ + --max-line-length, + "100", + --extend-ignore=E203, + --per-file-ignores, + "__init__.py:F401", + "ukpn", + ] + files: ^ukpn/ + - repo: https://github.com/PyCQA/isort + rev: 5.11.4 + hooks: + - id: isort + args: [--profile, black, --line-length, "100", "ukpn"] + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + args: [--line-length, "100"] + + # yaml formatting + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.0.0-alpha.4 + hooks: + - id: prettier + types: [yaml] diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..1321823 --- /dev/null +++ b/environment.yml @@ -0,0 +1,30 @@ +name: uk_pv_solar_farm_forecasting +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - pip + - 
pytorch + - rioxarray + - torchdata + - torchvision + - xarray + - fsspec + - zarr + - cartopy + - dask + - pyproj + - pyresample + - geopandas + - h5netcdf + - scipy + - pip: + - einops + - pathy + - git+https://github.com/SheffieldSolar/PV_Live-API + - pyaml_env + - nowcasting_datamodel + - gitpython + - tqdm + - bottleneck diff --git a/pydoc-markdown.yml b/pydoc-markdown.yml new file mode 100644 index 0000000..0223f93 --- /dev/null +++ b/pydoc-markdown.yml @@ -0,0 +1,20 @@ +loaders: + - type: python + search_path: [ukpn/] +processors: + - type: filter + - type: smart +renderer: + type: mkdocs + pages: + - title: Home + name: index + source: README.md + - title: API Documentation + children: + - title: Data + contents: [data] + mkdocs_config: + site_name: PV solar farm forecasting + theme: readthedocs + repo_url: https://github.com/openclimatefix/pv-solar-farm-forecasting diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0e64332 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,26 @@ +torch +torchdata +Cartopy>=0.20.3 +xarray +zarr +fsspec +einops +numpy +pandas +rioxarray +pathy +pyaml_env +nowcasting_datamodel +gitpython +geopandas +dask +pvlib +jpeg_xl_float_with_nans +h5netcdf +tqdm +bottleneck +pyproj +pyresample +fastparquet +scipy +pytorch_lightning diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py new file mode 100644 index 0000000..4ff80d9 --- /dev/null +++ b/tests/scripts/test_download_data.py @@ -0,0 +1,22 @@ +from ukpn.scripts import construct_url, get_metadata + + +def test_download_metadata(): + cantubry_api_url = "https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=embedded-capacity-register&q=&facet=grid_supply_point&facet=licence_area&facet=energy_conversion_technology_1&facet=flexible_connection_yes_no&facet=connection_status&facet=primary_resource_type_group&refine.grid_supply_point=CANTERBURY+NORTH&refine.energy_conversion_technology_1=Photovoltaic" + 
download = get_metadata(api_url=cantubry_api_url, print_data=True) + + +def test_construct_url(): + url = construct_url( + list_of_facets=[ + "grid_supply_point", + "licence_area", + "energy_conversion_technology_1", + "flexible_connection_yes_no", + "connection_status", + "primary_resource_type_group", + ], + refiners=["grid_supply_point", "energy_conversion_technology_1"], + refine_values=["CANTERBURY+NORTH", "Photovoltaic"], + ) + search_url = get_metadata(api_url=url, print_data=True) diff --git a/ukpn/__init__.py b/ukpn/__init__.py index e69de29..cef144c 100644 --- a/ukpn/__init__.py +++ b/ukpn/__init__.py @@ -0,0 +1,2 @@ +"""DataPipes""" +from ukpn import scripts diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py new file mode 100644 index 0000000..8eb37d7 --- /dev/null +++ b/ukpn/scripts/__init__.py @@ -0,0 +1,2 @@ +"""Import Functions""" +from .download_data import construct_url, get_metadata diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py new file mode 100644 index 0000000..6060745 --- /dev/null +++ b/ukpn/scripts/download_data.py @@ -0,0 +1,87 @@ +"""This class is ued to retrieve data through API calls""" +import json +import logging +from pprint import pprint + +import requests + +logger = logging.getLogger(__name__) + + +def get_metadata(api_url: str, print_data: bool = False): + """ + This function retrievs metadata through api calls + + Args: + api_url: The api url link that emiits json format data + print_data: Optional to choose printing the data + """ + + response_api = requests.get(api_url) + while True: + if response_api == 200: + logger.info(f"The api resposne {response_api} is successful") + else: + logger.warning(f"The api resposne {response_api} is unsuccessul") + logger.info(f"Please enter the correct {'url'}") + break + + # Get the data from the resposne + raw_data = response_api.text + + # Parse the data into json format + data_json = json.loads(raw_data) + data_first_record = 
data_json["records"][0] + + if print_data: + pprint(data_first_record) + + +def construct_url( + dataset_name: str = "embedded-capacity-register", + list_of_facets=None, + refiners=None, + refine_values=None, +): + """This function constructs a downloadble url of JSON data + + For more information, please visit + - https://ukpowernetworks.opendatasoft.com/pages/home/ + + Args: + dataset_name: Name of the dataset that needs to be downloaded, defined by UKPN + list_of_facets: List of facets that needs to be included in the JSON data + refiners: list of refiner terms that needs to refined from the JSON data + refine_values: List of refine values of the refiners + + Note: + refiners and refine values needs to be exactly mapped + """ + # Constructing a base url + base_url = "https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=" + base_url = base_url + dataset_name + + # A seperator in the url + seperator = "&" + + # A questionare in the url + questionare = "q=" + + # A facet questionare in the url + facet_questionare = "facet=" + + # Constructing a facet string from the list of facets + facet_str = [facet_questionare + x for x in list_of_facets] + facet_str = seperator.join(facet_str) + facet_str = str(questionare + seperator + facet_str) + + # Constructing a refiner string to refine the JSON data + refine_questionare = "refine." 
+ refiners = [refine_questionare + x for x in refiners] + refiners = list(map(lambda x, y: x + str("=") + y, refiners, refine_values)) + refiners = seperator.join(refiners) + + # Constructing the final url + final_url = [base_url, facet_str, refiners] + final_url = seperator.join(final_url) + return final_url From bbfb0b5023c7a094877db12cf3a38853a594f03a Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 15:23:54 +0000 Subject: [PATCH 3/6] removed common files --- .flake8 | 2 -- .pre-commit-config.yaml | 59 ----------------------------------------- README.md | 2 -- conftest.py | 0 environment.yml | 30 --------------------- pydoc-markdown.yml | 20 -------------- requirements.txt | 26 ------------------ 7 files changed, 139 deletions(-) delete mode 100644 .flake8 delete mode 100644 .pre-commit-config.yaml delete mode 100644 README.md delete mode 100644 conftest.py delete mode 100644 environment.yml delete mode 100644 pydoc-markdown.yml delete mode 100644 requirements.txt diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 7da1f96..0000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 100 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3c7542b..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -default_language_version: - python: python3 - -ci: - skip: [pydocstyle, flake8] - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - # list of supported hooks: https://pre-commit.com/hooks.html - - id: trailing-whitespace - - id: end-of-file-fixer - - id: debug-statements - - id: detect-private-key - - # python code formatting/linting - - repo: https://github.com/PyCQA/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - args: - [ - --convention=google, - "--add-ignore=D200,D202,D210,D212,D415,D105", - "ukpn", - ] - files: ^ukpn/ - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - args: - [ - 
--max-line-length, - "100", - --extend-ignore=E203, - --per-file-ignores, - "__init__.py:F401", - "ukpn", - ] - files: ^ukpn/ - - repo: https://github.com/PyCQA/isort - rev: 5.11.4 - hooks: - - id: isort - args: [--profile, black, --line-length, "100", "ukpn"] - - repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black - args: [--line-length, "100"] - - # yaml formatting - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.4 - hooks: - - id: prettier - types: [yaml] diff --git a/README.md b/README.md deleted file mode 100644 index 0fc74fc..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# pv-solar-farm-forecasting -Forecasting for individual solar farms diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e69de29..0000000 diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 1321823..0000000 --- a/environment.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: uk_pv_solar_farm_forecasting -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - pip - - pytorch - - rioxarray - - torchdata - - torchvision - - xarray - - fsspec - - zarr - - cartopy - - dask - - pyproj - - pyresample - - geopandas - - h5netcdf - - scipy - - pip: - - einops - - pathy - - git+https://github.com/SheffieldSolar/PV_Live-API - - pyaml_env - - nowcasting_datamodel - - gitpython - - tqdm - - bottleneck diff --git a/pydoc-markdown.yml b/pydoc-markdown.yml deleted file mode 100644 index 0223f93..0000000 --- a/pydoc-markdown.yml +++ /dev/null @@ -1,20 +0,0 @@ -loaders: - - type: python - search_path: [ukpn/] -processors: - - type: filter - - type: smart -renderer: - type: mkdocs - pages: - - title: Home - name: index - source: README.md - - title: API Documentation - children: - - title: Data - contents: [data] - mkdocs_config: - site_name: PV solar farm forecasting - theme: readthedocs - repo_url: https://github.com/openclimatefix/pv-solar-farm-forecasting diff --git a/requirements.txt b/requirements.txt 
deleted file mode 100644 index 0e64332..0000000 --- a/requirements.txt +++ /dev/null @@ -1,26 +0,0 @@ -torch -torchdata -Cartopy>=0.20.3 -xarray -zarr -fsspec -einops -numpy -pandas -rioxarray -pathy -pyaml_env -nowcasting_datamodel -gitpython -geopandas -dask -pvlib -jpeg_xl_float_with_nans -h5netcdf -tqdm -bottleneck -pyproj -pyresample -fastparquet -scipy -pytorch_lightning From ad428f916c91bed133eb9545b3e8ee15fb681b3e Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sat, 14 Jan 2023 19:23:31 +0000 Subject: [PATCH 4/6] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e0554f0..c00e45f 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,4 @@ tests/data pv-solar-farm-forecasting.code-workspace .vscode .gitattributes +conftest.py From 81231f5dfa2cf70f7d0bee8b7fe658badd2d4b55 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sat, 14 Jan 2023 19:35:42 +0000 Subject: [PATCH 5/6] metadata download from api and excel url --- .gitignore | 6 +++ tests/scripts/test_download_data.py | 18 ++++--- ukpn/__init__.py | 3 +- ukpn/scripts/__init__.py | 2 +- ukpn/scripts/download_data.py | 84 +++++++++++++++++++++++++++-- 5 files changed, 101 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index c00e45f..7d842d6 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,9 @@ pv-solar-farm-forecasting.code-workspace .vscode .gitattributes conftest.py +.flake8 +.pre-commit-config.yaml +environment.yml +pydoc-markdown.yml +README.md +requirements.txt diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py index 4ff80d9..c394502 100644 --- a/tests/scripts/test_download_data.py +++ b/tests/scripts/test_download_data.py @@ -1,9 +1,7 @@ -from ukpn.scripts import construct_url, get_metadata +from pathlib import Path +from pprint import pprint - -def test_download_metadata(): - cantubry_api_url = 
"https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=embedded-capacity-register&q=&facet=grid_supply_point&facet=licence_area&facet=energy_conversion_technology_1&facet=flexible_connection_yes_no&facet=connection_status&facet=primary_resource_type_group&refine.grid_supply_point=CANTERBURY+NORTH&refine.energy_conversion_technology_1=Photovoltaic" - download = get_metadata(api_url=cantubry_api_url, print_data=True) +from ukpn.scripts import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx def test_construct_url(): @@ -19,4 +17,12 @@ def test_construct_url(): refiners=["grid_supply_point", "energy_conversion_technology_1"], refine_values=["CANTERBURY+NORTH", "Photovoltaic"], ) - search_url = get_metadata(api_url=url, print_data=True) + data = get_metadata_from_ukpn_api(api_url=url, eastings="615378", northings="165525") + + +def test_metadata_from_xlsx(): + url = "https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx" + local_path = Path(r"/home/raj/ocf/pv-solar-farm-forecasting/tests/data") + df = get_metadata_from_ukpn_xlsx( + link_of_ecr_excel=url, local_path=local_path, eastings="615378", northings="165525" + ) diff --git a/ukpn/__init__.py b/ukpn/__init__.py index cef144c..ef9e21c 100644 --- a/ukpn/__init__.py +++ b/ukpn/__init__.py @@ -1,2 +1 @@ -"""DataPipes""" -from ukpn import scripts +"""Import functions""" diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py index 8eb37d7..b3a88d5 100644 --- a/ukpn/scripts/__init__.py +++ b/ukpn/scripts/__init__.py @@ -1,2 +1,2 @@ """Import Functions""" -from .download_data import construct_url, get_metadata +from .download_data import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py index 6060745..1959c00 100644 --- a/ukpn/scripts/download_data.py +++ b/ukpn/scripts/download_data.py @@ -1,20 +1,33 @@ """This class is ued to retrieve data 
through API calls""" import json import logging +import os +from pathlib import Path from pprint import pprint +from typing import Optional, Union +import numpy as np +import pandas as pd import requests +from openpyxl import load_workbook logger = logging.getLogger(__name__) -def get_metadata(api_url: str, print_data: bool = False): +def get_metadata_from_ukpn_api( + api_url: str, + eastings: Optional[str] = None, + northings: Optional[str] = None, + print_data: bool = False, +): """ This function retrievs metadata through api calls Args: api_url: The api url link that emiits json format data print_data: Optional to choose printing the data + eastings: eastings value of the pv solar farm + northings: Northings value of the pv solar farm """ response_api = requests.get(api_url) @@ -31,10 +44,75 @@ def get_metadata(api_url: str, print_data: bool = False): # Parse the data into json format data_json = json.loads(raw_data) - data_first_record = data_json["records"][0] + + # Getting all the records + data_records = data_json["records"] + first_record = data_json["records"][0] if print_data: - pprint(data_first_record) + pprint(first_record) + + pv_site_dict_index = [] + # From the list of dictionaries + for i in range(len(data_records)): + if isinstance(data_records[i], dict): + fields = data_records[i]["fields"] + if isinstance(fields, dict): + if ( + fields["location_x_coordinate_eastings_where_data_is_held"] == eastings + and fields["location_y_coordinate_northings_where_data_is_held"] == northings + ) is True: + pv_site_dict_index.append(i) + + # CHecking if there are any sites matching the coordinates + if len(pv_site_dict_index) == 0: + logger.info(f"There are no PV sites matching with {eastings}") + return None + else: + # Getting the required data from Eastings and Northings + data_json = data_records[pv_site_dict_index[0]] + + return data_json + + +def get_metadata_from_ukpn_xlsx( + link_of_ecr_excel: str, + local_path: Path[Union, str], + eastings: 
Optional[str] = None, + northings: Optional[str] = None, +): + """Download and load the ECR file from the link provided below + + For direct download, opne this link- + https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx + + Args: + link_of_ecr_excel: Link shown above + local_path: The folder where the file needs to get downloaded + eastings: eastings value of the pv solar farm + northings: Northings value of the pv solar farm + """ + # Download and store the excel sheet in a location + resp = requests.get(link_of_ecr_excel) + local_path = os.path.join(local_path, "ecr.xlsx") + with open(local_path, "wb") as output: + output.write(resp.content) + + # Read the excel sheet + wb = load_workbook(local_path, read_only=True, keep_links=False) + + # The sheet need and its name according to UKPN is "Register Part 1" + file_name = "Register Part 1" + for text in wb.sheetnames: + if file_name in text: + df = pd.read_excel(local_path, sheet_name=text, skiprows=1) + + # Filtering the data based on the eastings and northings provided + for text in df.columns: + if "Eastings" in text: + df = df[df[text] == np.float64(eastings)].reset_index() + + return df def construct_url( From 789f68391c37f8848f361523109388babd7c21f1 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sun, 15 Jan 2023 23:07:39 +0000 Subject: [PATCH 6/6] function converting to netcdf --- .gitignore | 2 ++ tests/scripts/test_download_data.py | 9 ++++++- ukpn/scripts/__init__.py | 7 +++++- ukpn/scripts/download_data.py | 37 +++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 7d842d6..3efeabb 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,5 @@ environment.yml pydoc-markdown.yml README.md requirements.txt +pv-solar-farm-forecasting/ukpn/scripts/resample_data.py +ukpn/scripts/resample_data.py diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py index c394502..31706d1 100644 --- 
a/tests/scripts/test_download_data.py +++ b/tests/scripts/test_download_data.py @@ -1,7 +1,12 @@ from pathlib import Path from pprint import pprint -from ukpn.scripts import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx +from ukpn.scripts import ( + construct_url, + get_metadata_from_ukpn_api, + get_metadata_from_ukpn_xlsx, + metadata_df_to_netcdf, +) def test_construct_url(): @@ -22,7 +27,9 @@ def test_construct_url(): def test_metadata_from_xlsx(): url = "https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx" + test_path = "/home/raj/ocf/pv-solar-farm-forecasting/tests/data/test.csv" local_path = Path(r"/home/raj/ocf/pv-solar-farm-forecasting/tests/data") df = get_metadata_from_ukpn_xlsx( link_of_ecr_excel=url, local_path=local_path, eastings="615378", northings="165525" ) + ncxr = metadata_df_to_netcdf(path_to_ukpn_timeseries=test_path) diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py index b3a88d5..03473f6 100644 --- a/ukpn/scripts/__init__.py +++ b/ukpn/scripts/__init__.py @@ -1,2 +1,7 @@ """Import Functions""" -from .download_data import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx +from .download_data import ( + construct_url, + get_metadata_from_ukpn_api, + get_metadata_from_ukpn_xlsx, + metadata_df_to_netcdf, +) diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py index 1959c00..d927c40 100644 --- a/ukpn/scripts/download_data.py +++ b/ukpn/scripts/download_data.py @@ -9,8 +9,11 @@ import numpy as np import pandas as pd import requests +import xarray as xr from openpyxl import load_workbook +from ukpn.scripts.resample_data import interpolation_pandas, load_csv_to_pandas + logger = logging.getLogger(__name__) @@ -66,7 +69,8 @@ def get_metadata_from_ukpn_api( # CHecking if there are any sites matching the coordinates if len(pv_site_dict_index) == 0: - logger.info(f"There are no PV sites matching with {eastings}") + logger.info(f"There are no 
PV sites matching with eastings: {eastings}") + logger.info(f"There are no PV sites matching with northings: {northings}") return None else: # Getting the required data from Eastings and Northings @@ -80,7 +84,7 @@ def get_metadata_from_ukpn_xlsx( local_path: Path[Union, str], eastings: Optional[str] = None, northings: Optional[str] = None, -): +) -> pd.DataFrame: """Download and load the ECR file from the link provided below For direct download, opne this link- @@ -115,6 +119,35 @@ def get_metadata_from_ukpn_xlsx( return df +def metadata_df_to_netcdf( + path_to_ukpn_timeseries: str, input_dataframe: Optional[pd.DataFrame] = None +) -> xr.Dataset: + + # Loading the UKPN time series data into a dataframe + original_df = load_csv_to_pandas(path_to_file=path_to_ukpn_timeseries) + + # Interpolating time series + interpolated_df = interpolation_pandas(original_df=original_df) + + # Getting all the time series dates + interpolated_timeseries = interpolated_df.index.values + interpolated_data_values = interpolated_df[interpolated_df.columns[0]].values + + # Creating an Xarray data array + final_xarray = xr.DataArray( + data=interpolated_data_values, + dims="time_utc", + coords={"time_utc": interpolated_timeseries}, + attrs={"Description": " This Data array consists of time-series data from UKPN"}, + ) + + # Getting the column names + # TODO + required_column_names = ["Eastings", "Northings", "Maximum Export Capacity"] + + return final_xarray + + def construct_url( dataset_name: str = "embedded-capacity-register", list_of_facets=None,