From 6962c1e64652e09e970fbf4be45ad3dce4980c91 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 12:24:19 +0000 Subject: [PATCH 1/6] update .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index b6e4761..c330f85 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ dmypy.json # Pyre type checker .pyre/ +pv-solar-farm-forecasting.code-workspace +tests/data/009967.csv +.vscode/extensions.json +tests/data/test.csv From ebeff5a005bcc70183530867357e7a33d8adeb73 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 12:40:11 +0000 Subject: [PATCH 2/6] adding metadata download test --- .flake8 | 2 + .github/workflows/linters.yaml | 2 +- .gitignore | 7 ++- .pre-commit-config.yaml | 59 +++++++++++++++++++ conftest.py | 0 environment.yml | 30 ++++++++++ pydoc-markdown.yml | 20 +++++++ requirements.txt | 26 +++++++++ tests/scripts/test_download_data.py | 22 ++++++++ ukpn/__init__.py | 2 + ukpn/scripts/__init__.py | 2 + ukpn/scripts/download_data.py | 87 +++++++++++++++++++++++++++++ 12 files changed, 255 insertions(+), 4 deletions(-) create mode 100644 .flake8 create mode 100644 .pre-commit-config.yaml create mode 100644 conftest.py create mode 100644 environment.yml create mode 100644 pydoc-markdown.yml create mode 100644 requirements.txt create mode 100644 tests/scripts/test_download_data.py create mode 100644 ukpn/scripts/__init__.py create mode 100644 ukpn/scripts/download_data.py diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000..7da1f96 --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +max-line-length = 100 diff --git a/.github/workflows/linters.yaml b/.github/workflows/linters.yaml index 54ee3c9..2df1630 100644 --- a/.github/workflows/linters.yaml +++ b/.github/workflows/linters.yaml @@ -6,4 +6,4 @@ jobs: call-run-python-linters: uses: openclimatefix/.github/.github/workflows/python-lint.yml@main with: - folder: "ocf_datapipes" + folder: "ukpn" diff --git a/.gitignore 
b/.gitignore index c330f85..e0554f0 100644 --- a/.gitignore +++ b/.gitignore @@ -127,7 +127,8 @@ dmypy.json # Pyre type checker .pyre/ +tests/scripts/data +tests/data pv-solar-farm-forecasting.code-workspace -tests/data/009967.csv -.vscode/extensions.json -tests/data/test.csv +.vscode +.gitattributes diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3c7542b --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,59 @@ +default_language_version: + python: python3 + +ci: + skip: [pydocstyle, flake8] + +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + # list of supported hooks: https://pre-commit.com/hooks.html + - id: trailing-whitespace + - id: end-of-file-fixer + - id: debug-statements + - id: detect-private-key + + # python code formatting/linting + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle + args: + [ + --convention=google, + "--add-ignore=D200,D202,D210,D212,D415,D105", + "ukpn", + ] + files: ^ukpn/ + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 + args: + [ + --max-line-length, + "100", + --extend-ignore=E203, + --per-file-ignores, + "__init__.py:F401", + "ukpn", + ] + files: ^ukpn/ + - repo: https://github.com/PyCQA/isort + rev: 5.11.4 + hooks: + - id: isort + args: [--profile, black, --line-length, "100", "ukpn"] + - repo: https://github.com/psf/black + rev: 22.12.0 + hooks: + - id: black + args: [--line-length, "100"] + + # yaml formatting + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v3.0.0-alpha.4 + hooks: + - id: prettier + types: [yaml] diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..1321823 --- /dev/null +++ b/environment.yml @@ -0,0 +1,30 @@ +name: uk_pv_solar_farm_forecasting +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - pip + - 
pytorch + - rioxarray + - torchdata + - torchvision + - xarray + - fsspec + - zarr + - cartopy + - dask + - pyproj + - pyresample + - geopandas + - h5netcdf + - scipy + - pip: + - einops + - pathy + - git+https://github.com/SheffieldSolar/PV_Live-API + - pyaml_env + - nowcasting_datamodel + - gitpython + - tqdm + - bottleneck diff --git a/pydoc-markdown.yml b/pydoc-markdown.yml new file mode 100644 index 0000000..0223f93 --- /dev/null +++ b/pydoc-markdown.yml @@ -0,0 +1,20 @@ +loaders: + - type: python + search_path: [ukpn/] +processors: + - type: filter + - type: smart +renderer: + type: mkdocs + pages: + - title: Home + name: index + source: README.md + - title: API Documentation + children: + - title: Data + contents: [data] + mkdocs_config: + site_name: PV solar farm forecasting + theme: readthedocs + repo_url: https://github.com/openclimatefix/pv-solar-farm-forecasting diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0e64332 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,26 @@ +torch +torchdata +Cartopy>=0.20.3 +xarray +zarr +fsspec +einops +numpy +pandas +rioxarray +pathy +pyaml_env +nowcasting_datamodel +gitpython +geopandas +dask +pvlib +jpeg_xl_float_with_nans +h5netcdf +tqdm +bottleneck +pyproj +pyresample +fastparquet +scipy +pytorch_lightning diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py new file mode 100644 index 0000000..4ff80d9 --- /dev/null +++ b/tests/scripts/test_download_data.py @@ -0,0 +1,22 @@ +from ukpn.scripts import construct_url, get_metadata + + +def test_download_metadata(): + cantubry_api_url = "https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=embedded-capacity-register&q=&facet=grid_supply_point&facet=licence_area&facet=energy_conversion_technology_1&facet=flexible_connection_yes_no&facet=connection_status&facet=primary_resource_type_group&refine.grid_supply_point=CANTERBURY+NORTH&refine.energy_conversion_technology_1=Photovoltaic" + 
download = get_metadata(api_url=cantubry_api_url, print_data=True) + + +def test_construct_url(): + url = construct_url( + list_of_facets=[ + "grid_supply_point", + "licence_area", + "energy_conversion_technology_1", + "flexible_connection_yes_no", + "connection_status", + "primary_resource_type_group", + ], + refiners=["grid_supply_point", "energy_conversion_technology_1"], + refine_values=["CANTERBURY+NORTH", "Photovoltaic"], + ) + search_url = get_metadata(api_url=url, print_data=True) diff --git a/ukpn/__init__.py b/ukpn/__init__.py index e69de29..cef144c 100644 --- a/ukpn/__init__.py +++ b/ukpn/__init__.py @@ -0,0 +1,2 @@ +"""DataPipes""" +from ukpn import scripts diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py new file mode 100644 index 0000000..8eb37d7 --- /dev/null +++ b/ukpn/scripts/__init__.py @@ -0,0 +1,2 @@ +"""Import Functions""" +from .download_data import construct_url, get_metadata diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py new file mode 100644 index 0000000..6060745 --- /dev/null +++ b/ukpn/scripts/download_data.py @@ -0,0 +1,87 @@ +"""This class is ued to retrieve data through API calls""" +import json +import logging +from pprint import pprint + +import requests + +logger = logging.getLogger(__name__) + + +def get_metadata(api_url: str, print_data: bool = False): + """ + This function retrievs metadata through api calls + + Args: + api_url: The api url link that emiits json format data + print_data: Optional to choose printing the data + """ + + response_api = requests.get(api_url) + while True: + if response_api == 200: + logger.info(f"The api resposne {response_api} is successful") + else: + logger.warning(f"The api resposne {response_api} is unsuccessul") + logger.info(f"Please enter the correct {'url'}") + break + + # Get the data from the resposne + raw_data = response_api.text + + # Parse the data into json format + data_json = json.loads(raw_data) + data_first_record = 
data_json["records"][0] + + if print_data: + pprint(data_first_record) + + +def construct_url( + dataset_name: str = "embedded-capacity-register", + list_of_facets=None, + refiners=None, + refine_values=None, +): + """This function constructs a downloadble url of JSON data + + For more information, please visit + - https://ukpowernetworks.opendatasoft.com/pages/home/ + + Args: + dataset_name: Name of the dataset that needs to be downloaded, defined by UKPN + list_of_facets: List of facets that needs to be included in the JSON data + refiners: list of refiner terms that needs to refined from the JSON data + refine_values: List of refine values of the refiners + + Note: + refiners and refine values needs to be exactly mapped + """ + # Constructing a base url + base_url = "https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=" + base_url = base_url + dataset_name + + # A seperator in the url + seperator = "&" + + # A questionare in the url + questionare = "q=" + + # A facet questionare in the url + facet_questionare = "facet=" + + # Constructing a facet string from the list of facets + facet_str = [facet_questionare + x for x in list_of_facets] + facet_str = seperator.join(facet_str) + facet_str = str(questionare + seperator + facet_str) + + # Constructing a refiner string to refine the JSON data + refine_questionare = "refine." 
+ refiners = [refine_questionare + x for x in refiners] + refiners = list(map(lambda x, y: x + str("=") + y, refiners, refine_values)) + refiners = seperator.join(refiners) + + # Constructing the final url + final_url = [base_url, facet_str, refiners] + final_url = seperator.join(final_url) + return final_url From bbfb0b5023c7a094877db12cf3a38853a594f03a Mon Sep 17 00:00:00 2001 From: vrym2 Date: Fri, 13 Jan 2023 15:23:54 +0000 Subject: [PATCH 3/6] removed common files --- .flake8 | 2 -- .pre-commit-config.yaml | 59 ----------------------------------------- README.md | 2 -- conftest.py | 0 environment.yml | 30 --------------------- pydoc-markdown.yml | 20 -------------- requirements.txt | 26 ------------------ 7 files changed, 139 deletions(-) delete mode 100644 .flake8 delete mode 100644 .pre-commit-config.yaml delete mode 100644 README.md delete mode 100644 conftest.py delete mode 100644 environment.yml delete mode 100644 pydoc-markdown.yml delete mode 100644 requirements.txt diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 7da1f96..0000000 --- a/.flake8 +++ /dev/null @@ -1,2 +0,0 @@ -[flake8] -max-line-length = 100 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 3c7542b..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -default_language_version: - python: python3 - -ci: - skip: [pydocstyle, flake8] - -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - # list of supported hooks: https://pre-commit.com/hooks.html - - id: trailing-whitespace - - id: end-of-file-fixer - - id: debug-statements - - id: detect-private-key - - # python code formatting/linting - - repo: https://github.com/PyCQA/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - args: - [ - --convention=google, - "--add-ignore=D200,D202,D210,D212,D415,D105", - "ukpn", - ] - files: ^ukpn/ - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - args: - [ - 
--max-line-length, - "100", - --extend-ignore=E203, - --per-file-ignores, - "__init__.py:F401", - "ukpn", - ] - files: ^ukpn/ - - repo: https://github.com/PyCQA/isort - rev: 5.11.4 - hooks: - - id: isort - args: [--profile, black, --line-length, "100", "ukpn"] - - repo: https://github.com/psf/black - rev: 22.12.0 - hooks: - - id: black - args: [--line-length, "100"] - - # yaml formatting - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.4 - hooks: - - id: prettier - types: [yaml] diff --git a/README.md b/README.md deleted file mode 100644 index 0fc74fc..0000000 --- a/README.md +++ /dev/null @@ -1,2 +0,0 @@ -# pv-solar-farm-forecasting -Forecasting for individual solar farms diff --git a/conftest.py b/conftest.py deleted file mode 100644 index e69de29..0000000 diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 1321823..0000000 --- a/environment.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: uk_pv_solar_farm_forecasting -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - pip - - pytorch - - rioxarray - - torchdata - - torchvision - - xarray - - fsspec - - zarr - - cartopy - - dask - - pyproj - - pyresample - - geopandas - - h5netcdf - - scipy - - pip: - - einops - - pathy - - git+https://github.com/SheffieldSolar/PV_Live-API - - pyaml_env - - nowcasting_datamodel - - gitpython - - tqdm - - bottleneck diff --git a/pydoc-markdown.yml b/pydoc-markdown.yml deleted file mode 100644 index 0223f93..0000000 --- a/pydoc-markdown.yml +++ /dev/null @@ -1,20 +0,0 @@ -loaders: - - type: python - search_path: [ukpn/] -processors: - - type: filter - - type: smart -renderer: - type: mkdocs - pages: - - title: Home - name: index - source: README.md - - title: API Documentation - children: - - title: Data - contents: [data] - mkdocs_config: - site_name: PV solar farm forecasting - theme: readthedocs - repo_url: https://github.com/openclimatefix/pv-solar-farm-forecasting diff --git a/requirements.txt b/requirements.txt 
deleted file mode 100644 index 0e64332..0000000 --- a/requirements.txt +++ /dev/null @@ -1,26 +0,0 @@ -torch -torchdata -Cartopy>=0.20.3 -xarray -zarr -fsspec -einops -numpy -pandas -rioxarray -pathy -pyaml_env -nowcasting_datamodel -gitpython -geopandas -dask -pvlib -jpeg_xl_float_with_nans -h5netcdf -tqdm -bottleneck -pyproj -pyresample -fastparquet -scipy -pytorch_lightning From ad428f916c91bed133eb9545b3e8ee15fb681b3e Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sat, 14 Jan 2023 19:23:31 +0000 Subject: [PATCH 4/6] update .gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e0554f0..c00e45f 100644 --- a/.gitignore +++ b/.gitignore @@ -132,3 +132,4 @@ tests/data pv-solar-farm-forecasting.code-workspace .vscode .gitattributes +conftest.py From 81231f5dfa2cf70f7d0bee8b7fe658badd2d4b55 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sat, 14 Jan 2023 19:35:42 +0000 Subject: [PATCH 5/6] metadata download from api and excel url --- .gitignore | 6 +++ tests/scripts/test_download_data.py | 18 ++++--- ukpn/__init__.py | 3 +- ukpn/scripts/__init__.py | 2 +- ukpn/scripts/download_data.py | 84 +++++++++++++++++++++++++++-- 5 files changed, 101 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index c00e45f..7d842d6 100644 --- a/.gitignore +++ b/.gitignore @@ -133,3 +133,9 @@ pv-solar-farm-forecasting.code-workspace .vscode .gitattributes conftest.py +.flake8 +.pre-commit-config.yaml +environment.yml +pydoc-markdown.yml +README.md +requirements.txt diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py index 4ff80d9..c394502 100644 --- a/tests/scripts/test_download_data.py +++ b/tests/scripts/test_download_data.py @@ -1,9 +1,7 @@ -from ukpn.scripts import construct_url, get_metadata +from pathlib import Path +from pprint import pprint - -def test_download_metadata(): - cantubry_api_url = 
"https://ukpowernetworks.opendatasoft.com/api/records/1.0/search/?dataset=embedded-capacity-register&q=&facet=grid_supply_point&facet=licence_area&facet=energy_conversion_technology_1&facet=flexible_connection_yes_no&facet=connection_status&facet=primary_resource_type_group&refine.grid_supply_point=CANTERBURY+NORTH&refine.energy_conversion_technology_1=Photovoltaic" - download = get_metadata(api_url=cantubry_api_url, print_data=True) +from ukpn.scripts import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx def test_construct_url(): @@ -19,4 +17,12 @@ def test_construct_url(): refiners=["grid_supply_point", "energy_conversion_technology_1"], refine_values=["CANTERBURY+NORTH", "Photovoltaic"], ) - search_url = get_metadata(api_url=url, print_data=True) + data = get_metadata_from_ukpn_api(api_url=url, eastings="615378", northings="165525") + + +def test_metadata_from_xlsx(): + url = "https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx" + local_path = Path(r"/home/raj/ocf/pv-solar-farm-forecasting/tests/data") + df = get_metadata_from_ukpn_xlsx( + link_of_ecr_excel=url, local_path=local_path, eastings="615378", northings="165525" + ) diff --git a/ukpn/__init__.py b/ukpn/__init__.py index cef144c..ef9e21c 100644 --- a/ukpn/__init__.py +++ b/ukpn/__init__.py @@ -1,2 +1 @@ -"""DataPipes""" -from ukpn import scripts +"""Import functions""" diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py index 8eb37d7..b3a88d5 100644 --- a/ukpn/scripts/__init__.py +++ b/ukpn/scripts/__init__.py @@ -1,2 +1,2 @@ """Import Functions""" -from .download_data import construct_url, get_metadata +from .download_data import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py index 6060745..1959c00 100644 --- a/ukpn/scripts/download_data.py +++ b/ukpn/scripts/download_data.py @@ -1,20 +1,33 @@ """This class is ued to retrieve data 
through API calls""" import json import logging +import os +from pathlib import Path from pprint import pprint +from typing import Optional, Union +import numpy as np +import pandas as pd import requests +from openpyxl import load_workbook logger = logging.getLogger(__name__) -def get_metadata(api_url: str, print_data: bool = False): +def get_metadata_from_ukpn_api( + api_url: str, + eastings: Optional[str] = None, + northings: Optional[str] = None, + print_data: bool = False, +): """ This function retrievs metadata through api calls Args: api_url: The api url link that emiits json format data print_data: Optional to choose printing the data + eastings: eastings value of the pv solar farm + northings: Northings value of the pv solar farm """ response_api = requests.get(api_url) @@ -31,10 +44,75 @@ def get_metadata(api_url: str, print_data: bool = False): # Parse the data into json format data_json = json.loads(raw_data) - data_first_record = data_json["records"][0] + + # Getting all the records + data_records = data_json["records"] + first_record = data_json["records"][0] if print_data: - pprint(data_first_record) + pprint(first_record) + + pv_site_dict_index = [] + # From the list of dictionaries + for i in range(len(data_records)): + if isinstance(data_records[i], dict): + fields = data_records[i]["fields"] + if isinstance(fields, dict): + if ( + fields["location_x_coordinate_eastings_where_data_is_held"] == eastings + and fields["location_y_coordinate_northings_where_data_is_held"] == northings + ) is True: + pv_site_dict_index.append(i) + + # CHecking if there are any sites matching the coordinates + if len(pv_site_dict_index) == 0: + logger.info(f"There are no PV sites matching with {eastings}") + return None + else: + # Getting the required data from Eastings and Northings + data_json = data_records[pv_site_dict_index[0]] + + return data_json + + +def get_metadata_from_ukpn_xlsx( + link_of_ecr_excel: str, + local_path: Path[Union, str], + eastings: 
Optional[str] = None, + northings: Optional[str] = None, +): + """Download and load the ECR file from the link provided below + + For direct download, opne this link- + https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx + + Args: + link_of_ecr_excel: Link shown above + local_path: The folder where the file needs to get downloaded + eastings: eastings value of the pv solar farm + northings: Northings value of the pv solar farm + """ + # Download and store the excel sheet in a location + resp = requests.get(link_of_ecr_excel) + local_path = os.path.join(local_path, "ecr.xlsx") + with open(local_path, "wb") as output: + output.write(resp.content) + + # Read the excel sheet + wb = load_workbook(local_path, read_only=True, keep_links=False) + + # The sheet need and its name according to UKPN is "Register Part 1" + file_name = "Register Part 1" + for text in wb.sheetnames: + if file_name in text: + df = pd.read_excel(local_path, sheet_name=text, skiprows=1) + + # Filtering the data based on the eastings and northings provided + for text in df.columns: + if "Eastings" in text: + df = df[df[text] == np.float64(eastings)].reset_index() + + return df def construct_url( From 789f68391c37f8848f361523109388babd7c21f1 Mon Sep 17 00:00:00 2001 From: vrym2 Date: Sun, 15 Jan 2023 23:07:39 +0000 Subject: [PATCH 6/6] function converting to netcdf --- .gitignore | 2 ++ tests/scripts/test_download_data.py | 9 ++++++- ukpn/scripts/__init__.py | 7 +++++- ukpn/scripts/download_data.py | 37 +++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 7d842d6..3efeabb 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,5 @@ environment.yml pydoc-markdown.yml README.md requirements.txt +pv-solar-farm-forecasting/ukpn/scripts/resample_data.py +ukpn/scripts/resample_data.py diff --git a/tests/scripts/test_download_data.py b/tests/scripts/test_download_data.py index c394502..31706d1 100644 --- 
a/tests/scripts/test_download_data.py +++ b/tests/scripts/test_download_data.py @@ -1,7 +1,12 @@ from pathlib import Path from pprint import pprint -from ukpn.scripts import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx +from ukpn.scripts import ( + construct_url, + get_metadata_from_ukpn_api, + get_metadata_from_ukpn_xlsx, + metadata_df_to_netcdf, +) def test_construct_url(): @@ -22,7 +27,9 @@ def test_construct_url(): def test_metadata_from_xlsx(): url = "https://media.umbraco.io/uk-power-networks/0dqjxaho/embedded-capacity-register.xlsx" + test_path = "/home/raj/ocf/pv-solar-farm-forecasting/tests/data/test.csv" local_path = Path(r"/home/raj/ocf/pv-solar-farm-forecasting/tests/data") df = get_metadata_from_ukpn_xlsx( link_of_ecr_excel=url, local_path=local_path, eastings="615378", northings="165525" ) + ncxr = metadata_df_to_netcdf(path_to_ukpn_timeseries=test_path) diff --git a/ukpn/scripts/__init__.py b/ukpn/scripts/__init__.py index b3a88d5..03473f6 100644 --- a/ukpn/scripts/__init__.py +++ b/ukpn/scripts/__init__.py @@ -1,2 +1,7 @@ """Import Functions""" -from .download_data import construct_url, get_metadata_from_ukpn_api, get_metadata_from_ukpn_xlsx +from .download_data import ( + construct_url, + get_metadata_from_ukpn_api, + get_metadata_from_ukpn_xlsx, + metadata_df_to_netcdf, +) diff --git a/ukpn/scripts/download_data.py b/ukpn/scripts/download_data.py index 1959c00..d927c40 100644 --- a/ukpn/scripts/download_data.py +++ b/ukpn/scripts/download_data.py @@ -9,8 +9,11 @@ import numpy as np import pandas as pd import requests +import xarray as xr from openpyxl import load_workbook +from ukpn.scripts.resample_data import interpolation_pandas, load_csv_to_pandas + logger = logging.getLogger(__name__) @@ -66,7 +69,8 @@ def get_metadata_from_ukpn_api( # CHecking if there are any sites matching the coordinates if len(pv_site_dict_index) == 0: - logger.info(f"There are no PV sites matching with {eastings}") + logger.info(f"There are no 
PV sites matching with eastings: {eastings}") + logger.info(f"There are no PV sites matching with northings: {northings}") return None else: # Getting the required data from Eastings and Northings @@ -80,7 +84,7 @@ def get_metadata_from_ukpn_xlsx( local_path: Path[Union, str], eastings: Optional[str] = None, northings: Optional[str] = None, -): +) -> pd.DataFrame: """Download and load the ECR file from the link provided below For direct download, opne this link- @@ -115,6 +119,35 @@ def get_metadata_from_ukpn_xlsx( return df +def metadata_df_to_netcdf( + path_to_ukpn_timeseries: str, input_dataframe: Optional[pd.DataFrame] = None +) -> xr.Dataset: + + # Loading the UKPN time series data into a dataframe + original_df = load_csv_to_pandas(path_to_file=path_to_ukpn_timeseries) + + # Interpolating time series + interpolated_df = interpolation_pandas(original_df=original_df) + + # Getting all the time series dates + interpolated_timeseries = interpolated_df.index.values + interpolated_data_values = interpolated_df[interpolated_df.columns[0]].values + + # Creating an Xarray data array + final_xarray = xr.DataArray( + data=interpolated_data_values, + dims="time_utc", + coords={"time_utc": interpolated_timeseries}, + attrs={"Description": " This Data array consists of time-series data from UKPN"}, + ) + + # Getting the column names + # TODO + required_column_names = ["Eastings", "Northings", "Maximum Export Capacity"] + + return final_xarray + + def construct_url( dataset_name: str = "embedded-capacity-register", list_of_facets=None,