diff --git a/.travis.yml b/.travis.yml
index ee346469..8fb67e6d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -64,6 +64,9 @@ before_install:
# Install pygdal bindings compatible with dpkg based gdal libs
- pip install pygdal>="$(gdal-config --version)"
+ - pip install --progress-bar off --requirement requirements-test.txt
+ - pip install git+https://github.com/mapbox/rasterio.git@master
+
install:
- travis_retry pip install --upgrade pytest
- travis_retry pip install codecov cython boto3
diff --git a/check-code.sh b/check-code.sh
index 9501dc75..428c4d32 100755
--- a/check-code.sh
+++ b/check-code.sh
@@ -26,5 +26,5 @@ shellcheck ./**/*.sh
yamllint ./**/*.yaml
# Users can specify extra folders (ie. integration_tests) as arguments.
-py.test -r sx --cov digitalearthau --durations=5 digitalearthau scripts/**/*.py "$@"
+py.test -r sx --cov digitalearthau --durations=5 digitalearthau scripts_tests/*.py scripts/**/*.py "$@"
diff --git a/digitalearthau/testing/plugin.py b/digitalearthau/testing/plugin.py
index b73fdee1..d956a46f 100644
--- a/digitalearthau/testing/plugin.py
+++ b/digitalearthau/testing/plugin.py
@@ -1,14 +1,11 @@
import itertools
-
import os
-import pytest
from pathlib import Path
from typing import Iterable
-import datacube
-import digitalearthau
-import digitalearthau.system
+import pytest
from datacube.config import LocalConfig
+
from . import factories
# These are unavoidable in pytests due to fixtures
@@ -26,6 +23,8 @@
def pytest_report_header(config):
if config.getoption('verbose') > 0:
+ import datacube
+ import digitalearthau
return (
f"digitaleathau {digitalearthau.__version__}, "
f"opendatacube {datacube.__version__}"
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 00000000..80cfbee8
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,2 @@
+colorama
+xmlschema
diff --git a/scripts/index_aster_lpdaac.py b/scripts/index_aster_lpdaac.py
new file mode 100755
index 00000000..39b4ed8b
--- /dev/null
+++ b/scripts/index_aster_lpdaac.py
@@ -0,0 +1,641 @@
+#!/usr/bin/env python
+# language=rst
+"""
+ASTER L1T Indexing
+==================
+
+This program allows indexing the Australia region ASTER (Advanced Spaceborne Thermal
+Emission and Reflection Radiometer) L1T data stored on the NCI into an ODC Database.
+
+Background
+----------
+
+ASTER data consists of visible and near infrared (VNIR) frequencies
+with three bands at 15-meter resolution, short-wave infrared (SWIR)
+frequencies with six bands at 30-meter resolution, and thermal infrared (TIR)
+wavelength with five bands at 90-meter resolution.
+
+Further details of AST_L1T data is available from
+https://lpdaac.usgs.gov/dataset_discovery/aster/aster_products_table/ast_l1t_v003
+
+The ASTER L1T data product is derived from ASTER Level 1A data that has been
+geometrically corrected and reprojected to a north-up Universal Transverse Mercator (UTM)
+projection.
+(Please see: https://lpdaac.usgs.gov/sites/default/files/public/elearning/ASTER_L1T_Tutorial.html)
+
+Further, depending on whether the following modes are enabled, dataset may present
+different bands.
+
+::
+
+ ASTEROBSERVATIONMODE.1=VNIR1, ON/OFF
+ ASTEROBSERVATIONMODE.2=VNIR2, ON/OFF
+ ASTEROBSERVATIONMODE.3=SWIR, ON/OFF
+ ASTEROBSERVATIONMODE.4=TIR, ON/OFF
+
+Regarding `SWIR` bands please note the following advice from
+https://asterweb.jpl.nasa.gov/swir-alert.asp
+
+::
+
+ ASTER SWIR detectors are no longer functioning due to anomalously high SWIR detector
+ temperatures. ASTER SWIR data acquired since April 2008 are not useable, and
+ show saturation of values and severe striping. All attempts to bring the SWIR bands
+ back to life have failed, and no further action is envisioned. -- January 12, 2009
+
+Usage
+-----
+
+This script has three modes, all of which need a directory of ASTER_L1T HDF data files.
+
+ 1. Create the product definition in an ODC database
+ 2. Generate VRT files with GDAL usable geospatial information
+ 3. Index Datasets into an ODC Database
+
+The input data is stored in sets of hdf files in `/g/data/v10/ASTER_AU/`.
+
+
+
+::
+
+ ./index_aster_lpdaac.py create-product --product aster_l1t_vnir /g/data/v10/ASTER_AU/2018.01.01
+ ./index_aster_lpdaac.py create-vrt --product aster_l1t_vnir /g/data/v10/ASTER_AU/2018.01.01
+ ./index_aster_lpdaac.py index-data --product aster_l1t_vnir /g/data/v10/ASTER_AU/2018.01.01
+
+::
+
+ for i in /g/data/v10/ASTER_AU/*; do
+ ./index_aster_lpdaac.py --config aster_lpdaac.conf index-data
+ --product aster_l1t_vnir $i
+ done
+
+Test Database Creation Steps
+----------------------------
+
+::
+
+ psql -h agdcdev-db.nci.org.au
+ CREATE DATABASE aster_lpdaac WITH OWNER agdc_admin;
+ GRANT TEMPORARY, CONNECT ON DATABASE aster_lpdaac to public;
+
+Example ODC Configuration: ``aster_lpdaac.conf``
+
+::
+
+ [datacube]
+ db_hostname: agdcdev-db.nci.org.au
+ db_port: 6432
+ db_database: aster_lpdaac
+
+::
+
+ datacube --config aster_lpdaac.conf system init
+
+
+Implementation Details
+----------------------
+
+The ODC Index datasets point to the corresponding VRT files in order to access
+raster measurement data. The VRT file in turn points to the original `.hdf` file
+through `absolute path names` (relative path names are not working at the moment,
+and it is advised that the VRT files must be generated for the final resident
+location of `.hdf` files).
+
+Each VRT file specifies a consistent set of bands from ASTER as a single product.
+For example, `vnir` sensors correspond to `aster_l1t_vnir` product, `tir`
+sensors correspond to `aster_l1t_tir` product, and `swir` sensors correspond
+to `aster_l1t_swir` product. The corresponding definitions of these product
+names and corresponding bands (with band names as identified in the original
+`hdf` file) are defined in the constant `PRODUCTS` in this script.
+
+It attempts to create stable UUIDs for the generated Datasets,
+based on the file path and modification time of the underlying HDF file Data
+as well as product name.
+
+The VRT file for each product and dataset YAML file for each product is written
+onto the same directory that contains the original `.hdf` file.
+"""
+import functools
+import json
+import logging
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+
+import click
+import numpy as np
+import rasterio
+import yaml
+from osgeo import gdal, osr
+
+from datacube import Datacube
+from datacube.index.hl import Doc2Dataset
+
+LOG = logging.getLogger(__name__)
+
+PRODUCTS = {
+ 'aster_l1t_vnir': {'bands': {'ImageData2', 'ImageData1', 'ImageData3N'},
+ 'include_only': {'ASTER', 'CORRECT', 'EAST', 'FLY', 'GAIN', 'INPUT', 'LOWER', 'MAP',
+ 'NORTH', 'NUMBERGCP', 'ORBIT', 'POINT', 'QAPERCENT', 'RECURRENT', 'SCENE',
+ 'SIZE', 'SOLAR', 'SOUTH', 'UPPER', 'UTMZONENUMBER', 'WEST'}},
+ 'aster_l1t_swir': {'bands': {'ImageData4', 'ImageData5', 'ImageData6', 'ImageData7', 'ImageData8', 'ImageData9'},
+ 'include_only': {'ASTER', 'CORRECT', 'EAST', 'FLY', 'GAIN', 'INPUT', 'LOWER', 'MAP',
+ 'NORTH', 'NUMBERGCP', 'ORBIT', 'POINT', 'QAPERCENT', 'RECURRENT', 'SCENE',
+ 'SIZE', 'SOLAR', 'SOUTH', 'UPPER', 'UTMZONENUMBER', 'WEST'}},
+ 'aster_l1t_tir': {'bands': {'ImageData10', 'ImageData11', 'ImageData12', 'ImageData13', 'ImageData14'},
+ 'exclude': {'BAND', 'CALENDAR', 'COARSE', 'FUTURE', 'GEO', 'HDF', 'IDENT', 'IMAGE',
+ 'PGE', 'PROCESSED', 'PROCESSING', 'RADIO', 'RECEIVING', 'REPROCESSING', 'SOURCE',
+ 'TIME', 'UTMZONECODE'}}
+}
+
+
+@click.group(help=__doc__)
+@click.option('--config', '-c', help="Pass the configuration file to access the database",
+ type=click.Path(exists=True))
+@click.pass_context
+def cli(ctx, config):
+ """ Used to pass the datacube index to functions via click."""
+ ctx.obj = Datacube(config=config).index
+
+
+@cli.command()
+@click.argument('path')
+@click.option('--product', help='Which ASTER product? vnir, swir, or tir')
+def create_vrt(path, product):
+    file_paths = find_lpdaac_file_paths(Path(path))
+
+    for file_path in file_paths:
+        print(file_path)
+        try:  # selected_bands raises AssertionError when the product's bands are missing
+            bands = selected_bands(file_path, product)
+            vrt = generate_vrt(file_path, bands)
+            with open(vrt_file_path(file_path, product), 'w') as fd:
+                fd.write(vrt)
+        except AssertionError:
+            logging.error("File does not have bands of this product: %s", file_path)
+
+
+@cli.command()
+@click.argument('path')
+@click.option('--product', help='Which ASTER product? vnir, swir, or tir')
+@click.pass_obj
+def show(index, path, product):
+    file_paths = find_lpdaac_file_paths(Path(path))
+
+    for file_path in file_paths:
+        print(file_path)
+        try:
+            # selected_bands raises AssertionError when the product's bands are missing
+            selected_bands(file_path, product)
+            doc = generate_lpdaac_doc(file_path, product)
+            print_dict(doc)
+        except AssertionError:
+            logging.error("File does not have bands of this product: %s", file_path)
+
+
+@cli.command()
+@click.argument('path')
+@click.option('--product', help='Which ASTER product? vnir, swir, or tir')
+@click.pass_obj
+def create_product(index, path, product):
+ file_paths = find_lpdaac_file_paths(Path(path))
+
+ # Find a file which has the specified bands of this product
+ file_path = None
+ for file_path_ in file_paths:
+ try:
+ _ = selected_bands(file_path_, product)
+ except AssertionError:
+ pass
+ else:
+ file_path = file_path_
+ break
+
+ if file_path:
+ measurements = raster_to_measurements(file_path, product)
+ for measure in measurements:
+ measure.pop('path') # This is not needed here
+ print_dict(measurements)
+ product_def = generate_lpdaac_defn(measurements, product)
+ print_dict(product_def)
+
+ print(index)
+ product = index.products.from_doc(product_def)
+ print(product)
+ indexed_product = index.products.add(product)
+ print(indexed_product)
+ else:
+ logging.error("No file found having the specified bands of this product: %s", product)
+
+
+@cli.command()
+@click.argument('path')
+@click.option('--product', help='Which ASTER product? vnir, swir, or tir')
+@click.pass_obj
+def index_data(index, path, product):
+    """Index datasets into the ODC database for each matching HDF file under path."""
+    file_paths = find_lpdaac_file_paths(Path(path))
+
+    resolver = Doc2Dataset(index)
+    for file_path in file_paths:
+        print(file_path)
+        try:
+            bands = selected_bands(file_path, product)
+        except AssertionError:
+            logging.error("File does not have bands of this product: %s", file_path)
+            continue
+        vrt_path = vrt_file_path(file_path, product)
+        if not vrt_path.exists():
+            logging.error("VRT file not found: %s", vrt_path)
+            continue
+        doc = generate_lpdaac_doc(file_path, product)
+        print_dict(doc)
+        dataset, err = resolver(doc, vrt_path.as_uri())
+        print(dataset)
+        if err is not None:
+            # resolver failed: dataset is None, so don't attempt to index it
+            logging.error("%s", err)
+            continue
+        try:
+            index.datasets.add(dataset)
+        except Exception:
+            logging.error("Couldn't index %s", file_path)
+            # logging.exception appends the traceback itself; passing e as a bare arg broke %-formatting
+            logging.exception("Exception")
+        else:
+            with open(yaml_file_path(file_path, product), 'w') as yaml_file:
+                yaml.safe_dump(doc, yaml_file)
+
+
+def vrt_file_path(file_path, product):
+ return file_path.with_name(f'{file_path.stem}_{product}.vrt')
+
+
+def yaml_file_path(file_path, product):
+ return file_path.with_name(f'{file_path.stem}_{product}.yaml')
+
+
+def print_dict(doc):
+ print(json.dumps(doc, indent=4, sort_keys=True, cls=NumpySafeEncoder))
+
+
+def find_lpdaac_file_paths(path: Path):
+ """
+ Return a list of hdf file path objects.
+
+ :param path:
+ :return: A generator of path objects.
+ """
+ for afile in path.iterdir():
+ if afile.suffix == '.hdf' and afile.stem[:7] == 'AST_L1T':
+ yield afile
+
+
+def raster_to_measurements(file_path, product):
+ bands = selected_bands(file_path, product)
+
+ measurements = []
+ for index, band in enumerate(bands):
+ measure = dict(name=str(index + 1))
+ measure['path'] = vrt_file_path(file_path, product).name
+
+ with rasterio.open(band) as band_:
+ measure['dtype'] = str(band_.dtypes[0])
+ measure['nodata'] = band_.nodatavals[0] or 0
+ measure['units'] = str(band_.units[0] or 1)
+ measure['aliases'] = [band.split(':')[-1]]
+ measurements.append(measure)
+ return measurements
+
+
+@functools.lru_cache(maxsize=None)
+def selected_bands(file_path, product):
+ band_suffixes = PRODUCTS[product]['bands']
+
+ ds = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+ sub_datasets = ds.GetSubDatasets()
+ # Check the last field of the band name: something like 'ImageDataXX'
+
+ available_bands_of_product = [band[0] for band in sub_datasets if band[0].split(':')[-1] in band_suffixes]
+
+ assert len(available_bands_of_product) == len(band_suffixes)
+
+ available_bands_of_product.sort(key=lambda x: x.split(':')[-1])
+
+ return available_bands_of_product
+
+
+def generate_lpdaac_defn(measurements, product):
+ """
+ Generate the product def for the product.
+ """
+ return {
+ 'name': product,
+ 'metadata_type': 'eo',
+ 'metadata': {
+ 'product_type': product,
+ 'platform': {'code': 'ASTER'},
+ 'version': 1,
+ 'coverage': 'aust'
+ },
+ 'description': 'ASTER L1T - Precision Terrain Corrected Registered At-Sensor Radiance data',
+ 'measurements': measurements
+ }
+
+
+def generate_lpdaac_doc(file_path, product):
+ modification_time = file_path.stat().st_mtime
+
+ unique_ds_uri = f'{file_path.as_uri()}#{modification_time}#{product}'
+
+ left, bottom, right, top = compute_extents(file_path)
+ spatial_ref = infer_aster_srs(file_path)
+ geo_ref_points = {
+ 'ul': {'x': left, 'y': top},
+ 'ur': {'x': right, 'y': top},
+ 'll': {'x': left, 'y': bottom},
+ 'lr': {'x': right, 'y': bottom},
+ }
+
+ acquisition_time = get_acquisition_time(file_path)
+ measurements = raster_to_measurements(file_path, product)
+
+ the_format = 'VRT'
+
+ doc = {
+ 'id': str(uuid.uuid5(uuid.NAMESPACE_URL, unique_ds_uri)),
+ 'product_type': product,
+ 'creation_dt': str(datetime.fromtimestamp(modification_time)),
+ 'platform': {'code': 'ASTER'},
+ 'extent': {
+ 'from_dt': str(acquisition_time),
+ 'to_dt': str(acquisition_time),
+ 'coord': geo_ref_points
+ },
+ 'format': {'name': the_format},
+ 'grid_spatial': {
+ 'projection': {
+ 'geo_ref_points': geo_ref_points,
+ 'spatial_reference': spatial_ref,
+ }
+ },
+ 'image': {
+ 'bands': {
+ measure['name']: {
+ 'path': measure['path'],
+ 'layer': str(index + 1)
+ } for index, measure in enumerate(measurements)
+ }
+ },
+ 'version': 1,
+ 'coverage': 'aust',
+ 'lineage': {'source_datasets': {}},
+ 'further_info': filter_metadata(file_path, product)
+ }
+ return doc
+
+
+def infer_aster_srs(file_path: Path):
+    """
+    Compute SRS based on metadata (UTMZONENUMBER and NORTHBOUNDINGCOORDINATE) in the file and
+    generic osr.SpatialReference data.
+    Reference:
+    https://git.earthdata.nasa.gov/projects/LPDUR/repos/aster-l1t/raw/ASTERL1T_hdf2tif.py?at=refs%2Fheads%2Fmaster
+    """
+
+    ds = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+    meta = ds.GetMetadata()
+
+    # Define UL, LR, UTM zone (np.int/np.float are deprecated aliases of the builtins)
+    utm = int(meta['UTMZONENUMBER'])
+    n_s = float(meta['NORTHBOUNDINGCOORDINATE'])
+
+    # Create UTM zone code numbers (zones are numbered 1..60, so indices must reach 60)
+    utm_n = [i + 32600 for i in range(61)]
+    utm_s = [i + 32700 for i in range(61)]
+
+    # Define UTM zone based on North or South
+    if n_s < 0:
+        utm_zone = utm_s[utm]
+    else:
+        utm_zone = utm_n[utm]
+
+    srs = osr.SpatialReference()
+    srs.ImportFromEPSG(utm_zone)
+
+    return srs.ExportToWkt()
+
+
+def generate_vrt(file_path: Path, bands):
+ """
+ Generate a VRT file for a given file
+ The following tags did not show visual impact on raster bands when rendering:
+ 1. Top level GeoTransform
+ """
+
+ assert bands
+
+ x_size, y_size = get_raster_sizes(bands)
+ geo_transform = compute_geo_transform(file_path, bands)
+
+ return """\
+
+ {srs}
+ {geo}
+ {raster_bands}
+
+ """.format(x=x_size, y=y_size, srs=infer_aster_srs(file_path),
+ geo=', '.join(('%1.16e' % v for v in geo_transform)),
+ raster_bands=get_raster_bands_vrt(bands))
+
+
+def get_raster_bands_vrt(bands):
+ """
+ Compute the tags for each band ang return them as a single string
+
+ :param bands: GDAL SubDatasets
+ """
+
+ raster_band_template = """\
+
+ {nodata}
+
+ {band_name}
+
+
+ """
+
+ raster_bands = ''
+ for index, band in enumerate(bands):
+ sdt = gdal.Open(band, gdal.GA_ReadOnly)
+ data_type = gdal.GetDataTypeName(sdt.GetRasterBand(1).DataType)
+ raster_bands += raster_band_template.format(dtype=data_type, number=str(index + 1),
+ nodata=0,
+ band_name=band)
+ return raster_bands
+
+
+def get_raster_sizes(bands):
+ """
+ Raster sizes of different bands are different. So compute the max of x axis
+ and max of y axis
+
+ :param bands: GDAL SubDataset names
+ """
+
+ x_size = []
+ y_size = []
+ for band in bands:
+ sdt = gdal.Open(band, gdal.GA_ReadOnly)
+ x_size.append(sdt.RasterXSize)
+ y_size.append(sdt.RasterYSize)
+ return max(x_size), max(y_size)
+
+
+def get_acquisition_time(file_path):
+ dt = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+ meta = dt.GetMetadata()
+ date_string = meta['CALENDARDATE']
+
+ time_ = meta['TIMEOFDAY']
+
+ return datetime(year=int(date_string[:4]), month=int(date_string[4:6]), day=int(date_string[6:8]),
+ hour=int(time_[:2]), minute=int(time_[2:4]), second=int(time_[4:6]),
+ microsecond=int(time_[6:12]), tzinfo=timezone.utc)
+
+
+def compute_geo_transform(file_path, bands):
+    """
+    Compute the geo transform for the given bands. If the bands are not consistent
+    in data type or raster dimensions an AssertionError is raised.
+    """
+
+    # pylint: disable=round-builtin
+
+    dt = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+    meta = dt.GetMetadata()
+
+    # Define UL, LR, UTM zone (np.float is a deprecated alias of the builtin float)
+    ul = [float(x) for x in meta['UPPERLEFTM'].split(', ')]
+    lr = [float(x) for x in meta['LOWERRIGHTM'].split(', ')]
+    n_s = float(meta['NORTHBOUNDINGCOORDINATE'])
+
+    # Define extent and provide offset for UTM South zones
+    if n_s < 0:
+        ul_y = ul[0] + 10000000
+        ul_x = ul[1]
+
+        lr_y = lr[0] + 10000000
+        lr_x = lr[1]
+
+    # Define extent for UTM North zones
+    else:
+        ul_y = ul[0]
+        ul_x = ul[1]
+
+        lr_y = lr[0]
+        lr_x = lr[1]
+
+    # We want all the bands to be consistent in terms of data type,
+    # raster number of columns and rows
+    band_info = dict()
+    band_info['ncol'] = set()
+    band_info['nrow'] = set()
+    band_info['data_type'] = set()
+    for band in bands:
+        band_ds = gdal.Open(band, gdal.GA_ReadOnly)
+        data_type = gdal.GetDataTypeName(band_ds.GetRasterBand(1).DataType)
+        if data_type == 'Byte':
+            band_data = band_ds.ReadAsArray().astype(np.byte)
+        elif data_type == 'UInt16':
+            band_data = band_ds.ReadAsArray().astype(np.uint16)
+        else:
+            raise ValueError('Unexpected band type')
+
+        # Query raster dimensions: ReadAsArray returns (rows, cols)
+        nrow, ncol = band_data.shape
+
+        band_info['data_type'].add(data_type)
+        band_info['ncol'].add(ncol)
+        band_info['nrow'].add(nrow)
+
+    assert len(band_info['data_type']) == 1 and len(band_info['ncol']) == 1 and len(band_info['nrow']) == 1
+
+    # Compute resolutions (negative y resolution: north-up image)
+    y_res = -1 * round((max(ul_y, lr_y) - min(ul_y, lr_y)) / band_info['nrow'].pop())
+    x_res = round((max(ul_x, lr_x) - min(ul_x, lr_x)) / band_info['ncol'].pop())
+
+    # Define UL x and y coordinates based on spatial resolution
+    ul_yy = ul_y - (y_res / 2)
+    ul_xx = ul_x - (x_res / 2)
+
+    return ul_xx, x_res, 0., ul_yy, 0., y_res
+
+
+def compute_extents(file_path):
+    """
+    Compute the union of extents of individual raster bands.
+    https://git.earthdata.nasa.gov/projects/LPDUR/repos/aster-l1t/raw/ASTERL1T_hdf2tif.py?at=refs%2Fheads%2Fmaster
+    """
+    dt = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+    meta = dt.GetMetadata()
+
+    # Define LL, UR (np.float is a deprecated alias of the builtin float)
+    ll = [float(x) for x in meta['LOWERLEFTM'].split(', ')]
+    ur = [float(x) for x in meta['UPPERRIGHTM'].split(', ')]
+    n_s = float(meta['NORTHBOUNDINGCOORDINATE'])
+    # Define extent and provide offset for UTM South zones
+    if n_s < 0:
+        ll_y = ll[0] + 10000000
+        ll_x = ll[1]
+
+        ur_y = ur[0] + 10000000
+        ur_x = ur[1]
+
+    # Define extent for UTM North zones
+    else:
+        ll_y = ll[0]
+        ll_x = ll[1]
+
+        ur_y = ur[0]
+        ur_x = ur[1]
+
+    # Do we need to offset pixel center by half of pixel resolution as in the above reference?
+    # Note: pixel resolution vary per band
+
+    return ll_x, ll_y, ur_x, ur_y
+
+
+def filter_metadata(file_path, product):
+    """
+    Filter the metadata dictionary based on what is to include or exclude defined by
+    the global PRODUCTS
+    """
+
+    dt = gdal.Open(str(file_path), gdal.GA_ReadOnly)
+    meta = dt.GetMetadata()
+    items = set()
+    if PRODUCTS[product].get('include_only'):
+        for prefix in PRODUCTS[product]['include_only']:
+            items.update({meta_item for meta_item in meta if meta_item.startswith(prefix)})
+    elif PRODUCTS[product].get('exclude'):
+        items.update(meta)  # start with every key, then drop the excluded prefixes
+        for prefix in PRODUCTS[product]['exclude']:
+            items.difference_update({meta_item for meta_item in meta if meta_item.startswith(prefix)})
+    return {item: meta[item] for item in items}
+
+
+class NumpySafeEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, np.integer):
+ return int(obj)
+ elif isinstance(obj, np.floating):
+ return float(obj)
+ elif isinstance(obj, np.ndarray):
+ return obj.tolist()
+ else:
+ return super(NumpySafeEncoder, self).default(obj)
+
+
+if __name__ == '__main__':
+ cli()
diff --git a/scripts/index_aster_lpdaac.sh b/scripts/index_aster_lpdaac.sh
new file mode 100755
index 00000000..87cb3f77
--- /dev/null
+++ b/scripts/index_aster_lpdaac.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# The argument is the config file location
+# e.g.
+# ./index_aster_lpdaac.sh ../../../dsg547_dev.conf
+#./index_aster_lpdaac.py --config $1 index-data --product aster_l1t_vnir /g/data/v10/ASTER_AU/2018.01.01
+
+# Remember to create the product first:
+# python ./index_aster_lpdaac.py
+# --config /g/data/u46/users/dsg547/dsg547_dev.conf
+# create-product --product aster_l1t_vnir
+# /g/data/v10/ASTER_AU/2018.01.01
+
+module use /g/data/v10/public/modules/modulefiles
+module load dea
+
+for i in /g/data/u46/users/aj9439/aster/tests/*; do
+ ./index_aster_lpdaac.py --config "$1" create-vrt --product aster_l1t_swir "$i"
+ ./index_aster_lpdaac.py --config "$1" index-data --product aster_l1t_swir "$i"
+done
\ No newline at end of file
diff --git a/scripts_tests/data/aster/2007.12.27/shrunk.hdf.xz b/scripts_tests/data/aster/2007.12.27/shrunk.hdf.xz
new file mode 100644
index 00000000..915c5e9b
Binary files /dev/null and b/scripts_tests/data/aster/2007.12.27/shrunk.hdf.xz differ
diff --git a/scripts_tests/data/aster/2017.12.10/shrunk.hdf.xz b/scripts_tests/data/aster/2017.12.10/shrunk.hdf.xz
new file mode 100644
index 00000000..c198a216
Binary files /dev/null and b/scripts_tests/data/aster/2017.12.10/shrunk.hdf.xz differ
diff --git a/scripts_tests/data/aster/hdf_shrinker.py b/scripts_tests/data/aster/hdf_shrinker.py
new file mode 100644
index 00000000..4a300e42
--- /dev/null
+++ b/scripts_tests/data/aster/hdf_shrinker.py
@@ -0,0 +1,51 @@
+# Thanks http://fhs.github.io/pyhdf/modules/SD.html#programming-models
+import lzma
+import shutil
+from pathlib import Path
+
+import click
+import numpy as np
+from pyhdf.SD import SD, SDC
+
+
+class PathlibPath(click.Path):
+ """A Click path argument that returns a pathlib Path, not a string"""
+
+ def convert(self, value, param, ctx):
+ return Path(super().convert(value, param, ctx))
+
+
+@click.command()
+@click.argument('in_file_name', type=click.Path(exists=True, dir_okay=False, readable=True))
+@click.argument('out_file_name', type=click.Path(exists=False, writable=True))
+def shrink_inplace(in_file_name, out_file_name):
+ """
+ Shrink a HDF4 File
+
+ Replace all data values with 0, and then compress with LZMA.
+
+ Useful for generating tiny test data files that still contain all the metadata.
+ """
+ shutil.copy(in_file_name, out_file_name)
+
+ in_file = SD(out_file_name, SDC.WRITE)
+
+ for dataset_name, dataset_def in in_file.datasets().items():
+ coord_axis, shape, dataset_type, index = dataset_def
+ print(dataset_name, dataset_def)
+
+ dataset = in_file.select(dataset_name)
+
+ print(dataset[:])
+ dataset[:] = np.zeros(shape, dataset.get().dtype)
+
+ dataset.endaccess()
+
+ in_file.end()
+
+ with open(out_file_name, 'rb') as fin, lzma.open(out_file_name + '.xz', 'wb') as fout:
+ fout.write(fin.read())
+
+
+if __name__ == '__main__':
+ shrink_inplace()
diff --git a/scripts_tests/data/aster/vrt_schema.xsd b/scripts_tests/data/aster/vrt_schema.xsd
new file mode 100644
index 00000000..2000996e
--- /dev/null
+++ b/scripts_tests/data/aster/vrt_schema.xsd
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/scripts_tests/test_aster.py b/scripts_tests/test_aster.py
new file mode 100644
index 00000000..7f655b04
--- /dev/null
+++ b/scripts_tests/test_aster.py
@@ -0,0 +1,134 @@
+import lzma
+from pathlib import Path
+
+import pytest
+
+from datacube.index.hl import Doc2Dataset
+from scripts.index_aster_lpdaac import generate_lpdaac_defn, generate_lpdaac_doc, generate_vrt
+from scripts.index_aster_lpdaac import selected_bands, raster_to_measurements, vrt_file_path
+from scripts.index_aster_lpdaac import PRODUCTS
+
+pytest_plugins = "digitalearthau.testing.plugin"
+
+SCRIPTS_TEST_DATA = Path(__file__).parent / 'data'
+
+
+def uncompress_xz(in_file, dest_file):
+ """
+ Uncompress the lzma compressed file
+ """
+ with lzma.open(in_file, 'rb') as fin, open(dest_file, 'wb') as fout:
+ fout.write(fin.read())
+
+
+@pytest.fixture
+def aster_files(tmp_path):
+ """
+ Return the locations of aster HDF files
+ """
+ dest_file1 = tmp_path / 'AST_L1T_00312102017022934_20171211115854_25347.hdf'
+ uncompress_xz(SCRIPTS_TEST_DATA / 'aster' / '2017.12.10' / 'shrunk.hdf.xz',
+ dest_file1)
+
+ dest_file2 = tmp_path / 'AST_L1T_00312272007012132_20150522121457_113468.hdf'
+ uncompress_xz(SCRIPTS_TEST_DATA / 'aster' / '2007.12.27' / 'shrunk.hdf.xz',
+ dest_file2)
+
+ return dest_file1, dest_file2
+
+
+def products_present(aster_file_path):
+ """
+ Return products present in a given aster file
+ """
+ products = []
+ for product in PRODUCTS:
+ try:
+ # See if band extraction fails for this product
+ _ = selected_bands(aster_file_path, product)
+
+ products.append(product)
+ except AssertionError:
+ pass
+ return products
+
+
+def test_product_defs(aster_files):
+ """
+ Test product definitions generated for given files
+ """
+ for aster_file in aster_files:
+ for product in products_present(aster_file):
+ measurements = raster_to_measurements(aster_file, product)
+ for measure in measurements:
+ measure.pop('path') # This is not needed here
+ product_def = generate_lpdaac_defn(measurements, product)
+
+ assert product_def['metadata']['product_type'] == product
+ # Check all expected band names ['1', '2', '3', etc]
+ num_bands = len(PRODUCTS[product]['bands'])
+ assert all([a == b for a, b in zip([str(band_num)
+ for band_num in range(1, num_bands + 1)],
+ [m['name'] for m in product_def['measurements']])])
+
+
+def test_vrt_generation(aster_files):
+ """
+ Test generated VRT strings for given files
+ """
+ import xml.etree.ElementTree as ET
+ import xmlschema
+
+ for aster_file in aster_files:
+ for product in products_present(aster_file):
+ bands = selected_bands(aster_file, product)
+ vrt = generate_vrt(aster_file, bands)
+
+ # Is it valid VRT schema
+ xsd = xmlschema.XMLSchema(str(SCRIPTS_TEST_DATA / 'aster/vrt_schema.xsd'))
+ xsd.validate(vrt)
+
+ tree = ET.fromstring(vrt)
+
+ assert len(tree.findall('VRTRasterBand')) == len(PRODUCTS[product]['bands'])
+ sources = tree.findall('SourceFilename')
+ for source in sources:
+ parts = source.text.split(':')
+ # We want the source path name to be absolute
+ assert aster_file == Path(parts[2])
+ assert parts[4] in PRODUCTS[product]['bands']
+
+
+def test_dataset_doc(aster_files):
+ """
+ Test dataset docs generated for given files.
+ """
+ for aster_file in aster_files:
+ for product in products_present(aster_file):
+ doc = generate_lpdaac_doc(aster_file, product)
+ assert doc['grid_spatial']['projection']['spatial_reference']
+ assert len(doc['image']['bands']) == len(PRODUCTS[product]['bands'])
+
+
+def test_dataset_indexing(dea_index, aster_files):
+ """
+ Test datacube indexing for each product for the given files
+ """
+
+ for aster_file in aster_files:
+ for product in products_present(aster_file):
+ vrt_path = vrt_file_path(aster_file, product)
+ measurements = raster_to_measurements(aster_file, product)
+ for measure in measurements:
+ measure.pop('path') # This is not needed here
+ product_def = generate_lpdaac_defn(measurements, product)
+ product_ = dea_index.products.from_doc(product_def)
+ indexed_product = dea_index.products.add(product_)
+
+ assert indexed_product
+
+ doc = generate_lpdaac_doc(aster_file, product)
+ resolver = Doc2Dataset(dea_index)
+ dataset, _ = resolver(doc, vrt_path.as_uri())
+ print('the dataset to be indexed: ', dataset)
+ dea_index.datasets.add(dataset)
diff --git a/scripts/test_index_nci_modis_lpdaac.py b/scripts_tests/test_index_nci_modis_lpdaac.py
similarity index 93%
rename from scripts/test_index_nci_modis_lpdaac.py
rename to scripts_tests/test_index_nci_modis_lpdaac.py
index 9a031a83..33698d21 100644
--- a/scripts/test_index_nci_modis_lpdaac.py
+++ b/scripts_tests/test_index_nci_modis_lpdaac.py
@@ -1,7 +1,7 @@
from pathlib import Path
from datetime import datetime
-from index_nci_modis_lpdaac import *
+from scripts.index_nci_modis_lpdaac import *
def test_modis_path_to_date_range():
diff --git a/setup.py b/setup.py
index 7c9a66ce..9c5b1612 100755
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@
license='Apache License 2.0',
packages=find_packages(
- exclude=('tests', 'tests.*',
+ exclude=('scripts_tests', 'scripts_tests.*',
'integration_tests', 'integration_tests.*')
),
package_data={
@@ -37,7 +37,7 @@
'pytest-runner'
],
install_requires=[
- 'colorama',
+ 'colorama', # required to work around a structlog issue
'click>=5.0',
'datacube[celery]',
'python-dateutil',
@@ -49,7 +49,6 @@
'boltons',
'lxml',
'pydash',
- 'colorama', # required to work around a structlog issue
],
tests_require=tests_require,
extras_require=extras_require,
@@ -68,6 +67,8 @@
'dea-stacker = digitalearthau.stacker:cli',
'dea-system = digitalearthau.system:cli',
'dea-test-env = digitalearthau.test_env:cli',
- ]
+ ],
+ # This might be a bad idea, it pulls in all of datacube whenever pytest is run
+ #'pytest11': ['digitalearthau = digitalearthau.testing.plugin']
},
)