diff --git a/Cargo.lock b/Cargo.lock
index 5ccf5e06e..caace1a24 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2202,12 +2202,24 @@ dependencies = [
  "thiserror 2.0.12",
 ]
 
+[[package]]
+name = "gdal-src"
+version = "0.2.0+3.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dfe065be4c59457d934487706b6ffa7fe91d238e20e42b8fae03f47a291f684"
+dependencies = [
+ "cmake",
+ "link-cplusplus",
+ "proj-sys",
+]
+
 [[package]]
 name = "gdal-sys"
 version = "0.11.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "febef67dc08a956a9ecb04de2b40dbd15ad56be49421aad9ae0cdcbe9a24166c"
 dependencies = [
+ "gdal-src",
  "pkg-config",
  "semver",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 961386944..50615840c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -102,7 +102,9 @@ float-cmp = "0.10"
 futures = "0.3"
 futures-util = "0.3"
 gdal = "0.18"
-gdal-sys = "0.11"
+gdal-sys = { version = "0.11", features = ["bundled"] }
+gdal-src = { version = "0.2.0+3.10.2", features = ["driver_gpkg"] }
+
 # when changing geo version also adapt the Cargo.toml in the expression "deps-workspace"!
 geo = "0.30.0"
 geo-rand = { git = "https://github.com/lelongg/geo-rand.git", branch = "dependabot/cargo/geo-0.30.0"} # TODO: revert back to "0.5" when it is released
diff --git a/operators/src/adapters/raster_subquery/raster_subquery_adapter.rs b/operators/src/adapters/raster_subquery/raster_subquery_adapter.rs
index 64d6a6bc6..061a0a7f2 100644
--- a/operators/src/adapters/raster_subquery/raster_subquery_adapter.rs
+++ b/operators/src/adapters/raster_subquery/raster_subquery_adapter.rs
@@ -23,6 +23,7 @@ use geoengine_datatypes::{
     primitives::TimeInstance,
     raster::{Pixel, RasterTile2D, TileInformation},
 };
+use log::debug;
 use pin_project::pin_project;
 use rayon::ThreadPool;
 use std::marker::PhantomData;
@@ -289,6 +290,7 @@ where
                 }
                 Ok(None) => this.state.set(StateInner::ReturnResult(None)),
                 Err(e) => {
+                    debug!(">>>> Tile query rectangle not valid: {e}");
                     this.state.set(StateInner::Ended);
                     return Poll::Ready(Some(Err(e)));
                 }
@@ -382,6 +384,7 @@ where
         // If there is a tile, set the current_time_end option.
         if let Some(tile) = &tile_option {
+            debug_assert!(tile.time.end() > tile.time.start());
             debug_assert!(*this.current_time_start >= tile.time.start());
             *this.current_time_end = Some(tile.time.end());
         }
@@ -408,6 +411,7 @@ where
             (None, None) => {
                 // end the stream since we never recieved a tile from any subquery. Should only happen if we end the first grid iteration.
                 // NOTE: this assumes that the input operator produces no data tiles for queries where time and space are valid but no data is avalable.
+ debug!(">>>> Tile stream ended without any data"); debug_assert!(&tile_option.is_none()); debug_assert!( *this.current_time_start == this.query_rect_to_answer.time_interval.start() @@ -437,7 +441,9 @@ where } } } - + if let Some(tile) = &tile_option { + debug_assert!(tile.time.end() > tile.time.start()); + } Poll::Ready(Some(Ok(tile_option))) } } diff --git a/operators/src/adapters/raster_subquery/raster_subquery_reprojection.rs b/operators/src/adapters/raster_subquery/raster_subquery_reprojection.rs index 30e3168d3..bcd4dc6a1 100644 --- a/operators/src/adapters/raster_subquery/raster_subquery_reprojection.rs +++ b/operators/src/adapters/raster_subquery/raster_subquery_reprojection.rs @@ -95,12 +95,25 @@ where self.state.out_spatial_grid.geo_transform() ); - let valid_pixel_bounds = self - .state - .out_spatial_grid - .grid_bounds() - .intersection(&tile_info.global_pixel_bounds()) - .and_then(|b| b.intersection(&query_rect.spatial_query.grid_bounds())); + // TODO: instead of producing an empty stream if the query does not intersect the data or projection, + // we need to actually perform the query in order to give the nodata tiles the correct time interval + // because all tiles in a time step need the same temporal validity, even across diferent quereis + + // so we need a way to reliably query the source in such cases that ensures no data tiles are produced. + // what to do in cases where the bbox coordinates cannot be reprojected? + // - query the data bbox but discard the data? + // - implement a special empty bbox query that makes sure to produce empty tiles but correct time intervals? + // - ideally: implement a way to query the time intervals of the source data and produce empty tiles accordingly + + let valid_pixel_bounds = dbg!( + dbg!( + self.state + .out_spatial_grid + .grid_bounds() + .intersection(&tile_info.global_pixel_bounds()) + ) + .and_then(|b| b.intersection(&query_rect.spatial_query.grid_bounds())) + ); let valid_spatial_bounds = valid_pixel_bounds.map(|pb| { self.state @@ -114,20 +127,25 @@ where let projected_bounds = bounds.reproject(&proj); match projected_bounds { - Ok(pb) => Ok(Some(RasterQueryRectangle::new_with_grid_bounds( - self.state - .in_spatial_grid - .geo_transform() - .spatial_to_grid_bounds(&pb), - TimeInterval::new_instant(start_time)?, - band_idx.into(), - ))), + Ok(pb) => { + dbg!("produce something"); + Ok(Some(RasterQueryRectangle::new_with_grid_bounds( + self.state + .in_spatial_grid + .geo_transform() + .spatial_to_grid_bounds(&pb), + TimeInterval::new_instant(start_time)?, + band_idx.into(), + ))) + } // In some strange cases the reprojection can return an empty box. // We ignore it since it contains no pixels. 
                Err(geoengine_datatypes::error::Error::OutputBboxEmpty { bbox: _ }) => Ok(None),
                Err(e) => Err(e.into()),
            }
        } else {
+            dbg!("output query rectangle is not valid in source projection => produce empty tile");
+
            // output query rectangle is not valid in source projection => produce empty tile
            Ok(None)
        }
@@ -352,6 +370,8 @@ impl<T: Pixel> FoldTileAccu for TileWithProjectionCoordinates<T> {
     type RasterType = T;
 
     async fn into_tile(self) -> Result<RasterTile2D<T>> {
+        debug_assert!(self.accu_tile.time.end() > self.accu_tile.time.start());
+        debug_assert!(self.accu_tile.time.end() != self.accu_tile.time.start() + 1);
         Ok(self.accu_tile)
     }
 
@@ -362,6 +382,7 @@ impl<T: Pixel> FoldTileAccu for TileWithProjectionCoordinates<T> {
 
 impl<T: Pixel> FoldTileAccuMut for TileWithProjectionCoordinates<T> {
     fn set_time(&mut self, time: TimeInterval) {
+        debug_assert!(time.end() > time.start());
         self.accu_tile.time = time;
     }
 
diff --git a/operators/src/adapters/sparse_tiles_fill_adapter.rs b/operators/src/adapters/sparse_tiles_fill_adapter.rs
index b37eab04e..a9579a396 100644
--- a/operators/src/adapters/sparse_tiles_fill_adapter.rs
+++ b/operators/src/adapters/sparse_tiles_fill_adapter.rs
@@ -10,6 +10,7 @@ use geoengine_datatypes::{
 use pin_project::pin_project;
 use snafu::Snafu;
 use std::{pin::Pin, task::Poll};
+use tracing::debug;
 
 #[derive(Debug, Snafu)]
 pub enum SparseTilesFillAdapterError {
@@ -61,7 +62,11 @@ impl From<TimeInterval> for FillerTimeBounds {
     fn from(time: TimeInterval) -> FillerTimeBounds {
         FillerTimeBounds {
             start: time.start(),
-            end: time.end(),
+            end: if time.is_instant() {
+                time.end() + 1
+            } else {
+                time.end()
+            },
         }
     }
 }
@@ -74,11 +79,14 @@ impl FillerTimeBounds {
         self.end
     }
 
+    // TODO: return result
     pub fn new(start: TimeInstance, end: TimeInstance) -> Self {
+        debug_assert!(start < end);
         Self::new_unchecked(start, end)
     }
 
     pub fn new_unchecked(start: TimeInstance, end: TimeInstance) -> Self {
+        debug_assert!(start < end);
         Self { start, end }
     }
 }
@@ -107,6 +115,12 @@ struct GridIdxAndBand {
 impl<T: Pixel> StateContainer<T> {
     /// Create a new no-data `RasterTile2D` with `GridIdx` and time from the current state
     fn current_no_data_tile(&self) -> RasterTile2D<T> {
+        // debug_assert!(
+        //     self.current_time.unwrap().end() != self.current_time.unwrap().start() + 1,
+        //     "current no data tile, current_time: {:?}, current tile time: {:?}",
+        //     self.current_time,
+        //     self.next_tile.as_ref().map(|t| t.time)
+        // );
         RasterTile2D::new(
             self.current_time
                 .expect("time must exist when a tile is stored."),
@@ -231,6 +245,12 @@ impl<T: Pixel> StateContainer<T> {
     }
 
     fn next_time_interval_from_stored_tile(&self) -> Option<TimeInterval> {
+        debug!(
+            "filladapter ding current time {:?}, stored tile time {:?}",
+            self.current_time,
+            self.next_tile.as_ref().map(|t| t.time)
+        );
+
         // we wrapped around. We need to do time progress.
         if let Some(tile) = &self.next_tile {
             let stored_tile_time = tile.time;
@@ -263,6 +283,7 @@ impl StateContainer {
     }
 
     fn set_current_time_from_initial_tile(&mut self, first_tile_time: TimeInterval) {
+        debug_assert!(first_tile_time.end() > first_tile_time.start());
         // if we know a bound we must use it to set the current time
         let start_data_bound = self.data_time_bounds.start();
         let requested_start = self.requested_time_bounds.start();
@@ -279,10 +300,9 @@ impl StateContainer {
                 requested_start, start_data_bound
             );
-            self.current_time = Some(TimeInterval::new_unchecked(
-                start_data_bound,
-                first_tile_time.start(),
-            ));
+            self.current_time =
+                Some(TimeInterval::new(start_data_bound, first_tile_time.start()).unwrap());
+            debug_assert!(!self.current_time.unwrap().is_instant());
             return;
         }
         if start_data_bound > first_tile_time.start() {
@@ -297,10 +317,10 @@ impl StateContainer {
     fn set_current_time_from_data_time_bounds(&mut self) {
         assert!(self.state == State::FillToEnd);
 
-        self.current_time = Some(TimeInterval::new_unchecked(
-            self.data_time_bounds.start(),
-            self.data_time_bounds.end(),
-        ));
+        self.current_time = Some(
+            TimeInterval::new(self.data_time_bounds.start(), self.data_time_bounds.end()).unwrap(),
+        );
+        debug_assert!(!self.current_time.unwrap().is_instant());
     }
 
     fn update_current_time(&mut self, new_time: TimeInterval) {
@@ -335,6 +355,8 @@ impl StateContainer {
 
         debug_assert!(current_time.end() < self.data_time_bounds.end());
 
+        debug_assert!(self.requested_time_bounds.end() <= self.data_time_bounds.end());
+
         let new_time = if current_time.is_instant() {
             TimeInterval::new_unchecked(current_time.end() + 1, self.data_time_bounds.end())
         } else {
@@ -363,10 +385,15 @@
     }
 
     fn store_tile(&mut self, tile: RasterTile2D<T>) {
+        debug_assert!(tile.time.end() > tile.time.start());
+        debug_assert!(self.next_tile.is_none());
         let current_time = self
             .current_time
             .expect("Time must be set when the first tile arrives");
+
+        debug_assert!(current_time.end() > current_time.start());
+        debug_assert!(current_time.start() <= tile.time.start());
         debug_assert!(
             current_time.start() < tile.time.start()
@@ -481,6 +508,11 @@ where
                 // poll for a first (input) tile
                 let result_tile = match ready!(this.stream.as_mut().poll_next(cx)) {
                     Some(Ok(tile)) => {
+                        debug!(
+                            "Initial tile: {:?} with time interval {:?}, current_time: {:?}",
+                            tile.tile_position, tile.time, this.sc.current_time
+                        );
+                        debug_assert!(tile.time.end() > tile.time.start());
                         // now we have to inspect the time we got and the bound we need to fill. If there are bounds known, then we need to check if the tile starts with the bounds.
                        this.sc.set_current_time_from_initial_tile(tile.time);
@@ -494,11 +526,24 @@ where
                            tile.band,
                        ) {
+                            debug!(
+                                "AAA Initial tile: {:?} with time interval {:?}, current_time: {:?}",
+                                tile.tile_position, tile.time, this.sc.current_time
+                            );
                            this.sc.state = State::PollingForNextTile; // return the received tile and set state to polling for the next tile
                            tile
                        } else {
-                            this.sc.store_tile(tile);
+                            debug!(
+                                "BBB Initial tile: {:?} with time interval {:?}, current_time: {:?}",
+                                tile.tile_position, tile.time, this.sc.current_time
+                            );
+                            this.sc.store_tile(tile.clone());
                            this.sc.state = State::FillAndProduceNextTile; // save the tile and go to fill mode
+
+                            debug!(
+                                "CCC Initial tile: {:?} with time interval {:?}, current_time: {:?}",
+                                tile.tile_position, tile.time, this.sc.current_time
+                            );
                            this.sc.current_no_data_tile()
                        }
                    }
@@ -508,6 +553,7 @@ where
                        return Poll::Ready(Some(Err(e)));
                    }
                    // the source never produced a tile.
+                    // TODO: this should never happen??
                    None => {
                        debug_assert!(this.sc.current_idx == min_idx);
                        this.sc.state = State::FillToEnd;
@@ -515,6 +561,9 @@ where
                        this.sc.current_no_data_tile()
                    }
                };
+
+                debug_assert!(result_tile.time.end() > result_tile.time.start());
+
                // move the current_idx. There is no need to do time progress here. Either a new tile triggers that or it is never needed for an empty source.
                this.sc.current_idx = wrapped_next_idx;
                this.sc.current_band_idx = wrapped_next_band;
@@ -535,6 +584,17 @@ where
                let res = match ready!(this.stream.as_mut().poll_next(cx)) {
                    Some(Ok(tile)) => {
+                        debug_assert!(
+                            tile.time.end() > tile.time.start(),
+                            "Tile time interval is invalid: {:?}",
+                            tile.time
+                        );
+
+                        debug!(
+                            "DDD next tile: {:?} with time interval {:?}, current_time: {:?}",
+                            tile.tile_position, tile.time, this.sc.current_time
+                        );
+
                        // 1. The start of the recieved TimeInterval MUST NOT BE before the start of the current TimeInterval.
                        if this.sc.time_starts_before_current_state(tile.time) {
                            this.sc.state = State::Ended;
@@ -548,9 +608,10 @@ where
                        }
                        if tile.time.start() >= this.sc.requested_time_bounds.end() {
                            log::warn!(
-                                "The tile time start ({}) is outside of the requested time bounds ({})!",
+                                "The tile time start ({}) is outside of the requested time bounds ({})! end is {}",
end is {}", tile.time.start(), - this.sc.requested_time_bounds.end() + this.sc.requested_time_bounds.end(), + tile.time.end() ); } @@ -575,6 +636,7 @@ where if this.sc.time_starts_equals_current_state(tile.time) && !this.sc.time_duration_equals_current_state(tile.time) { + debug!("missmatch tile is empty: {}", tile.is_empty()); this.sc.state = State::Ended; return Poll::Ready(Some(Err( SparseTilesFillAdapterError::TileTimeIntervalLengthMissmatch { diff --git a/operators/src/error.rs b/operators/src/error.rs index f8990bf10..d99494e3f 100644 --- a/operators/src/error.rs +++ b/operators/src/error.rs @@ -360,7 +360,7 @@ pub enum Error { InterpolationOperator { source: crate::processing::InterpolationError, }, - #[snafu(context(false))] + #[snafu(display("Downsampling error: {source}"), context(false))] DownsampleOperator { source: crate::processing::DownsamplingError, }, diff --git a/operators/src/processing/downsample/mod.rs b/operators/src/processing/downsample/mod.rs index 51c5c408e..7135d58f2 100644 --- a/operators/src/processing/downsample/mod.rs +++ b/operators/src/processing/downsample/mod.rs @@ -402,6 +402,7 @@ impl FoldTileAccu for DownsampleAccu { type RasterType = T; async fn into_tile(self) -> Result> { + debug_assert!(self.time.unwrap().end() > self.time.unwrap().start()); // TODO: later do conversation of accu into tile here let output_tile = RasterTile2D::new_with_tile_info( @@ -471,7 +472,9 @@ pub fn fold_impl(mut accu: DownsampleAccu, tile: RasterTile2D) -> Downs where T: Pixel, { + debug_assert!(tile.time.end() > tile.time.start()); // get the time now because it is not known when the accu was created + accu.set_time(tile.time); accu.cache_hint.merge_with(&tile.cache_hint); diff --git a/operators/src/processing/reprojection.rs b/operators/src/processing/reprojection.rs index c492d4ea8..144e98efa 100644 --- a/operators/src/processing/reprojection.rs +++ b/operators/src/processing/reprojection.rs @@ -1480,6 +1480,7 @@ mod tests { for r in result { assert!(r.is_empty()); + assert_eq!(r.time, time_interval); } } diff --git a/operators/src/source/gdal_source/mod.rs b/operators/src/source/gdal_source/mod.rs index 0b75bfcd2..05437f71a 100644 --- a/operators/src/source/gdal_source/mod.rs +++ b/operators/src/source/gdal_source/mod.rs @@ -382,6 +382,7 @@ impl GdalRasterLoader { tile_time: TimeInterval, cache_hint: CacheHint, ) -> Result> { + debug_assert!(tile_time.end() > tile_time.start()); let tile_spatial_grid = tile_information.spatial_grid_definition(); match dataset_params { @@ -546,7 +547,7 @@ impl GdalRasterLoader { info.params.clone(), reader_mode, tile, - info.time, + dbg!(info.time), info.cache_ttl.into(), ) }), @@ -564,6 +565,7 @@ impl GdalRasterLoader { ) -> impl Stream>> + use { loading_info_stream .map_ok(move |info| { + debug!("Loading info: {:?}", info); GdalRasterLoader::temporal_slice_tile_future_stream( spatial_query, info, @@ -614,6 +616,7 @@ where query: RasterQueryRectangle, _ctx: &'a dyn crate::engine::QueryContext, ) -> Result>> { + log::debug!("GdalSource query: {:?}", query); ensure!( query.attributes.as_slice() == [0], crate::error::GdalSourceDoesNotSupportQueryingOtherBandsThanTheFirstOneYet @@ -657,7 +660,9 @@ where let mut empty = false; if !tiling_based_pixel_bounds.intersects(&query_pixel_bounds) { - debug!("query does not intersect spatial data bounds"); + debug!( + "query {query_pixel_bounds:?} does not intersect spatial data bounds {tiling_based_pixel_bounds:?}" + ); empty = true; } diff --git a/services/examples/force-import.rs 
new file mode 100644
index 000000000..95d100c40
--- /dev/null
+++ b/services/examples/force-import.rs
@@ -0,0 +1,418 @@
+use chrono::{NaiveDate, TimeZone};
+use gdal::vector::{Defn, Feature, FieldDefn, LayerOptions, OGRFieldType};
+use gdal::{Dataset as GdalDataset, DriverManager, Metadata};
+use geoengine_datatypes::primitives::{CacheTtlSeconds, DateTime, TimeInstance, TimeInterval};
+use geoengine_datatypes::raster::GdalGeoTransform;
+use geoengine_operators::source::{
+    FileNotFoundHandling, GdalDatasetParameters, GdalLoadingInfoTemporalSlice, GdalMetaDataList,
+};
+use geoengine_operators::util::gdal::raster_descriptor_from_dataset;
+use geoengine_services::datasets::storage::{DatasetDefinition, MetaDataDefinition};
+use geoengine_services::datasets::{AddDataset, DatasetName};
+use std::collections::HashMap;
+use std::fs;
+use std::path::Path;
+use std::path::PathBuf;
+use std::str::FromStr;
+
+/// Creates a tile index for the given datasets and writes it to a GeoJSON file.
+/// uses the SRS from the first dataset
+fn gdaltindex(datasets: &[&Path], gti_file: &Path, tile_index_file: &Path) {
+    let spatial_ref = {
+        let first_dataset = GdalDataset::open(datasets.get(0).expect("datasets must not be empty"))
+            .expect("Failed to open dataset");
+        first_dataset
+            .spatial_ref()
+            .expect("Failed to get spatial reference")
+    };
+
+    let driver =
+        DriverManager::get_driver_by_name("GeoJSON").expect("Failed to get GeoJSON driver");
+    let mut vector_ds = driver
+        .create_vector_only(tile_index_file)
+        .expect("Failed to create vector dataset");
+
+    let layer = vector_ds
+        .create_layer(LayerOptions {
+            name: "tile_index",
+            srs: Some(&spatial_ref),
+            ty: gdal::vector::OGRwkbGeometryType::wkbPolygon,
+            options: None,
+        })
+        .expect("Failed to create layer");
+
+    let field_defn =
+        FieldDefn::new("location", OGRFieldType::OFTString).expect("Failed to create field defn");
+    // field_defn.set_width(80);
+    field_defn
+        .add_to_layer(&layer)
+        .expect("Failed to add field to layer");
+
+    let defn = Defn::from_layer(&layer);
+
+    let location_idx = defn
+        .field_index("location")
+        .expect("Failed to get field index");
+
+    for dataset_path in datasets {
+        let dataset = GdalDataset::open(dataset_path).expect("Failed to open dataset");
+        let geo_transform = dataset
+            .geo_transform()
+            .expect("Failed to get geo-transform");
+        let raster_size = dataset.raster_size();
+
+        // TODO: get bbox from gdal?
+        let min_x = geo_transform[0];
+        let max_y = geo_transform[3];
+        let max_x = min_x + geo_transform[1] * raster_size.0 as f64;
+        let min_y = max_y + geo_transform[5] * raster_size.1 as f64;
+
+        let mut ring =
+            gdal::vector::Geometry::empty(gdal::vector::OGRwkbGeometryType::wkbLinearRing)
+                .expect("Failed to create ring");
+
+        // TODO: reproject bbox to output srs(?)
+        ring.add_point_2d((min_x, max_y));
+        ring.add_point_2d((max_x, max_y));
+        ring.add_point_2d((max_x, min_y));
+        ring.add_point_2d((min_x, min_y));
+        ring.add_point_2d((min_x, max_y));
+
+        let mut polygon =
+            gdal::vector::Geometry::empty(gdal::vector::OGRwkbGeometryType::wkbPolygon)
+                .expect("Failed to create polygon");
+        polygon
+            .add_geometry(ring)
+            .expect("Failed to add ring to polygon");
+
+        let mut feature = Feature::new(&defn).expect("Failed to create feature");
+        feature
+            .set_geometry(polygon)
+            .expect("Failed to set geometry");
+        feature
+            .set_field_string(
+                location_idx,
+                dataset_path
+                    .to_str()
+                    .expect("Failed to convert path to string"),
+            )
+            .expect("Failed to set field");
+
+        feature.create(&layer).expect("Failed to create feature");
+    }
+
+    let gti_xml = format!(
+        r"<GDALTileIndexDataset>
+    <IndexDataset>{index_dataset}</IndexDataset>
+    <IndexLayer>tile_index</IndexLayer>
+    <LocationField>location</LocationField>
+</GDALTileIndexDataset>",
+        index_dataset = tile_index_file
+            .to_str()
+            .expect("Failed to convert path to string"),
+    );
+
+    std::fs::write(gti_file, gti_xml).expect("Failed to write GTI XML to file");
+}
+
+#[allow(dead_code)]
+fn test() {
+    let datasets: [&Path; 2] = [
+        Path::new(
+            "/mnt/data_raid/geo_data/force/gti/data/X0059_Y0049/20000124_LEVEL2_LND07_BOA.tif",
+        ),
+        Path::new(
+            "/mnt/data_raid/geo_data/force/gti/data/X0059_Y0049/20000124_LEVEL2_LND07_BOA.tif",
+        ),
+    ];
+
+    let gti_file = Path::new("./foo.gti");
+    let tile_index_file = Path::new("./foo.tile_index.geojson");
+
+    gdaltindex(&datasets, gti_file, tile_index_file);
+
+    let gti_dataset = GdalDataset::open(gti_file).expect("Failed to open GTI dataset");
+
+    let raster_band = gti_dataset
+        .rasterband(1)
+        .expect("Failed to get raster band");
+    let shape = raster_band.size();
+
+    let mut data = raster_band
+        .read_as::<i16>((0, 0), shape, shape, None)
+        .unwrap();
+
+    let driver = DriverManager::get_driver_by_name("GTiff").unwrap();
+    let mut new_ds = driver
+        .create_with_band_type::<i16, _>(
+            "output.tif",
+            shape.0,
+            shape.1,
+            1, // Number of bands
+        )
+        .unwrap();
+    new_ds
+        .set_spatial_ref(&gti_dataset.spatial_ref().unwrap())
+        .unwrap();
+
+    // Write the data to the new dataset
+    let mut new_band = new_ds.rasterband(1).unwrap();
+    new_band.write((0, 0), shape, &mut data).unwrap();
+}
+
+fn naive_date_to_time_instance(date: NaiveDate) -> TimeInstance {
+    let time: chrono::DateTime<chrono::Utc> = chrono::Utc.from_utc_datetime(
+        &date
+            .and_hms_opt(0, 0, 0)
+            .expect("Failed to create datetime"),
+    );
+    let time: DateTime = time.into();
+    time.into()
+}
+
+fn main() {
+    const FORCE_DATA_DIR: &str = "/home/michael/geodata/force/marburg";
+    const GTI_OUTPUT_DIR: &str = "/home/michael/geodata/force/marburg/gti";
+    const DATASET_OUTPUT_DIR: &str = "/home/michael/geodata/force/marburg/geoengine";
+
+    let mut tile_dirs = Vec::new();
+
+    let entries = fs::read_dir(&FORCE_DATA_DIR).expect("Failed to read FORCE_DATA_DIR");
+
+    for entry in entries {
+        let entry = entry.expect("Failed to read directory entry");
+        if entry.file_type().expect("Failed to get file type").is_dir() {
+            let folder_name = entry.file_name();
+            // TODO: parse pattern X0059_Y0049
+            if folder_name.to_string_lossy().starts_with('X') {
+                tile_dirs.push(entry.path());
+            }
+        }
+    }
+
+    println!("Found tiles: {:?}", tile_dirs);
+
+    let mut tif_files = Vec::new();
+
+    for tile_dir in tile_dirs {
+        let entries = fs::read_dir(&tile_dir).expect("Failed to read tile directory");
+        for entry in entries {
+            let entry = entry.expect("Failed to read directory entry");
+            if entry
+                .file_type()
+                .expect("Failed to get file type")
+                .is_file()
+            {
+                if let Some(extension) = entry.path().extension() {
+                    if extension == "tif" {
+                        tif_files.push(entry.path());
+                    }
+                }
+            }
+        }
+    }
+
+    println!("Found {:?} tif files", tif_files.len());
+
+    let mut product_dataset_timesteps: HashMap<(String, String), HashMap<NaiveDate, Vec<PathBuf>>> =
+        HashMap::new();
+
+    for tif in &tif_files {
+        if let Some(filename) = tif.file_name().and_then(|f| f.to_str()) {
+            let parts: Vec<&str> = filename.split('_').collect();
+            if parts.len() >= 4 {
+                if let Ok(date_obj) = NaiveDate::parse_from_str(parts[0], "%Y%m%d") {
+                    let product = parts[2].to_string();
+                    let band = parts[3].split('.').next().unwrap_or("").to_string();
+                    let key = (product, band);
+
+                    product_dataset_timesteps
+                        .entry(key)
+                        .or_insert_with(HashMap::new)
+                        .entry(date_obj)
+                        .or_insert_with(Vec::new)
+                        .push(tif.clone());
+                }
+            }
+        }
+    }
+
+    println!("Found {} products:", product_dataset_timesteps.len());
+    for ((product, dataset), timesteps) in &product_dataset_timesteps {
+        println!(
+            "    ({}, {}), #timesteps: {}",
+            product,
+            dataset,
+            timesteps.len()
+        );
+    }
+
+    let mut product_dataset_bands: HashMap<(String, String), Vec<String>> = HashMap::new();
+
+    for ((product, dataset), timesteps) in &product_dataset_timesteps {
+        if let Some(first_file) = timesteps.values().flat_map(|v| v).next() {
+            let gdal_dataset = GdalDataset::open(first_file).expect("Failed to open dataset");
+            let band_count = gdal_dataset.raster_count();
+
+            print!("Available bands for {product} {dataset}: ",);
+
+            let mut bands = Vec::new();
+
+            for band_index in 1..=band_count {
+                let band = gdal_dataset
+                    .rasterband(band_index)
+                    .expect("Failed to get raster band");
+
+                let band_desc = band.description().unwrap_or("No description".to_string());
+
+                bands.push(band_desc.clone());
+
+                print!("{band_desc}");
+
+                if band_index < band_count {
+                    print!(", ");
+                }
+            }
+
+            product_dataset_bands.insert((product.clone(), dataset.clone()), bands);
+            println!();
+        }
+    }
+
+    let product_datasets = product_dataset_timesteps.keys().collect::<Vec<_>>();
+
+    let mut product_dataset_gtis: HashMap<(String, String), Vec<(NaiveDate, String)>> =
+        HashMap::new();
+
+    for (product, dataset) in product_datasets {
+        let mut timestep_gtis = Vec::new();
+
+        for (timestep, tiles) in product_dataset_timesteps
+            .get(&(product.clone(), dataset.clone()))
+            .expect("Failed to get timesteps")
+        {
+            let gti_file = format!(
+                "{}/{}_{}_{}.gti",
+                GTI_OUTPUT_DIR, product, dataset, timestep
+            );
+            let tile_index_file = format!(
+                "{}/{}_{}_{}.tile_index.geojson",
+                GTI_OUTPUT_DIR, product, dataset, timestep
+            );
+
+            let tile_refs: Vec<&Path> = tiles.iter().map(|tile| tile.as_path()).collect();
+            gdaltindex(&tile_refs, gti_file.as_ref(), tile_index_file.as_ref());
+
+            timestep_gtis.push((timestep.clone(), gti_file));
+        }
+
+        // Sort the vector by NaiveDate
+        timestep_gtis.sort_by_key(|(timestep, _)| *timestep);
+
+        println!(
+            "Created {} GTIs for {product} {dataset}",
+            timestep_gtis.len()
+        );
+
+        product_dataset_gtis.insert((product.clone(), dataset.clone()), timestep_gtis);
+    }
+
+    for ((product, dataset), timesteps) in &product_dataset_gtis {
+        for (band_idx, band_name) in product_dataset_bands
+            .get(&(product.clone(), dataset.clone()))
+            .expect("Failed to get bands")
+            .iter()
+            .enumerate()
+            .map(|(i, band)| (i + 1, band))
+        {
+            let gdal_dataset = GdalDataset::open(Path::new(
+                &timesteps
+                    .iter()
+                    .next()
+                    .expect("Failed to get first timestep")
+                    .1,
+            ))
+            .expect("Failed to open dataset");
+
+            let geo_transform: GdalGeoTransform = gdal_dataset
+                .geo_transform()
+                .expect("Failed to get geo-transform")
+                .into();
+            let geo_transform: geoengine_operators::source::GdalDatasetGeoTransform =
+                geo_transform.into();
+
+            let result_descriptor = raster_descriptor_from_dataset(&gdal_dataset, band_idx)
+                .expect("Could not get raster descriptor");
+
+            let mut slices = Vec::new();
+
+            for (i, (timestep, file)) in timesteps.iter().enumerate() {
+                let start_time = naive_date_to_time_instance(*timestep);
+
+                let end_time: TimeInstance = if let Some((next_timestep, _)) = timesteps.get(i + 1)
+                {
+                    naive_date_to_time_instance(*next_timestep)
+                } else {
+                    TimeInstance::MAX
+                };
+
+                let time_interval: TimeInterval = TimeInterval::new(start_time, end_time)
+                    .expect("Failed to create time interval");
+
+                let rasterband = gdal_dataset
+                    .rasterband(band_idx)
+                    .expect("Failed to get raster band");
+
+                slices.push(GdalLoadingInfoTemporalSlice {
+                    time: time_interval,
+                    params: Some(GdalDatasetParameters {
+                        file_path: file.into(),
+                        rasterband_channel: band_idx as usize,
+                        geo_transform: geo_transform.try_into().unwrap(),
+                        width: rasterband.size().0,
+                        height: rasterband.size().1,
+                        file_not_found_handling: FileNotFoundHandling::Error,
+                        no_data_value: None, // TODO
+                        properties_mapping: None,
+                        gdal_open_options: None,
+                        gdal_config_options: None,
+                        allow_alphaband_as_mask: false,
+                        retry: None,
+                    }),
+                    cache_ttl: CacheTtlSeconds::new(0),
+                });
+            }
+
+            let dataset_def = DatasetDefinition {
+                properties: AddDataset {
+                    name: Some(
+                        DatasetName::from_str(&format!("{product}_{dataset}_{band_idx}"))
+                            .expect("Failed to create dataset name"),
+                    ),
+                    display_name: format!("{product} {dataset} {band_name}"),
+                    description: format!("{} {} {}", product, dataset, band_name),
+                    source_operator: "GdalSource".to_string(),
+                    symbology: None,
+                    provenance: None,
+                    tags: None,
+                },
+                meta_data: MetaDataDefinition::GdalMetaDataList(GdalMetaDataList {
+                    result_descriptor: result_descriptor.into(),
+                    params: slices,
+                }),
+            };
+
+            let output_file = format!(
+                "{}/{}_{}_{}_{}.json",
+                DATASET_OUTPUT_DIR, product, dataset, band_name, band_idx
+            );
+
+            let json = serde_json::to_string_pretty(&dataset_def)
+                .expect("Failed to serialize dataset definition to JSON");
+
+            std::fs::write(&output_file, json).expect("Failed to write dataset definition to file");
+
+            println!("Saved dataset definition to {}", output_file);
+        }
+    }
+}
diff --git a/test_data/api_calls/force/downsampled.http b/test_data/api_calls/force/downsampled.http
new file mode 100644
index 000000000..99c41090d
--- /dev/null
+++ b/test_data/api_calls/force/downsampled.http
@@ -0,0 +1,44 @@
+
+###
+
+# @name anonymousSession
+POST http://localhost:3030/api/anonymous
+Content-Type: application/json
+
+###
+
+# @name workflow
+POST http://localhost:3030/api/workflow
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
+Content-Type: application/json
+
+{
+  "type": "Raster",
+  "operator": {
+    "type": "GdalSource",
+    "params": {
+      "data": "LND05_BOA_1"
+    }
+  }
+}
+
+###
+
+@workflowId = {{workflow.response.body.$.id}}
+@crs=EPSG:3035
+@width=500
+@height=1000
+@time = 2017-07-04T00%3A00%3A00.000Z
+@minx = 4226026.3630416505
+@miny = 3044919.6079648044
+
+# pixel size: 30
+@maxx = 4256026.3630416505
+@maxy = 3104919.6079648044
+
+@colorizer_min = -1000
+@colorizer_max = 10000
+@styles = custom%3A%7B%22type%22%3A%22singleBand%22%2C%22band%22%3A0%2C%22bandColorizer%22%3A%7B%22type%22%3A%22linearGradient%22%2C%22breakpoints%22%3A%5B%7B%22value%22%3A{{colorizer_min}}%2C%22color%22%3A%5B0%2C0%2C0%2C255%5D%7D%2C%7B%22value%22%3A{{colorizer_max}}%2C%22color%22%3A%5B255%2C255%2C255%2C255%5D%7D%5D%2C%22noDataColor%22%3A%5B0%2C0%2C0%2C0%5D%2C%22overColor%22%3A%5B246%2C250%2C254%2C255%5D%2C%22underColor%22%3A%5B247%2C251%2C255%2C255%5D%7D%7D
+
+GET http://localhost:3030/api/wms/{{workflowId}}?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image%2Fpng&STYLES={{styles}}&TRANSPARENT=true&layers={{workflowId}}&time={{time}}&EXCEPTIONS=application%2Fjson&WIDTH={{width}}&HEIGHT={{height}}&CRS=EPSG:3035&BBOX={{miny}}%2C{{minx}}%2C{{maxy}}%2C{{maxx}}
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
\ No newline at end of file
diff --git a/test_data/api_calls/force/filladaptererror.http b/test_data/api_calls/force/filladaptererror.http
new file mode 100644
index 000000000..98a993f42
--- /dev/null
+++ b/test_data/api_calls/force/filladaptererror.http
@@ -0,0 +1,28 @@
+
+###
+
+# @name anonymousSession
+POST http://localhost:3030/api/anonymous
+Content-Type: application/json
+
+###
+
+# @name workflow
+POST http://localhost:3030/api/workflow
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
+Content-Type: application/json
+
+{
+  "type": "Raster",
+  "operator": {
+    "type": "GdalSource",
+    "params": {
+      "data": "LND05_BOA_1"
+    }
+  }
+}
+
+###
+
+GET http://localhost:3030/api/wms/c965662b-a5bf-57ce-aae2-e7a6036d8a99?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image%2Fpng&STYLES=custom%3A%7B%22type%22%3A%22singleBand%22%2C%22band%22%3A0%2C%22bandColorizer%22%3A%7B%22type%22%3A%22linearGradient%22%2C%22breakpoints%22%3A%5B%7B%22value%22%3A1%2C%22color%22%3A%5B0%2C0%2C0%2C255%5D%7D%2C%7B%22value%22%3A255%2C%22color%22%3A%5B255%2C255%2C255%2C255%5D%7D%5D%2C%22noDataColor%22%3A%5B0%2C0%2C0%2C0%5D%2C%22overColor%22%3A%5B255%2C255%2C255%2C127%5D%2C%22underColor%22%3A%5B0%2C0%2C0%2C127%5D%7D%7D&TRANSPARENT=true&layers=c965662b-a5bf-57ce-aae2-e7a6036d8a99&time=2011-09-20T00%3A00%3A00.000Z&EXCEPTIONS=application%2Fjson&WIDTH=256&HEIGHT=256&CRS=EPSG%3A4326&BBOX=47.8125%2C9.84375%2C49.21875%2C11.25
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
\ No newline at end of file
diff --git a/test_data/api_calls/force/force.http b/test_data/api_calls/force/force.http
new file mode 100644
index 000000000..bc45ba1d3
--- /dev/null
+++ b/test_data/api_calls/force/force.http
@@ -0,0 +1,45 @@
+
+###
+
+# @name anonymousSession
+POST http://localhost:3030/api/anonymous
+Content-Type: application/json
+
+###
+
+# @name workflow
+POST http://localhost:3030/api/workflow
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
+Content-Type: application/json
+
+{
+  "type": "Raster",
+  "operator": {
+    "type": "GdalSource",
+    "params": {
+      "data": "LND05_BOA_1"
+    }
+  }
+}
+
+###
+
+@workflowId = {{workflow.response.body.$.id}}
+@crs=EPSG:3035
+@width=1000
+@height=2000
+@time = 2017-07-04T00%3A00%3A00.000Z
+@minx = 4226026.3630416505
+@miny = 3044919.6079648044
+
+# pixel size: 30
+@maxx = 4256026.3630416505
+@maxy = 3104919.6079648044
+
+@colorizer_min = -1000
+@colorizer_max = 10000
+@styles = custom%3A%7B%22type%22%3A%22singleBand%22%2C%22band%22%3A0%2C%22bandColorizer%22%3A%7B%22type%22%3A%22linearGradient%22%2C%22breakpoints%22%3A%5B%7B%22value%22%3A{{colorizer_min}}%2C%22color%22%3A%5B0%2C0%2C0%2C255%5D%7D%2C%7B%22value%22%3A{{colorizer_max}}%2C%22color%22%3A%5B255%2C255%2C255%2C255%5D%7D%5D%2C%22noDataColor%22%3A%5B0%2C0%2C0%2C0%5D%2C%22overColor%22%3A%5B246%2C250%2C254%2C255%5D%2C%22underColor%22%3A%5B247%2C251%2C255%2C255%5D%7D%7D
+
+GET http://localhost:3030/api/wms/{{workflowId}}?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image%2Fpng&STYLES={{styles}}&TRANSPARENT=true&layers={{workflowId}}&time={{time}}&EXCEPTIONS=application%2Fjson&WIDTH={{width}}&HEIGHT={{height}}&CRS={{crs}}&BBOX={{miny}}%2C{{minx}}%2C{{maxy}}%2C{{maxx}}
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
+
diff --git a/test_data/api_calls/force/reprojected.http b/test_data/api_calls/force/reprojected.http
new file mode 100644
index 000000000..ad195c53f
--- /dev/null
+++ b/test_data/api_calls/force/reprojected.http
@@ -0,0 +1,43 @@
+
+###
+
+# @name anonymousSession
+POST http://localhost:3030/api/anonymous
+Content-Type: application/json
+
+###
+
+# @name workflow
+POST http://localhost:3030/api/workflow
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
+Content-Type: application/json
+
+{
+  "type": "Raster",
+  "operator": {
+    "type": "GdalSource",
+    "params": {
+      "data": "LND05_BOA_1"
+    }
+  }
+}
+
+###
+
+@workflowId = {{workflow.response.body.$.id}}
+@crs=EPSG:4326
+@minx = 8.66114350470939
+@miny = 50.5083520752026
+
+@maxx = 9.07337279440548
+@maxy = 51.0518257240638
+@width=1000
+@height=1000
+@time = 2017-07-04T00%3A00%3A00.000Z
+
+@colorizer_min = -1000
+@colorizer_max = 10000
+@styles = custom%3A%7B%22type%22%3A%22singleBand%22%2C%22band%22%3A0%2C%22bandColorizer%22%3A%7B%22type%22%3A%22linearGradient%22%2C%22breakpoints%22%3A%5B%7B%22value%22%3A{{colorizer_min}}%2C%22color%22%3A%5B0%2C0%2C0%2C255%5D%7D%2C%7B%22value%22%3A{{colorizer_max}}%2C%22color%22%3A%5B255%2C255%2C255%2C255%5D%7D%5D%2C%22noDataColor%22%3A%5B0%2C0%2C0%2C0%5D%2C%22overColor%22%3A%5B246%2C250%2C254%2C255%5D%2C%22underColor%22%3A%5B247%2C251%2C255%2C255%5D%7D%7D
+
+GET http://localhost:3030/api/wms/{{workflowId}}?REQUEST=GetMap&SERVICE=WMS&VERSION=1.3.0&FORMAT=image%2Fpng&STYLES={{styles}}&TRANSPARENT=true&layers={{workflowId}}&time={{time}}&EXCEPTIONS=application%2Fjson&WIDTH={{width}}&HEIGHT={{height}}&CRS={{crs}}&BBOX={{miny}}%2C{{minx}}%2C{{maxy}}%2C{{maxx}}
+Authorization: Bearer {{anonymousSession.response.body.$.id}}
diff --git a/test_data/raster/sentinel2/S2B_32UMB_20220129_0_L2A__B02.tif_downsampled.tif b/test_data/raster/sentinel2/S2B_32UMB_20220129_0_L2A__B02.tif_downsampled.tif
new file mode 100644
index 000000000..57999440d
Binary files /dev/null and b/test_data/raster/sentinel2/S2B_32UMB_20220129_0_L2A__B02.tif_downsampled.tif differ