From 44dc31c7e30a9074f4661fddeaa709faa0f849ef Mon Sep 17 00:00:00 2001 From: rly Date: Sat, 9 Mar 2024 17:14:14 -0800 Subject: [PATCH 01/11] Add array class example --- .../schema_definition-native-array-2.yaml | 128 ++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 tests/input/examples/schema_definition-native-array-2.yaml diff --git a/tests/input/examples/schema_definition-native-array-2.yaml b/tests/input/examples/schema_definition-native-array-2.yaml new file mode 100644 index 00000000..eb7f65de --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-2.yaml @@ -0,0 +1,128 @@ +id: https://example.org/arrays +name: arrays-temperature-example-2 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using LinkML NDArray classes +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + annotations: + array_data_mapping: + data: temperatures_in_K + dims: [x, "y", t] + coords: + latitude_in_deg: x + longitude_in_deg: "y" + time_in_d: t + attributes: + name: + identifier: true + range: string + latitude_in_deg: + implements: + - linkml:axis # is this necessary/useful given the above array_data_mapping? + range: LatitudeSeries + # NOTE: LatitudeSeries could have multiple array attributes. use the one that implements linkml:elements + required: true + annotations: + axis_index: 0 # is this necessary/useful given the above array_data_mapping? 
+ longitude_in_deg: + implements: + - linkml:axis + range: LongitudeSeries + required: true + annotations: + axis_index: 1 + time_in_d: + implements: + - linkml:axis + range: DaySeries + required: true + annotations: + axis_index: 2 + temperatures_in_K: + implements: + - linkml:DataArray_data # changed from linkml:array + range: TemperatureMatrix + required: true + + TemperatureMatrix: + description: A 3D array of temperatures + implements: + - linkml:NDArray + - linkml:RowOrderedArray + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + + LatitudeSeries: + description: A series whose values represent latitude + implements: + - linkml:NDArray + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: deg + array: + exact_number_dimensions: 1 + + LongitudeSeries: + description: A series whose values represent longitude + implements: + - linkml:NDArray + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: deg + array: + exact_number_dimensions: 1 + + DaySeries: + description: A series whose values represent the days since the start of the measurement period + implements: + - linkml:NDArray + attributes: + values: + range: float + multivalued: true + implements: + - linkml:elements + required: true + unit: + ucum_code: d + array: + exact_number_dimensions: 1 From 7eb956159890c81b92c9952592295c94d731ae21 Mon Sep 17 00:00:00 2001 From: rly Date: Sat, 9 Mar 2024 17:14:47 -0800 Subject: [PATCH 02/11] Replace y with "y" in example because y = True in YAML 1.1 --- tests/input/examples/schema_definition-native-array-1.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/input/examples/schema_definition-native-array-1.yaml b/tests/input/examples/schema_definition-native-array-1.yaml 
index 7b084924..412104f9 100644 --- a/tests/input/examples/schema_definition-native-array-1.yaml +++ b/tests/input/examples/schema_definition-native-array-1.yaml @@ -22,10 +22,10 @@ classes: annotations: array_data_mapping: data: temperatures_in_K - dims: [x, y, t] + dims: [x, "y", t] coords: latitude_in_deg: x - longitude_in_deg: y + longitude_in_deg: "y" time_in_d: t attributes: name: From 26dc2d3f80e0d4dd3ab35d33ee96ab0a342ae5fe Mon Sep 17 00:00:00 2001 From: rly Date: Sat, 9 Mar 2024 17:20:53 -0800 Subject: [PATCH 03/11] Remove comments --- tests/input/examples/schema_definition-native-array-2.yaml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/input/examples/schema_definition-native-array-2.yaml b/tests/input/examples/schema_definition-native-array-2.yaml index eb7f65de..3779b306 100644 --- a/tests/input/examples/schema_definition-native-array-2.yaml +++ b/tests/input/examples/schema_definition-native-array-2.yaml @@ -36,12 +36,11 @@ classes: range: string latitude_in_deg: implements: - - linkml:axis # is this necessary/useful given the above array_data_mapping? + - linkml:axis range: LatitudeSeries - # NOTE: LatitudeSeries could have multiple array attributes. use the one that implements linkml:elements required: true annotations: - axis_index: 0 # is this necessary/useful given the above array_data_mapping? 
+ axis_index: 0 longitude_in_deg: implements: - linkml:axis @@ -58,7 +57,7 @@ classes: axis_index: 2 temperatures_in_K: implements: - - linkml:DataArray_data # changed from linkml:array + - linkml:array range: TemperatureMatrix required: true From 9f8fad69340d893d9061c11ca2706ab63d3eb30f Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 15 Mar 2024 17:28:45 -0700 Subject: [PATCH 04/11] Update examples --- .../schema_definition-native-array-1.yaml | 14 ++++++--- .../schema_definition-native-array-2.yaml | 31 +++---------------- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/tests/input/examples/schema_definition-native-array-1.yaml b/tests/input/examples/schema_definition-native-array-1.yaml index 412104f9..08af4054 100644 --- a/tests/input/examples/schema_definition-native-array-1.yaml +++ b/tests/input/examples/schema_definition-native-array-1.yaml @@ -22,11 +22,11 @@ classes: annotations: array_data_mapping: data: temperatures_in_K - dims: [x, "y", t] + dims: ["x", "y", "t"] # YAML 1.1 treats unquoted y as True coords: - latitude_in_deg: x + latitude_in_deg: "x" longitude_in_deg: "y" - time_in_d: t + time_in_d: "t" attributes: name: identifier: true @@ -38,7 +38,7 @@ classes: unit: ucum_code: deg array: - exact_number_dimensions: 1 + exact_number_dimensions: 2 longitude_in_deg: required: true range: float @@ -46,7 +46,7 @@ classes: unit: ucum_code: deg array: - exact_number_dimensions: 1 + exact_number_dimensions: 2 time_in_d: range: float multivalued: true @@ -65,4 +65,8 @@ classes: ucum_code: K array: exact_number_dimensions: 3 + dimensions: + - range: LatitudeSeries # or range: float, array: ... 
+ - range: LongitudeSeries + - range: DaySeries diff --git a/tests/input/examples/schema_definition-native-array-2.yaml b/tests/input/examples/schema_definition-native-array-2.yaml index 3779b306..b9f173ca 100644 --- a/tests/input/examples/schema_definition-native-array-2.yaml +++ b/tests/input/examples/schema_definition-native-array-2.yaml @@ -25,53 +25,36 @@ classes: annotations: array_data_mapping: data: temperatures_in_K - dims: [x, "y", t] + dims: ["x", "y", "t"] # YAML 1.1 treats unquoted y as True coords: - latitude_in_deg: x + latitude_in_deg: "x" longitude_in_deg: "y" - time_in_d: t + time_in_d: "t" attributes: name: identifier: true range: string latitude_in_deg: - implements: - - linkml:axis range: LatitudeSeries required: true - annotations: - axis_index: 0 longitude_in_deg: - implements: - - linkml:axis range: LongitudeSeries required: true - annotations: - axis_index: 1 time_in_d: - implements: - - linkml:axis range: DaySeries required: true - annotations: - axis_index: 2 temperatures_in_K: - implements: - - linkml:array range: TemperatureMatrix required: true TemperatureMatrix: description: A 3D array of temperatures - implements: - - linkml:NDArray - - linkml:RowOrderedArray attributes: values: range: float multivalued: true implements: - - linkml:elements + - linkml:elements # signals to a containing DataArray that this has the data required: true unit: ucum_code: K @@ -80,8 +63,6 @@ classes: LatitudeSeries: description: A series whose values represent latitude - implements: - - linkml:NDArray attributes: values: range: float @@ -96,8 +77,6 @@ classes: LongitudeSeries: description: A series whose values represent longitude - implements: - - linkml:NDArray attributes: values: range: float @@ -112,8 +91,6 @@ classes: DaySeries: description: A series whose values represent the days since the start of the measurement period - implements: - - linkml:NDArray attributes: values: range: float From f7f565b06a7f7d483d3985a2298cc632fe011b0c Mon Sep 17 
00:00:00 2001 From: rly Date: Mon, 18 Mar 2024 07:24:12 -0700 Subject: [PATCH 05/11] Update examples --- .../examples/schema_definition-native-array-1.yaml | 10 +++------- .../examples/schema_definition-native-array-2.yaml | 2 +- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/tests/input/examples/schema_definition-native-array-1.yaml b/tests/input/examples/schema_definition-native-array-1.yaml index 08af4054..05a0655e 100644 --- a/tests/input/examples/schema_definition-native-array-1.yaml +++ b/tests/input/examples/schema_definition-native-array-1.yaml @@ -3,6 +3,7 @@ name: arrays-temperature-example title: Array Temperature Example description: |- Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using array slots for the axes and data instead of classes containing arrays license: MIT prefixes: @@ -38,7 +39,7 @@ classes: unit: ucum_code: deg array: - exact_number_dimensions: 2 + exact_number_dimensions: 1 longitude_in_deg: required: true range: float @@ -46,7 +47,7 @@ classes: unit: ucum_code: deg array: - exact_number_dimensions: 2 + exact_number_dimensions: 1 time_in_d: range: float multivalued: true @@ -65,8 +66,3 @@ classes: ucum_code: K array: exact_number_dimensions: 3 - dimensions: - - range: LatitudeSeries # or range: float, array: ... 
- - range: LongitudeSeries - - range: DaySeries - diff --git a/tests/input/examples/schema_definition-native-array-2.yaml b/tests/input/examples/schema_definition-native-array-2.yaml index b9f173ca..fc4e50aa 100644 --- a/tests/input/examples/schema_definition-native-array-2.yaml +++ b/tests/input/examples/schema_definition-native-array-2.yaml @@ -3,7 +3,7 @@ name: arrays-temperature-example-2 title: Array Temperature Example Using NDArray Classes description: |- Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes - using LinkML NDArray classes + using classes containing arrays for the axes and data instead of using array slots license: MIT prefixes: From 2d7bb683d38a58797e139f7283fc879eb4e5d804 Mon Sep 17 00:00:00 2001 From: rly Date: Mon, 18 Mar 2024 07:43:45 -0700 Subject: [PATCH 06/11] Add example --- .../schema_definition-native-array-3.yaml | 112 ++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 tests/input/examples/schema_definition-native-array-3.yaml diff --git a/tests/input/examples/schema_definition-native-array-3.yaml b/tests/input/examples/schema_definition-native-array-3.yaml new file mode 100644 index 00000000..a56d27ee --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3.yaml @@ -0,0 +1,112 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + annotations: + # See also the Xarray DataArray data structure + # 
https://docs.xarray.dev/en/latest/user-guide/data-structures.html#dataarray + # with the main differences being + # 1) the coordinates are not DataArray objects + # 2) coordinates are not named + array_data_mapping: + # The name of the array attribute within this class that contains the data. + # This maps to the "values" attribute of an Xarray DataArray. + data: temperatures_in_K + + # The 3 dimensions of the array attribute "temperatures_in_K" are named + # "x", "y", and "t" in the DataArray. These dims do not need to be the same as the + # dimensions of the array attribute. A name must be provided for each dimension + # of the data array. + # This maps to the "dims" attribute of an Xarray DataArray. + dims: ["x", "y", "t"] # NOTE: y without quotes is parsed as True in YAML 1.1 + + # An array attribute within this class that is not the data array above may + # serve as a coordinate for a set of dimensions in the data array. + # In most cases, a 1D array is a coordinate for a single dimension in the + # data array, but it is possible to have an N-dimensional array that is a + # coordinate for N dimensions in the data array. The format is: + # coordinate_attribute_name: dimension_name or [dimension_name, ...] + # The number of dimensions of a coordinate must equal the length of the + # list specified here. Multiple coordinates can be specified for the same + # dimension or set of dimensions. + # This maps to the "coords" attribute of an Xarray DataArray. + coords: + # Here, the latitude for the temperature value at index (i,j,k) is equal to + # latitude_in_deg[i,j]. Similarly, the longitude for the temperature value at + # index (i,j,k) is equal to longitude_in_deg[i,j]. The date for the temperature + # value at index (i,j,k) is equal to date_in_d[k]. The days_with_rain for the + # temperature value at index (i,j,k) is equal to days_with_rain[k]. + latitude_in_deg: ["x", "y"] + longitude_in_deg: ["x", "y"] + date_in_d: "t" + day: "t" + # The reference date for the temperature values is the same for all values + # in the array. 
In Xarray terms, this is a non-dimension coordinate. + reference_date: False + + # Additional attributes for storing arbitrary metadata about the DataArray may + # be specified in the "attributes" below. These map to the "attrs" attribute of + # an Xarray DataArray. + attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + date_in_d: + required: true + range: date + array: + exact_number_dimensions: 1 + day: + required: true + range: integer + array: + exact_number_dimensions: 1 + reference_date: + required: true + range: date + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: lat # TODO when would the "dims" be used vs "alias"? + - alias: lon + - alias: date From 65443374b5918e65c39644e6764d458e622d7024 Mon Sep 17 00:00:00 2001 From: rly Date: Tue, 26 Mar 2024 18:54:53 -0700 Subject: [PATCH 07/11] Add example --- .../schema_definition-native-array-3b.yaml | 91 +++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 tests/input/examples/schema_definition-native-array-3b.yaml diff --git a/tests/input/examples/schema_definition-native-array-3b.yaml b/tests/input/examples/schema_definition-native-array-3b.yaml new file mode 100644 index 00000000..7429c40b --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3b.yaml @@ -0,0 +1,91 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + ... 
+license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + modifies: + temperatures_in_K: + type: indexes + dimension: [0, 1] + # The latitude for the temperature value at index (i,j,k) is equal to latitude_in_deg[i,j]. + # NOTE in xarray, multi-dimensional coordinates are referenced by name, but here we reference + # by index because dimensions are not required to have names. + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + modifies: + temperatures_in_K: + type: indexes + dimension: [0, 1] + date_in_d: + required: true + range: date + array: + exact_number_dimensions: 1 + modifies: + temperatures_in_K: + type: indexes + dimension: 2 + day: + description: Number of days since the reference date + required: true + range: integer + array: + exact_number_dimensions: 1 + modifies: + temperatures_in_K: + type: indexes + dimension: 2 + reference_date: + description: The reference date for the day values + required: true + range: date + modifies: + temperatures_in_K: + type: indexes + # this is a constant coordinate for the entire array. realistically, this should just be an attribute + # on `day`. 
not sure when a constant coordinate would be used, but we might as well support it + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" From d0aacdf9b030af48bc8c6e641942c6a8407490b9 Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 12 Apr 2024 22:30:11 -0700 Subject: [PATCH 08/11] Update array examples --- ...=> schema_definition-native-array-3a.yaml} | 10 ++- .../schema_definition-native-array-3b.yaml | 51 ++++++----- .../schema_definition-native-array-3c.yaml | 86 +++++++++++++++++++ 3 files changed, 116 insertions(+), 31 deletions(-) rename tests/input/examples/{schema_definition-native-array-3.yaml => schema_definition-native-array-3a.yaml} (94%) create mode 100644 tests/input/examples/schema_definition-native-array-3c.yaml diff --git a/tests/input/examples/schema_definition-native-array-3.yaml b/tests/input/examples/schema_definition-native-array-3a.yaml similarity index 94% rename from tests/input/examples/schema_definition-native-array-3.yaml rename to tests/input/examples/schema_definition-native-array-3a.yaml index a56d27ee..951250cf 100644 --- a/tests/input/examples/schema_definition-native-array-3.yaml +++ b/tests/input/examples/schema_definition-native-array-3a.yaml @@ -62,7 +62,7 @@ classes: date_in_d: "t" day: "t" # The reference date for the temperature values is the same for all values - # in the array. In Xarray terms, this is a non-dimension coordinate. + # in the array. In Xarray terms, this is a non-dimension (constant) coordinate. 
reference_date: False # Additional attributes for storing arbitrary metadata about the DataArray may @@ -92,11 +92,13 @@ classes: array: exact_number_dimensions: 1 day: + description: Number of days since `reference_date` required: true range: integer array: exact_number_dimensions: 1 reference_date: + description: The reference date for the `day` values required: true range: date temperatures_in_K: @@ -107,6 +109,6 @@ classes: array: exact_number_dimensions: 3 dimensions: - - alias: lat # TODO when would the "dims" be used vs "alias"? - - alias: lon - - alias: date + - alias: "x" + - alias: "y" + - alias: "date" diff --git a/tests/input/examples/schema_definition-native-array-3b.yaml b/tests/input/examples/schema_definition-native-array-3b.yaml index 7429c40b..e803658e 100644 --- a/tests/input/examples/schema_definition-native-array-3b.yaml +++ b/tests/input/examples/schema_definition-native-array-3b.yaml @@ -2,7 +2,9 @@ id: https://example.org/arrays name: arrays-temperature-example-3 title: Array Temperature Example Using NDArray Classes description: |- - ... + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays license: MIT prefixes: @@ -32,13 +34,12 @@ classes: ucum_code: deg array: exact_number_dimensions: 2 - modifies: - temperatures_in_K: - type: indexes - dimension: [0, 1] - # The latitude for the temperature value at index (i,j,k) is equal to latitude_in_deg[i,j]. - # NOTE in xarray, multi-dimensional coordinates are referenced by name, but here we reference - # by index because dimensions are not required to have names. + indexes: + temperatures_in_K: + dimension: [0, 1] + # The latitude for the temperature value at index (i,j,k) is equal to latitude_in_deg[i,j]. + # NOTE in xarray, multi-dimensional coordinates are referenced by name, but here we reference + # by index because dimensions are not required to have names. 
longitude_in_deg: required: true range: float @@ -46,38 +47,34 @@ classes: ucum_code: deg array: exact_number_dimensions: 2 - modifies: - temperatures_in_K: - type: indexes - dimension: [0, 1] + indexes: + temperatures_in_K: + dimension: [0, 1] date_in_d: required: true range: date array: exact_number_dimensions: 1 - modifies: - temperatures_in_K: - type: indexes - dimension: 2 + indexes: + temperatures_in_K: + dimension: 2 day: - description: Number of days since the reference date + description: Number of days since `reference_date` required: true range: integer array: exact_number_dimensions: 1 - modifies: - temperatures_in_K: - type: indexes - dimension: 2 + indexes: + temperatures_in_K: + dimension: 2 reference_date: - description: The reference date for the day values + description: The reference date for the `day` values required: true range: date - modifies: - temperatures_in_K: - type: indexes - # this is a constant coordinate for the entire array. realistically, this should just be an attribute - # on `day`. not sure when a constant coordinate would be used, but we might as well support it + # indexes: + # temperatures_in_K: + # # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray + # # but not supported here. the use case is not clear; this should just be an attribute on `day`. 
temperatures_in_K: required: true range: float diff --git a/tests/input/examples/schema_definition-native-array-3c.yaml b/tests/input/examples/schema_definition-native-array-3c.yaml new file mode 100644 index 00000000..4eec3233 --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-3c.yaml @@ -0,0 +1,86 @@ +id: https://example.org/arrays +name: arrays-temperature-example-3 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a complex 3D DataArray of temperature values + with labeled axes using array slots for the axes and data instead of classes containing + arrays +license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + date_in_d: + required: true + range: date + array: + exact_number_dimensions: 1 + day: + description: Number of days since `reference_date` + required: true + range: integer + array: + exact_number_dimensions: 1 + reference_date: + description: The reference date for the `day` values + required: true + range: date + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" + indexed_by: + - alias: lat + index_slot: latitude_in_deg + index_dims: [0, 1] + - alias: lon + index_slot: longitude_in_deg + index_dims: [0, 1] + - alias: date + index_slot: date + index_dims: [2] + - alias: day + index_slot: day + index_dims: [2] + - alias: reference_date + 
index_slot: reference_date + # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. + # the use case is not clear; this should just be an attribute on `day`. From faca6ece03103849476f53c0807ea0c87fa79540 Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 12 Apr 2024 23:31:28 -0700 Subject: [PATCH 09/11] Revamp the experimental array classes --- linkml_model/model/schema/array.yaml | 150 ++++----------------------- 1 file changed, 20 insertions(+), 130 deletions(-) diff --git a/linkml_model/model/schema/array.yaml b/linkml_model/model/schema/array.yaml index 1d5f67fd..9cdd62c6 100644 --- a/linkml_model/model/schema/array.yaml +++ b/linkml_model/model/schema/array.yaml @@ -2,8 +2,7 @@ id: https://w3id.org/linkml/lib/arrays name: arrays title: LinkML Arrays description: >- - LinkML templates for storing one-dimensional series, two-dimensional arrays, - and arrays of higher dimensionality. + LinkML templates for storing arrays. Status: Experimental @@ -19,6 +18,9 @@ status: testing # - github:mavaylon1 # - github:ialarmedalien # - github:cmungall +# - github:sneakers-the-rat +# - github:bendichter +# - github:melonora prefixes: linkml: https://w3id.org/linkml/ @@ -39,141 +41,29 @@ classes: DataStructure: abstract: true - NDArray: + Array: description: >- - a data structure consisting of a collection of *elements*, each identified by at least one array index tuple. - abstract: true + A data structure where an N-dimensional array is represented as a class rather than an attribute. There + must be exactly one attribute that is an array. There may be other attributes associated with the array + but they must not be arrays themselves. is_a: DataStructure - slots: - - dimensions - - elements - - array_linearization_order - slot_usage: - elements: - description: >- - the collection of values that make up the array. The elements have a *direct* representation which is - an ordered sequence of values. 
The elements also have an *array interpretation*, where each - element has a unique index which is determined by array_linearization_order DataArray: description: >- - a data structure containing an NDArray and a set of one-dimensional series that are used to label - the elements of the array + A data structure containing an Array and a set of Arrays that are used to label the elements of the Array. + The set of Arrays are also known as coordinates. is_a: DataStructure - slots: - - axis - - array see_also: - https://docs.xarray.dev/en/stable/generated/xarray.DataArray.html - GroupingByArrayOrder: - mixin: true - description: >- - A mixin that describes an array whose elements are mapped from a linear sequence to an array index - via a specified mapping - - ColumnOrderedArray: - mixin: true - is_a: GroupingByArrayOrder - description: >- - An array ordering that is column-order - slots: - - array_linearization_order - slot_usage: - array_linearization_order: - equals_string: COLUMN_MAJOR_ARRAY_ORDER - - RowOrderedArray: - mixin: true - is_a: GroupingByArrayOrder - description: >- - An array ordering that is row-order or generalizations thereof - slots: - - array_linearization_order - slot_usage: - array_linearization_order: - equals_string: ROW_MAJOR_ARRAY_ORDER - -slots: - dimensions: - description: >- - The number of elements in the tuple used to access elements of an array - aliases: - - rank - - dimensionality - - number of axes - - number of elements - range: integer - axis: - range: NDArray - slot_usage: - dimensions: - equals_number: 1 - aliases: - - dimension - description: >- - A one-dimensional series that contains elements that form one part of a tuple used to access an array - required: true - axis_index: - range: integer - description: >- - The position of an axis in a tuple used to access an array - array: - range: NDArray - description: >- - An array that is labeled by a set of one-dimensional series - required: true - elements: - # this will be 
serialized as one big long list that should be interpreted as a 2D array - range: Any - aliases: - - values - required: true - multivalued: true - description: >- - A collection of values that make up the contents of an array. These elements may be interpreted - as a contiguous linear sequence (direct representation) or as elements to be accessed via an - array index - series_label: # the row label - key: true - description: >- - A name that uniquely identifiers a series - length: - description: >- - The number of elements in the array - range: integer - equals_expression: "length(elements)" - array_linearization_order: - range: ArrayLinearizationOrderOptions - ifabsent: "string(ROW_MAJOR_ARRAY_ORDER)" - - specified_input: - range: DataStructure - multivalued: true - specified_output: - range: DataStructure - multivalued: true - operation_parameters: - range: Any - multivalued: true - -enums: - ArrayLinearizationOrderOptions: + Dataset: description: >- - Determines how a linear contiguous representation of the elements of an array map - to array indices - permissible_values: - COLUMN_MAJOR_ARRAY_ORDER: - meaning: gom:columnMajorArray - description: >- - An array layout option in which the elements in each column is stored in consecutive positions, - or any generalization thereof to dimensionality greater than 2 - aliases: - - F order - ROW_MAJOR_ARRAY_ORDER: - meaning: gom:rowMajorArray - description: >- - An array layout option in which the elements in each row is stored in consecutive positions, - or any generalization thereof to dimensionality greater than 2 - aliases: - - C order + A data structure containing one or more main Arrays with aligned dimensions and a set of Arrays that are used to + label the elements of the Arrays. The set of Arrays are also known as coordinates. A Dataset with only one + main Array is equivalent to a DataArray. 
If there are multiple main Arrays, then all dimensions must refer to + points in the same shared coordinate system, i.e., if two Arrays have the same dimension "x", that dimension + must be identical in both Arrays. + is_a: DataStructure + see_also: + - https://docs.xarray.dev/en/stable/generated/xarray.Dataset.html + - https://docs.unidata.ucar.edu/netcdf-c/current/netcdf_data_model.html From 8ff7d6d4963ae6dd73a3ad3aa0c862597933f14d Mon Sep 17 00:00:00 2001 From: rly Date: Fri, 12 Apr 2024 23:53:15 -0700 Subject: [PATCH 10/11] Update examples --- .../schema_definition-native-array-2c.yaml | 146 ++++++++++++++++++ .../schema_definition-native-array-3a.yaml | 14 +- .../schema_definition-native-array-3b.yaml | 8 +- .../schema_definition-native-array-3c.yaml | 12 +- 4 files changed, 167 insertions(+), 13 deletions(-) create mode 100644 tests/input/examples/schema_definition-native-array-2c.yaml diff --git a/tests/input/examples/schema_definition-native-array-2c.yaml b/tests/input/examples/schema_definition-native-array-2c.yaml new file mode 100644 index 00000000..a67dd576 --- /dev/null +++ b/tests/input/examples/schema_definition-native-array-2c.yaml @@ -0,0 +1,146 @@ +id: https://example.org/arrays +name: arrays-temperature-example-2 +title: Array Temperature Example Using NDArray Classes +description: |- + Example LinkML schema to demonstrate a 3D DataArray of temperature values with labeled axes + using classes containing arrays for the axes and data instead of using array slots/attributes. + Creating separate types for the array slots enables reuse and extension. 
+license: MIT + +prefixes: + linkml: https://w3id.org/linkml/ + wgs84: http://www.w3.org/2003/01/geo/wgs84_pos# + example: https://example.org/ + +default_prefix: example + +imports: + - linkml:types + +classes: + + TemperatureDataset: + tree_root: true + implements: + - linkml:DataArray + attributes: + name: + identifier: true + range: string + latitude_in_deg: + range: LatitudeSeries + required: true + longitude_in_deg: + range: LongitudeSeries + required: true + date: + range: DateSeries + required: true + day_in_d: + range: DaysSinceSeries + # one could define `reference_date` at this level but it really should be an attribute on `DaysSinceSeries`. + # however, this means `reference_date` cannot be a non-dimension (constant) coordinate of `temperatures_in_K` + # as structured in Xarray. + temperatures_in_K: + range: TemperatureMatrix + required: true + array: + # it does not make sense to put `indexed_by` on `TemperatureMatrix` because the index slots are only + # accessible from this DataArray class. 
+ indexed_by: + - alias: lat + index_slot: latitude_in_deg + index_dims: [0, 1] + - alias: lon + index_slot: longitude_in_deg + index_dims: [0, 1] + - alias: date + index_slot: date + index_dims: [2] + - alias: day + index_slot: day_in_d + index_dims: [2] + + LatitudeSeries: + description: A 2D array whose values represent latitude + implements: + - linkml:Array + attributes: + name: + identifier: true # an identifier is required for referencing in other classes + range: string + latitude_in_deg: # the name of the attribute does not matter when it is used within a DataArray or Dataset + required: true + range: float + unit: + ucum_code: deg + array: # exactly one attribute within this class must be an array + exact_number_dimensions: 2 + + LongitudeSeries: + description: A 2D array whose values represent longitude + implements: + - linkml:Array + attributes: + name: + identifier: true + range: string + longitude_in_deg: + required: true + range: float + unit: + ucum_code: deg + array: + exact_number_dimensions: 2 + + DateSeries: + description: A 1D series of dates + implements: + - linkml:Array + attributes: + name: + identifier: true + range: string + date: + required: true + range: date + array: + exact_number_dimensions: 1 + + DaysSinceSeries: + description: A 1D series whose values represent the number of days since a reference date + implements: + - linkml:Array + attributes: + name: + identifier: true + range: string + day_in_d: + required: true + range: integer + unit: + ucum_code: d + array: + exact_number_dimensions: 1 + reference_date: + description: The reference date for the `day_in_d` values + required: true + range: date + + TemperatureMatrix: + description: A 3D array of temperatures + attributes: + name: + identifier: true + range: string + temperatures_in_K: + required: true + range: float + unit: + ucum_code: K + array: + exact_number_dimensions: 3 + dimensions: + - alias: "x" + - alias: "y" + - alias: "date" diff --git 
a/tests/input/examples/schema_definition-native-array-3a.yaml b/tests/input/examples/schema_definition-native-array-3a.yaml index 951250cf..670b4336 100644 --- a/tests/input/examples/schema_definition-native-array-3a.yaml +++ b/tests/input/examples/schema_definition-native-array-3a.yaml @@ -59,8 +59,8 @@ classes: # temperature value at index (i,j,k) is equal to days_with_rain[k]. latitude_in_deg: ["x", "y"] longitude_in_deg: ["x", "y"] - date_in_d: "t" - day: "t" + date: "t" + day_in_d: "t" # The reference date for the temperature values is the same for all values # in the array. In Xarray terms, this is a non-dimension (constant) coordinate. reference_date: False @@ -86,21 +86,25 @@ classes: ucum_code: deg array: exact_number_dimensions: 2 - date_in_d: + date: required: true range: date array: exact_number_dimensions: 1 - day: + day_in_d: description: Number of days since `reference_date` required: true range: integer + unit: + ucum_code: d array: exact_number_dimensions: 1 reference_date: - description: The reference date for the `day` values + description: The reference date for the `day_in_d` values required: true range: date + unit: + ucum_code: d temperatures_in_K: required: true range: float diff --git a/tests/input/examples/schema_definition-native-array-3b.yaml b/tests/input/examples/schema_definition-native-array-3b.yaml index e803658e..613d3fa5 100644 --- a/tests/input/examples/schema_definition-native-array-3b.yaml +++ b/tests/input/examples/schema_definition-native-array-3b.yaml @@ -50,7 +50,7 @@ classes: indexes: temperatures_in_K: dimension: [0, 1] - date_in_d: + date: required: true range: date array: @@ -58,17 +58,19 @@ classes: indexes: temperatures_in_K: dimension: 2 - day: + day_in_d: description: Number of days since `reference_date` required: true range: integer + unit: + ucum_code: d array: exact_number_dimensions: 1 indexes: temperatures_in_K: dimension: 2 reference_date: - description: The reference date for the `day` values + description: 
The reference date for the `day_in_d` values required: true range: date # indexes: diff --git a/tests/input/examples/schema_definition-native-array-3c.yaml b/tests/input/examples/schema_definition-native-array-3c.yaml index 4eec3233..60251d66 100644 --- a/tests/input/examples/schema_definition-native-array-3c.yaml +++ b/tests/input/examples/schema_definition-native-array-3c.yaml @@ -41,19 +41,21 @@ classes: ucum_code: deg array: exact_number_dimensions: 2 - date_in_d: + date: required: true range: date array: exact_number_dimensions: 1 - day: + day_in_d: description: Number of days since `reference_date` required: true range: integer + unit: + ucum_code: d array: exact_number_dimensions: 1 reference_date: - description: The reference date for the `day` values + description: The reference date for the `day_in_d` values required: true range: date temperatures_in_K: @@ -78,9 +80,9 @@ classes: index_slot: date index_dims: [2] - alias: day - index_slot: day + index_slot: day_in_d index_dims: [2] - alias: reference_date index_slot: reference_date # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. - # the use case is not clear; this should just be an attribute on `day`. + # the use case is not clear; this should just be an attribute on `day_in_d`. 
From 55ce4b9908193c8bcae01d44f50e40b279b1668e Mon Sep 17 00:00:00 2001 From: rly Date: Thu, 9 May 2024 01:18:24 -0700 Subject: [PATCH 11/11] Update examples --- .../schema_definition-native-array-2c.yaml | 28 +++++++------------ .../schema_definition-native-array-3b.yaml | 23 +++++++++------ .../schema_definition-native-array-3c.yaml | 24 ++++++++-------- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/tests/input/examples/schema_definition-native-array-2c.yaml b/tests/input/examples/schema_definition-native-array-2c.yaml index a67dd576..04faa320 100644 --- a/tests/input/examples/schema_definition-native-array-2c.yaml +++ b/tests/input/examples/schema_definition-native-array-2c.yaml @@ -45,26 +45,24 @@ classes: range: TemperatureMatrix required: true array: - # it does not make sense to put `indexed_by` on `TemperatureMatrix` because the index slots are only + # it does not make sense to put `labeled_by` on `TemperatureMatrix` because the label slots are only # accessible from this DataArray class.
- indexed_by: + labeled_by: - alias: lat - index_slot: latitude_in_deg - index_dims: [0, 1] + label_slot: latitude_in_deg + labeled_dimensions: [0, 1] - alias: lon - index_slot: longitude_in_deg - index_dims: [0, 1] + label_slot: longitude_in_deg + labeled_dimensions: [0, 1] - alias: date - index_slot: date - index_dims: [2] + label_slot: date + labeled_dimensions: [2] - alias: day - index_slot: day_in_d - index_dims: [2] + label_slot: day_in_d + labeled_dimensions: [2] LatitudeSeries: description: A 2D array whose values represent latitude - implements: - - linkml:Array attributes: name: identifier: true # an identifier is required for referencing in other classes @@ -79,8 +77,6 @@ classes: LongitudeSeries: description: A 2D array whose values represent longitude - implements: - - linkml:Array attributes: name: identifier: true @@ -95,8 +91,6 @@ classes: DateSeries: description: A 1D series of dates - implements: - - linkml:Array attributes: name: identifier: true @@ -109,8 +103,6 @@ classes: DaysSinceSeries: description: A 1D series whose values represent the number of days since a reference date - implements: - - linkml:Array attributes: name: identifier: true diff --git a/tests/input/examples/schema_definition-native-array-3b.yaml b/tests/input/examples/schema_definition-native-array-3b.yaml index 613d3fa5..f1725519 100644 --- a/tests/input/examples/schema_definition-native-array-3b.yaml +++ b/tests/input/examples/schema_definition-native-array-3b.yaml @@ -36,7 +36,8 @@ classes: exact_number_dimensions: 2 indexes: temperatures_in_K: - dimension: [0, 1] + alias: lat + index_dims: [0, 1] # The latitude for the temperature value at index (i,j,k) is equal to latitude_in_deg[i,j]. # NOTE in xarray, multi-dimensional coordinates are referenced by name, but here we reference # by index because dimensions are not required to have names. 
@@ -49,7 +50,8 @@ classes: exact_number_dimensions: 2 indexes: temperatures_in_K: - dimension: [0, 1] + alias: lon + index_dims: [0, 1] date: required: true range: date @@ -57,7 +59,8 @@ classes: exact_number_dimensions: 1 indexes: temperatures_in_K: - dimension: 2 + alias: date + index_dims: [2] day_in_d: description: Number of days since `reference_date` required: true @@ -68,15 +71,19 @@ classes: exact_number_dimensions: 1 indexes: temperatures_in_K: - dimension: 2 + alias: day + index_dims: [2] reference_date: description: The reference date for the `day_in_d` values required: true range: date - # indexes: - # temperatures_in_K: - # # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray - # # but not supported here. the use case is not clear; this should just be an attribute on `day`. + indexes: + temperatures_in_K: + alias: reference_date + index_dims: null + # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. + # the use case is not clear; this should just be an attribute on `day_in_d`.
but we can support it by + # allowing index_dims: null temperatures_in_K: required: true range: float diff --git a/tests/input/examples/schema_definition-native-array-3c.yaml b/tests/input/examples/schema_definition-native-array-3c.yaml index 60251d66..6a97847b 100644 --- a/tests/input/examples/schema_definition-native-array-3c.yaml +++ b/tests/input/examples/schema_definition-native-array-3c.yaml @@ -69,20 +69,22 @@ classes: - alias: "x" - alias: "y" - alias: "date" - indexed_by: + labeled_by: - alias: lat - index_slot: latitude_in_deg - index_dims: [0, 1] + label_slot: latitude_in_deg + labeled_dimensions: [0, 1] - alias: lon - index_slot: longitude_in_deg - index_dims: [0, 1] + label_slot: longitude_in_deg + labeled_dimensions: [0, 1] - alias: date - index_slot: date - index_dims: [2] + label_slot: date + labeled_dimensions: [2] - alias: day - index_slot: day_in_d - index_dims: [2] + label_slot: day_in_d + labeled_dimensions: [2] - alias: reference_date - index_slot: reference_date + label_slot: reference_date + labeled_dimensions: null # this is a non-dimension (constant) coordinate for the entire array and is supported by xarray. - # the use case is not clear; this should just be an attribute on `day_in_d`. + # the use case is not clear; this should just be an attribute on `day_in_d`. but we can support it by + # allowing labeled_dimensions: null