From 714c95dbfd1d1b1181e4c6f69cc45dac4c7ded89 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Mon, 29 Sep 2025 17:30:17 +0200 Subject: [PATCH 1/7] Add Geo Multiscales Attribute Extension specification and schema --- attributes/geo/multiscales/README.md | 420 +++++++++++++++++++++++++ attributes/geo/multiscales/schema.json | 190 +++++++++++ 2 files changed, 610 insertions(+) create mode 100644 attributes/geo/multiscales/README.md create mode 100644 attributes/geo/multiscales/schema.json diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md new file mode 100644 index 0000000..93de98b --- /dev/null +++ b/attributes/geo/multiscales/README.md @@ -0,0 +1,420 @@ +# Geo Multiscales Attribute Extension for Zarr + +- **Extension Name**: Geo Multiscales Attribute Extension +- **Version**: 0.1.0 +- **Extension Type**: Attribute +- **Status**: Proposed +- **Owners**: @emmanuelmathot + +## Description + +This specification defines a JSON object that encodes multiscale pyramid information for geospatial data stored in Zarr groups under the `multiscales` key within the `geo` dictionary in the attributes of Zarr groups. Additionally, it specifies the hierarchical layout of datasets representing different resolution levels, discovery mechanisms for clients to enumerate available levels and variables, and requirements for consolidated metadata to ensure complete discoverability of the multiscale structure. 
+ +## Motivation + +- Provides standardized multiscale pyramid encoding for geospatial overviews +- Supports flexible decimation schemes (factor-of-2, factor-of-3, custom factors) +- Compatible with OGC TileMatrixSet 2.0 specification while enabling generic pyramids +- Supports both consistent and inconsistent pyramid structures (where variables may not be available at all resolution levels) +- Enables optimized data access patterns for visualization and analysis at different scales +- Accommodates scientific coordinate systems beyond web mapping (UTM, polar stereographic, etc.) + +## Inheritance Model + +The `multiscales` key under the `geo` object is defined at the group level and applies to the hierarchical structure within that group. +There is no inheritance of `multiscales` metadata from parent groups to child groups. Each multiscale group defines its own pyramid structure independently. + +## Specification + +The `multiscales` key under the `geo` dictionary can be added to Zarr groups to define multiscale pyramid information. 
+ + +**`geo -> multiscales` Properties** + +| | Type | Description | Required | Reference | +| -------------------------- | ------------------ | -------------------------------------------------- | -------- | ------------------------------------------------------------------------------------- | +| **version** | `string` | Multiscales metadata version | ✓ Yes | [geo -> multiscales.version](#geo---multiscalesversion) | +| **tile_matrix_set** | `string \| object` | Tile matrix set definition or reference | No | [geo -> multiscales.tile_matrix_set](#geo---multiscalestile_matrix_set) | +| **resampling_method** | `string` | Resampling method used for downsampling | No | [geo -> multiscales.resampling_method](#geo---multiscalesresampling_method) | +| **tile_matrix_set_limits** | `object` | Optional limits for available tiles per zoom level | No | [geo -> multiscales.tile_matrix_set_limits](#geo---multiscalestile_matrix_set_limits) | + +### Field Details + +Additional properties are allowed. + +#### geo -> multiscales.version + +Multiscales metadata version + +* **Type**: `string` +* **Required**: ✓ Yes +* **Allowed values**: `0.1` + +#### geo -> multiscales.layout + +TileMatrixSet definition or reference + +* **Type**: `string | object` +* **Required**: No + +This field can contain either: +1. **Reference by identifier**: A string identifier referencing a well-known TileMatrixSet (e.g., "WebMercatorQuad") +2. **URI reference**: A URI pointing to a JSON document describing the tile matrix set +3. 
**Inline definition**: A complete TileMatrixSet JSON object following OGC TileMatrixSet 2.0 specification + +**Reference by identifier:** +```json +{ + "tile_matrix_set": "WebMercatorQuad" +} +``` + +**URI reference:** +```json +{ + "tile_matrix_set": "https://maps.example.org/tileMatrixSets/WebMercatorQuad.json" +} +``` + +**Inline definition:** +```json +{ + "tile_matrix_set": { + "id": "Custom_Grid", + "title": "Custom Grid for Scientific Data", + "crs": "EPSG:4326", + "tileMatrices": [ + { + "id": "0", + "scaleDenominator": 0.703125, + "cellSize": 0.0625, + "pointOfOrigin": [-180.0, 90.0], + "tileWidth": 256, + "tileHeight": 256, + "matrixWidth": 2, + "matrixHeight": 1 + } + ] + } +} +``` + +#### geo -> multiscales.resampling_method + +Resampling method used for downsampling + +* **Type**: `string` +* **Required**: No +* **Allowed values**: `"nearest"`, `"average"`, `"bilinear"`, `"cubic"`, `"cubic_spline"`, `"lanczos"`, `"mode"`, `"max"`, `"min"`, `"med"`, `"sum"`, `"q1"`, `"q3"`, `"rms"`, `"gauss"` +* **Default**: `"nearest"` + +The same method SHALL apply across all levels. + +#### geo -> multiscales.tile_matrix_set_limits + +Optional limits for available tiles per zoom level + +* **Type**: `object` +* **Required**: No + +Defines the available tile ranges for each zoom level. Keys must match TileMatrix.id values from the TileMatrixSet. + +```json +{ + "tile_matrix_set_limits": { + "0": { + "minTileRow": 0, + "maxTileRow": 0, + "minTileCol": 0, + "maxTileCol": 0 + }, + "1": { + "minTileRow": 0, + "maxTileRow": 1, + "minTileCol": 0, + "maxTileCol": 1 + } + } +} +``` + + + + +### Hierarchical Layout + +Multiscale datasets follow a specific hierarchical structure that accommodates both native resolution storage and overview levels: + +1. **Dataset Group**: Contains native resolution data and multiscales metadata +2. 
**Overview Level Groups**: Child groups containing overview data at different resolutions + +``` +/measurements/ # Dataset Group with multiscales metadata +├── 0/ # First overview level +| ├── b02 # Native resolution variable +| ├── b03 # Native resolution variable +| ├── b04 # Native resolution variable +| ├── spatial_ref # Coordinate reference variable +├── 1/ # First overview level +│ ├── b01 # All bands available at overview level +│ ├── b02 +│ ├── b03 +│ ├── ... +│ └── spatial_ref +└── 2/ # Second overview level + ├── b01 + ├── b02 + ├── b03 + ├── ... + └── spatial_ref +``` + +**Key principles:** +- Native resolution variables are stored directly in the Dataset Group (not in a separate "0/" group) +- Overview levels are stored in child groups with names matching TileMatrix identifiers +- This approach maintains efficiency by avoiding the need to restructure existing datasets when adding overviews + +### Group Discovery Methods + +The multiscales metadata enables complete discovery of the multiscale collection structure through multiple mechanisms: + +1. **TileMatrixSet-based discovery**: + - The TileMatrixSet definition specifies the exact set of zoom levels through its tileMatrices array + - Each TileMatrix.id value corresponds to a child group in the multiscale hierarchy + - Variable discovery within each zoom level group follows standard Zarr metadata conventions + +2. **Generic datasets-based discovery**: + - The `datasets` array explicitly lists all resolution levels and their paths + - Scale factors provide resolution relationships + +3. 
**Explicit limits**: + - `tile_matrix_set_limits` explicitly declares which zoom levels contain data + - For storage backends that do not support directory listing, this is the primary mechanism for discovering available zoom levels + +### Consolidated Metadata Requirements + +**Consolidated metadata is MANDATORY for multiscale groups** to ensure complete discoverability of pyramid structure and metadata without requiring individual access to each child dataset. + +#### Requirements + +1. **Zarr Consolidated Metadata**: The multiscale group SHALL use Zarr's consolidated metadata feature to expose metadata from all child groups and arrays at the group level. + +2. **Projection Information Access**: All projection information (CRS, transforms, grid mappings) from child datasets SHALL be accessible through the consolidated metadata at the multiscale group level. + +3. **Variable Discovery**: The consolidated metadata SHALL include complete variable listings for all resolution levels, enabling clients to understand the full pyramid structure without traversing child groups. + +4. **Coordinate Information**: Coordinate arrays and their metadata from all resolution levels SHALL be included in the consolidated metadata. + +#### Client Implementation Guidelines + +1. **Priority Order**: Clients SHOULD first attempt to read consolidated metadata, falling back to individual metadata requests only if consolidated metadata is unavailable. + +2. **Projection Discovery**: Use consolidated metadata to discover CRS information from any resolution level, typically from the native resolution or a representative overview level. + +3. **Variable Enumeration**: Enumerate available variables across all resolution levels using the consolidated metadata catalog. + +4. **Fallback Behavior**: When variables are not available at the optimal resolution level, use the consolidated metadata to identify the finest available resolution level containing that variable. 
+ +### Validation Rules + +- **Consolidated Metadata**: Multiscale groups SHALL provide consolidated metadata as specified above +- **Level Consistency**: Resolution level group names SHALL match either TileMatrix.id values (when using TileMatrixSet) or dataset path values +- **Structural Consistency**: All resolution level groups SHALL have the same member structure for variables they contain +- **Coordinate System Consistency**: All resolution levels SHALL use the same coordinate reference system +- **Chunking Alignment**: Chunks SHALL be aligned with the tile grid (1:1 mapping between chunks and tiles) when using TileMatrixSet + +### Decimation Requirements and Custom Scaling + +While TileMatrixSet commonly assumes quadtree decimation (scaling by factor of 2), custom TileMatrixSets MAY use alternative decimation factors: + +- **Factor of 2 (quadtree)**: Standard web mapping approach where each zoom level has 4x more tiles +- **Factor of 3 (nonary tree)**: Each zoom level has 9x more tiles, useful for certain scientific gridding schemes +- **Other integer factors**: Application-specific requirements may dictate alternative decimation + +When using non-standard decimation factors, the TileMatrixSet definition SHALL explicitly specify the matrixWidth and matrixHeight values for each TileMatrix to ensure correct spatial alignment and resolution relationships. 
+ +## Examples + +### Example 1: Simple TileMatrixSet Reference + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "multiscales": { + "version": "0.1", + "tile_matrix_set": "WebMercatorQuad", + "resampling_method": "average", + "tile_matrix_set_limits": { + "7": {"minTileRow": 42, "maxTileRow": 43, "minTileCol": 67, "maxTileCol": 68}, + "8": {"minTileRow": 85, "maxTileRow": 87, "minTileCol": 134, "maxTileCol": 137} + } + } + } + } +} +``` + +### Example 2: Custom UTM TileMatrixSet + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "multiscales": { + "version": "0.1", + "tile_matrix_set": { + "id": "UTM_Zone_33N_Custom", + "title": "UTM Zone 33N for Sentinel-2 native resolution", + "crs": "EPSG:32633", + "orderedAxes": ["E", "N"], + "tileMatrices": [ + { + "id": "0", + "scaleDenominator": 35.28, + "cellSize": 10.0, + "pointOfOrigin": [299960.0, 9000000.0], + "tileWidth": 1024, + "tileHeight": 1024, + "matrixWidth": 1094, + "matrixHeight": 1094 + }, + { + "id": "1", + "scaleDenominator": 70.56, + "cellSize": 20.0, + "pointOfOrigin": [299960.0, 9000000.0], + "tileWidth": 512, + "tileHeight": 512, + "matrixWidth": 547, + "matrixHeight": 547 + } + ] + }, + "resampling_method": "average" + } + } + } +} +``` + +### Example 3: Generic Datasets-based Pyramid + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "multiscales": { + "version": "0.1", + "datasets": [ + {"path": "", "scale": [1.0, 1.0, 1.0]}, + {"path": "level_1", "scale": [1.0, 2.0, 2.0]}, + {"path": "level_2", "scale": [1.0, 4.0, 4.0]} + ], + "resampling_method": "average" + } + } + } +} +``` + +### Example 4: Factor-of-3 Decimation + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "multiscales": { + "version": "0.1", + "tile_matrix_set": { + "id": "Custom_Nonary_Grid", + "crs": "EPSG:4326", + "tileMatrices": [ + { + "id": "0", + "matrixWidth": 1, + 
"matrixHeight": 1, + "tileWidth": 256, + "tileHeight": 256 + }, + { + "id": "1", + "matrixWidth": 3, + "matrixHeight": 3, + "tileWidth": 256, + "tileHeight": 256 + }, + { + "id": "2", + "matrixWidth": 9, + "matrixHeight": 9, + "tileWidth": 256, + "tileHeight": 256 + } + ] + }, + "resampling_method": "average" + } + } + } +} +``` + +## Versioning and Compatibility + +This specification uses semantic versioning (SemVer) for version management: + +- **Major version** changes indicate backward-incompatible changes to the attribute schema +- **Minor version** changes add new optional fields while maintaining backward compatibility +- **Patch version** changes fix documentation, clarify behavior, or make other non-breaking updates + +### Compatibility Guarantees + +- Parsers MUST support all fields defined in their major version +- Parsers SHOULD gracefully handle unknown optional fields from newer minor versions +- Producers SHOULD include the `version` field to indicate specification compliance level + +## Implementation Notes + +### Inconsistent Pyramid Support + +When implementing support for inconsistent pyramids: + +1. **Variable Discovery**: Scan all resolution levels to build a comprehensive variable catalog +2. **Fallback Logic**: When a variable is not available at the optimal resolution level, fall back to the finest resolution level containing that variable +3. 
**Metadata Consistency**: Ensure that coordinate system and chunking information remains consistent across levels + +### Performance Considerations + +- **Chunking Alignment**: For TileMatrixSet-based pyramids, chunks SHOULD be aligned with the tile grid (1:1 mapping between chunks and tiles) +- **Chunk Sizes**: Chunk sizes SHOULD match the `tileWidth` and `tileHeight` declared in the TileMatrix +- **Compression**: Use compression codecs appropriate for the data type and use case +- **Access Patterns**: Structure data to optimize common access patterns (spatial locality, multi-resolution queries) + +### TileMatrixSet Integration + +- **CRS Consistency**: The spatial reference system declared in `supportedCRS` SHALL match the one declared in the corresponding `grid_mapping` of the data variables +- **Group Naming**: Group names in the multiscale hierarchy SHALL correspond exactly to the TileMatrix identifier values +- **Conflict Avoidance**: Additional groups or arrays MAY be present alongside zoom level groups, but SHALL NOT use names that conflict with TileMatrix identifiers + +## Compatibility Notes + +- The specification supports both TileMatrixSet-based and generic datasets-based approaches for maximum flexibility +- Consolidated metadata at the multiscale group level provides complete information about all child datasets +- Integration with existing `geo.proj` attributes provides complete geospatial metadata coverage +- Native resolution storage in the Dataset Group maintains efficiency and compatibility with existing datasets + +## References + +- [OGC TileMatrixSet 2.0 Specification](https://docs.ogc.org/is/17-083r4/17-083r4.html) +- [GeoZarr Specification](https://github.com/zarr-developers/geozarr-spec) +- [OME-NGFF Multiscale Specification](https://ngff.openmicroscopy.org/latest/#multiscale-md) +- [STAC Projection Extension](https://github.com/stac-extensions/projection) diff --git a/attributes/geo/multiscales/schema.json 
b/attributes/geo/multiscales/schema.json new file mode 100644 index 0000000..e137cc1 --- /dev/null +++ b/attributes/geo/multiscales/schema.json @@ -0,0 +1,190 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://eopf-explorer.github.io/data-model/attributes/geo/multiscales/schema.json", + "title": "Geo Multiscales Attribute Extension", + "description": "JSON schema for the geo.multiscales attribute in Zarr groups", + "type": "object", + "properties": { + "version": { + "type": "string", + "enum": ["0.1"], + "description": "Multiscales metadata version" + }, + "tile_matrix_set": { + "oneOf": [ + { + "type": "string", + "description": "Reference by identifier or URI to a TileMatrixSet" + }, + { + "type": "object", + "description": "Inline TileMatrixSet definition following OGC TileMatrixSet 2.0 specification", + "properties": { + "id": { + "type": "string", + "description": "Identifier for the TileMatrixSet" + }, + "title": { + "type": "string", + "description": "Human-readable title for the TileMatrixSet" + }, + "crs": { + "type": "string", + "description": "Coordinate reference system identifier" + }, + "orderedAxes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "Ordered list of axis abbreviations" + }, + "tileMatrices": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": { + "type": "string", + "description": "TileMatrix identifier" + }, + "scaleDenominator": { + "type": "number", + "description": "Scale denominator for this zoom level" + }, + "cellSize": { + "type": "number", + "description": "Cell size in CRS units" + }, + "pointOfOrigin": { + "type": "array", + "items": { + "type": "number" + }, + "minItems": 2, + "maxItems": 2, + "description": "Top-left corner coordinates" + }, + "tileWidth": { + "type": "integer", + "minimum": 1, + "description": "Width of each tile in pixels" + }, + "tileHeight": { + "type": "integer", + "minimum": 1, + "description": "Height of each tile 
in pixels" + }, + "matrixWidth": { + "type": "integer", + "minimum": 1, + "description": "Number of tiles horizontally" + }, + "matrixHeight": { + "type": "integer", + "minimum": 1, + "description": "Number of tiles vertically" + } + }, + "required": ["id", "tileWidth", "tileHeight", "matrixWidth", "matrixHeight"], + "additionalProperties": false + }, + "description": "Array of TileMatrix definitions" + } + }, + "required": ["id", "crs", "tileMatrices"], + "additionalProperties": true + } + ], + "description": "TileMatrixSet definition or reference" + }, + "datasets": { + "type": "array", + "items": { + "type": "object", + "properties": { + "path": { + "type": "string", + "description": "Path to the dataset group (empty string for root level)" + }, + "scale": { + "type": "array", + "items": { + "type": "number", + "minimum": 0 + }, + "description": "Scale factors for each axis relative to the finest resolution" + } + }, + "required": ["path"], + "additionalProperties": false + }, + "description": "Array of dataset definitions for each resolution level" + }, + "resampling_method": { + "type": "string", + "enum": [ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ], + "default": "nearest", + "description": "Resampling method used for downsampling" + }, + "tile_matrix_set_limits": { + "type": "object", + "patternProperties": { + "^.*$": { + "type": "object", + "properties": { + "minTileRow": { + "type": "integer", + "minimum": 0, + "description": "Minimum tile row index" + }, + "maxTileRow": { + "type": "integer", + "minimum": 0, + "description": "Maximum tile row index" + }, + "minTileCol": { + "type": "integer", + "minimum": 0, + "description": "Minimum tile column index" + }, + "maxTileCol": { + "type": "integer", + "minimum": 0, + "description": "Maximum tile column index" + } + }, + "required": ["minTileRow", "maxTileRow", "minTileCol", 
"maxTileCol"], + "additionalProperties": false + } + }, + "description": "Optional limits for available tiles per zoom level" + } + }, + "required": ["version"], + "anyOf": [ + { + "required": ["tile_matrix_set"] + }, + { + "required": ["datasets"] + } + ], + "additionalProperties": true +} From b63fe601226eb1c5b8b770d5c30b2d475f037de1 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Mon, 29 Sep 2025 22:25:49 +0200 Subject: [PATCH 2/7] Update Geo Multiscales Attribute Extension schema to version 0.1.0 and refine layout definition --- attributes/geo/multiscales/README.md | 363 ++++++++----------------- attributes/geo/multiscales/schema.json | 156 +---------- 2 files changed, 118 insertions(+), 401 deletions(-) diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md index 93de98b..67a2489 100644 --- a/attributes/geo/multiscales/README.md +++ b/attributes/geo/multiscales/README.md @@ -31,12 +31,11 @@ The `multiscales` key under the `geo` dictionary can be added to Zarr groups to **`geo -> multiscales` Properties** -| | Type | Description | Required | Reference | -| -------------------------- | ------------------ | -------------------------------------------------- | -------- | ------------------------------------------------------------------------------------- | -| **version** | `string` | Multiscales metadata version | ✓ Yes | [geo -> multiscales.version](#geo---multiscalesversion) | -| **tile_matrix_set** | `string \| object` | Tile matrix set definition or reference | No | [geo -> multiscales.tile_matrix_set](#geo---multiscalestile_matrix_set) | -| **resampling_method** | `string` | Resampling method used for downsampling | No | [geo -> multiscales.resampling_method](#geo---multiscalesresampling_method) | -| **tile_matrix_set_limits** | `object` | Optional limits for available tiles per zoom level | No | [geo -> multiscales.tile_matrix_set_limits](#geo---multiscalestile_matrix_set_limits) | +| | Type | Description | Required | 
Reference | +| --------------------- | ---------- | ---------------------------------------------------- | -------- | --------------------------------------------------------------------------- | +| **version** | `string` | Multiscales metadata version | ✓ Yes | [geo -> multiscales.version](#geo---multiscalesversion) | +| **layout** | `[string]` | Array of group names representing the pyramid layout | ✓ Yes | [geo -> multiscales.layout](#geo---multiscaleslayout) | +| **resampling_method** | `string` | Resampling method used for downsampling | No | [geo -> multiscales.resampling_method](#geo---multiscalesresampling_method) | ### Field Details @@ -48,56 +47,16 @@ Multiscales metadata version * **Type**: `string` * **Required**: ✓ Yes -* **Allowed values**: `0.1` +* **Allowed values**: `0.1.0` #### geo -> multiscales.layout -TileMatrixSet definition or reference +Array of group names representing the pyramid layout -* **Type**: `string | object` -* **Required**: No - -This field can contain either: -1. **Reference by identifier**: A string identifier referencing a well-known TileMatrixSet (e.g., "WebMercatorQuad") -2. **URI reference**: A URI pointing to a JSON document describing the tile matrix set -3. 
**Inline definition**: A complete TileMatrixSet JSON object following OGC TileMatrixSet 2.0 specification - -**Reference by identifier:** -```json -{ - "tile_matrix_set": "WebMercatorQuad" -} -``` +* **Type**: array of `string` +* **Required**: Yes -**URI reference:** -```json -{ - "tile_matrix_set": "https://maps.example.org/tileMatrixSets/WebMercatorQuad.json" -} -``` - -**Inline definition:** -```json -{ - "tile_matrix_set": { - "id": "Custom_Grid", - "title": "Custom Grid for Scientific Data", - "crs": "EPSG:4326", - "tileMatrices": [ - { - "id": "0", - "scaleDenominator": 0.703125, - "cellSize": 0.0625, - "pointOfOrigin": [-180.0, 90.0], - "tileWidth": 256, - "tileHeight": 256, - "matrixWidth": 2, - "matrixHeight": 1 - } - ] - } -} -``` +This field SHALL describe the pyramid hierarchy with an array of strings representing the group names for each resolution level, ordered from highest to lowest resolution. #### geo -> multiscales.resampling_method @@ -110,58 +69,30 @@ Resampling method used for downsampling The same method SHALL apply across all levels. -#### geo -> multiscales.tile_matrix_set_limits - -Optional limits for available tiles per zoom level - -* **Type**: `object` -* **Required**: No - -Defines the available tile ranges for each zoom level. Keys must match TileMatrix.id values from the TileMatrixSet. - -```json -{ - "tile_matrix_set_limits": { - "0": { - "minTileRow": 0, - "maxTileRow": 0, - "minTileCol": 0, - "maxTileCol": 0 - }, - "1": { - "minTileRow": 0, - "maxTileRow": 1, - "minTileCol": 0, - "maxTileCol": 1 - } - } -} -``` - - ### Hierarchical Layout -Multiscale datasets follow a specific hierarchical structure that accommodates both native resolution storage and overview levels: +Multiscale datasets SHOULD follow a specific hierarchical structure that accommodates both native resolution storage and overview levels: -1. **Dataset Group**: Contains native resolution data and multiscales metadata +1. 
**Multiscale Group**: Contains `multiscales` metadata 2. **Overview Level Groups**: Child groups containing overview data at different resolutions ``` -/measurements/ # Dataset Group with multiscales metadata +multiscales/ # Group with `multiscales` metadata ├── 0/ # First overview level +│ ├── b01 # Native resolution variable | ├── b02 # Native resolution variable | ├── b03 # Native resolution variable | ├── b04 # Native resolution variable -| ├── spatial_ref # Coordinate reference variable -├── 1/ # First overview level +| ├── spatial_ref # Auxiliary spatial reference variable +├── 1/ # Second overview level │ ├── b01 # All bands available at overview level │ ├── b02 │ ├── b03 │ ├── ... │ └── spatial_ref -└── 2/ # Second overview level +└── 2/ # Third overview level ├── b01 ├── b02 ├── b03 @@ -169,41 +100,32 @@ Multiscale datasets follow a specific hierarchical structure that accommodates b └── spatial_ref ``` -**Key principles:** -- Native resolution variables are stored directly in the Dataset Group (not in a separate "0/" group) -- Overview levels are stored in child groups with names matching TileMatrix identifiers -- This approach maintains efficiency by avoiding the need to restructure existing datasets when adding overviews +**Recommendations:** -### Group Discovery Methods +- All levels are stored in child groups with names matching layout keys (e.g., `0`, `1`, `2`, or custom names) +- The native resolution dataset is stored in a child group (e.g., `0`) alongside overview levels -The multiscales metadata enables complete discovery of the multiscale collection structure through multiple mechanisms: +> [!Note] Layout can describe native resolution stored in the multiscale group directly by using the key `.` (dot) to represent the current group. This is not recommended but MAY be used for backward compatibility with existing datasets that are augmented with multiscale metadata. 
It is important to acknowledge that this layout is less optimal for clients and MAY lead to errors. For instance, xarray's `open_dataset` function does not support data trees in which parent and child shapes do not align. -1. **TileMatrixSet-based discovery**: - - The TileMatrixSet definition specifies the exact set of zoom levels through its tileMatrices array - - Each TileMatrix.id value corresponds to a child group in the multiscale hierarchy - - Variable discovery within each zoom level group follows standard Zarr metadata conventions +### Group Discovery Methods -2. **Generic datasets-based discovery**: - - The `datasets` array explicitly lists all resolution levels and their paths - - Scale factors provide resolution relationships +The multiscales metadata enables complete discovery of the multiscale collection structure through a simple layout mechanism: -3. **Explicit limits**: - - `tile_matrix_set_limits` explicitly declares which zoom levels contain data - - For storage backends that do not support directory listing, this is the primary mechanism for discovering available zoom levels +- The `layout` definition specifies the exact set of zoom levels through its array of group names +- Each group name corresponds to a child group in the multiscale hierarchy +- Variable discovery within each zoom level group follows standard Zarr metadata conventions and should use the consolidated metadata feature for efficiency ### Consolidated Metadata Requirements -**Consolidated metadata is MANDATORY for multiscale groups** to ensure complete discoverability of pyramid structure and metadata without requiring individual access to each child dataset. +**Consolidated metadata is HIGHLY RECOMMENDED for multiscale groups** to ensure complete discoverability of pyramid structure and metadata without requiring individual access to each child dataset. #### Requirements 1. 
**Zarr Consolidated Metadata**: The multiscale group SHALL use Zarr's consolidated metadata feature to expose metadata from all child groups and arrays at the group level. -2. **Projection Information Access**: All projection information (CRS, transforms, grid mappings) from child datasets SHALL be accessible through the consolidated metadata at the multiscale group level. - -3. **Variable Discovery**: The consolidated metadata SHALL include complete variable listings for all resolution levels, enabling clients to understand the full pyramid structure without traversing child groups. +2. **Variable Discovery**: The consolidated metadata SHALL include complete variable listings for all resolution levels, enabling clients to understand the full pyramid structure without traversing child groups. -4. **Coordinate Information**: Coordinate arrays and their metadata from all resolution levels SHALL be included in the consolidated metadata. +3. **Projection Information Access**: All projection information via the [`geo/proj` attribute](../proj/README.md) from child datasets SHALL be accessible through the consolidated metadata at the multiscale group level. According to the attributes provided, the client shall be able to discover the CRS, bounding box, and resolution information from any resolution level. 
#### Client Implementation Guidelines @@ -218,24 +140,12 @@ The multiscales metadata enables complete discovery of the multiscale collection ### Validation Rules - **Consolidated Metadata**: Multiscale groups SHALL provide consolidated metadata as specified above -- **Level Consistency**: Resolution level group names SHALL match either TileMatrix.id values (when using TileMatrixSet) or dataset path values -- **Structural Consistency**: All resolution level groups SHALL have the same member structure for variables they contain +- **Level Consistency**: The name of each resolution level group SHALL match its corresponding entry in the `layout` array - **Coordinate System Consistency**: All resolution levels SHALL use the same coordinate reference system -- **Chunking Alignment**: Chunks SHALL be aligned with the tile grid (1:1 mapping between chunks and tiles) when using TileMatrixSet - -### Decimation Requirements and Custom Scaling - -While TileMatrixSet commonly assumes quadtree decimation (scaling by factor of 2), custom TileMatrixSets MAY use alternative decimation factors: - -- **Factor of 2 (quadtree)**: Standard web mapping approach where each zoom level has 4x more tiles -- **Factor of 3 (nonary tree)**: Each zoom level has 9x more tiles, useful for certain scientific gridding schemes -- **Other integer factors**: Application-specific requirements may dictate alternative decimation - -When using non-standard decimation factors, the TileMatrixSet definition SHALL explicitly specify the matrixWidth and matrixHeight values for each TileMatrix to ensure correct spatial alignment and resolution relationships. 
## Examples -### Example 1: Simple TileMatrixSet Reference +### Example 1: Simple UTM Pyramid ```json { @@ -243,126 +153,90 @@ When using non-standard decimation factors, the TileMatrixSet definition SHALL e "node_type": "group", "attributes": { "geo": { + "proj": { + "epsg": 32633, + "bbox": [500000.0, 0.0, 600000.0, 1000000.0] + }, "multiscales": { - "version": "0.1", - "tile_matrix_set": "WebMercatorQuad", - "resampling_method": "average", - "tile_matrix_set_limits": { - "7": {"minTileRow": 42, "maxTileRow": 43, "minTileCol": 67, "maxTileCol": 68}, - "8": {"minTileRow": 85, "maxTileRow": 87, "minTileCol": 134, "maxTileCol": 137} - } - } - } - } -} -``` - -### Example 2: Custom UTM TileMatrixSet - -```json -{ - "zarr_format": 3, - "node_type": "group", - "attributes": { - "geo": { - "multiscales": { - "version": "0.1", - "tile_matrix_set": { - "id": "UTM_Zone_33N_Custom", - "title": "UTM Zone 33N for Sentinel-2 native resolution", - "crs": "EPSG:32633", - "orderedAxes": ["E", "N"], - "tileMatrices": [ - { - "id": "0", - "scaleDenominator": 35.28, - "cellSize": 10.0, - "pointOfOrigin": [299960.0, 9000000.0], - "tileWidth": 1024, - "tileHeight": 1024, - "matrixWidth": 1094, - "matrixHeight": 1094 - }, - { - "id": "1", - "scaleDenominator": 70.56, - "cellSize": 20.0, - "pointOfOrigin": [299960.0, 9000000.0], - "tileWidth": 512, - "tileHeight": 512, - "matrixWidth": 547, - "matrixHeight": 547 - } - ] - }, + "version": "0.1.0", + "layout": ["0", "1", "2", "3"], "resampling_method": "average" } } - } -} -``` - -### Example 3: Generic Datasets-based Pyramid - -```json -{ - "zarr_format": 3, - "node_type": "group", - "attributes": { - "geo": { - "multiscales": { - "version": "0.1", - "datasets": [ - {"path": "", "scale": [1.0, 1.0, 1.0]}, - {"path": "level_1", "scale": [1.0, 2.0, 2.0]}, - {"path": "level_2", "scale": [1.0, 4.0, 4.0]} - ], - "resampling_method": "average" - } - } - } -} -``` - -### Example 4: Factor-of-3 Decimation - -```json -{ - "zarr_format": 3, - 
"node_type": "group", - "attributes": { - "geo": { - "multiscales": { - "version": "0.1", - "tile_matrix_set": { - "id": "Custom_Nonary_Grid", - "crs": "EPSG:4326", - "tileMatrices": [ - { - "id": "0", - "matrixWidth": 1, - "matrixHeight": 1, - "tileWidth": 256, - "tileHeight": 256 - }, - { - "id": "1", - "matrixWidth": 3, - "matrixHeight": 3, - "tileWidth": 256, - "tileHeight": 256 - }, - { - "id": "2", - "matrixWidth": 9, - "matrixHeight": 9, - "tileWidth": 256, - "tileHeight": 256 + }, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "0": { + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "proj": { + "epsg": 32633, + "bbox": [50000.0, 0.0, 60000.0, 100000.0], + "transform": [10.0, 0.0, 50000.0, 0.0, -10.0, 100000.0, 0.0, 0.0, 1.0] } - ] - }, - "resampling_method": "average" - } + } + } + }, + "0/b01": { + "zarr_format": 3, + "node_type": "array", + "shape": [10000, 10000], + "dtype": " Date: Mon, 29 Sep 2025 22:31:22 +0200 Subject: [PATCH 3/7] Refactor README to update variable names and enhance clarity in multiscale pyramid examples --- attributes/geo/multiscales/README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md index 67a2489..dd07627 100644 --- a/attributes/geo/multiscales/README.md +++ b/attributes/geo/multiscales/README.md @@ -85,25 +85,25 @@ multiscales/ # Group with `multiscales` metadata | ├── b02 # Native resolution variable | ├── b03 # Native resolution variable | ├── b04 # Native resolution variable -| ├── spatial_ref # Auxiliary spatial reference variable +| ├── y # Coordinate variable +| ├── x # Coordinate variable ├── 1/ # Second overview level │ ├── b01 # All bands available at overview level │ ├── b02 │ ├── b03 │ ├── ... -│ └── spatial_ref +│ ├── y +│ └── x └── 2/ # Third overview level ├── b01 ├── b02 ├── b03 ├── ... 
- └── spatial_ref + ├── y + └── x ``` -**Recommendations:** - -- All levels are stored in child groups with names matching layout keys (e.g., `0`, `1`, `2`, or custom names) -- The native resolution dataset is stored in a child group (e.g., `0`) alongside overview levels +All levels SHOULD be stored in child groups with names matching layout keys (e.g., `0`, `1`, `2`, or custom names) > [!Note] Layout can describe native resolution stored in the multiscale group directly by using the key `.` (dot) to represent the current group. This is not recommended but MAY be used for backward compatibility with existing datasets that are augmented with multiscale metadata. It is important to acknowledge that this layout is less optimal for clients and MAY lead to errors. For instance, xarray's `open_dataset` function does not support data tree where parent and children shape do not align. @@ -145,7 +145,7 @@ The multiscales metadata enables complete discovery of the multiscale collection ## Examples -### Example 1: Simple UTM Pyramid +### Example 1: Simple Multiscale Pyramid with UTM Grid ```json { From e8e44987c1ce33e138cc7e633ae0a4c2d913d234 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Mon, 29 Sep 2025 22:37:20 +0200 Subject: [PATCH 4/7] Add example for WebMercatorQuad TileMatrixSet-compatible multiscale pyramid in README --- attributes/geo/multiscales/README.md | 84 ++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md index dd07627..57f66ed 100644 --- a/attributes/geo/multiscales/README.md +++ b/attributes/geo/multiscales/README.md @@ -242,6 +242,90 @@ The multiscales metadata enables complete discovery of the multiscale collection } ``` +### Example 2: WebMercatorQuad TileMatrixSet-Compatible Pyramid + +This example shows a multiscale pyramid that follows the OGC WebMercatorQuad TileMatrixSet structure, commonly used for web mapping applications. 
+ +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "proj": { + "epsg": 3857, + "bbox": [-20037508.34, -20037508.34, 20037508.34, 20037508.34] + }, + "multiscales": { + "version": "0.1.0", + "layout": ["18", "17", "16", "15", "14"], + "resampling_method": "average" + } + } + }, + "consolidated_metadata": { + "kind": "inline", + "must_understand": false, + "metadata": { + "18": { + "zarr_format": 3, + "node_type": "group", + "attributes": { + "geo": { + "proj": { + "epsg": 3857, + "bbox": [-20037508.34, -20037508.34, 20037508.34, 20037508.34], + "transform": [0.5971642834779395, 0.0, -20037508.34, 0.0, -0.5971642834779395, 20037508.34, 0.0, 0.0, 1.0] + } + } + } + }, + "18/red": { + "zarr_format": 3, + "node_type": "array", + "shape": [8192, 8192], + "dtype": " Date: Mon, 29 Sep 2025 22:50:27 +0200 Subject: [PATCH 5/7] Update README to clarify hierarchical layout of Zarr groups and replace STAC Projection Extension link with GDAL Raster Data Model reference --- attributes/geo/multiscales/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md index 57f66ed..7eb5426 100644 --- a/attributes/geo/multiscales/README.md +++ b/attributes/geo/multiscales/README.md @@ -8,7 +8,7 @@ ## Description -This specification defines a JSON object that encodes multiscale pyramid information for geospatial data stored in Zarr groups under the `multiscales` key within the `geo` dictionary in the attributes of Zarr groups. Additionally, it specifies the hierarchical layout of datasets representing different resolution levels, discovery mechanisms for clients to enumerate available levels and variables, and requirements for consolidated metadata to ensure complete discoverability of the multiscale structure. 
+This specification defines a JSON object that encodes multiscale pyramid information for geospatial data stored in Zarr groups under the `multiscales` key within the `geo` dictionary in the attributes of Zarr groups. Additionally, it specifies the hierarchical layout of Zarr groups representing different resolution levels, discovery mechanisms for clients to enumerate available levels and variables, and requirements for consolidated metadata to ensure complete discoverability of the multiscale structure. ## Motivation @@ -362,4 +362,4 @@ When implementing support for inconsistent pyramids: - [OGC TileMatrixSet 2.0 Specification](https://docs.ogc.org/is/17-083r4/17-083r4.html) - [GeoZarr Specification](https://github.com/zarr-developers/geozarr-spec) - [OME-NGFF Multiscale Specification](https://ngff.openmicroscopy.org/latest/#multiscale-md) -- [STAC Projection Extension](https://github.com/stac-extensions/projection) +- [GDAL Raster Data Model - Overviews](https://gdal.org/en/stable/user/raster_data_model.html#overviews) From 6e66446837ff4304e804b88a987c2f1323dd78c3 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Tue, 30 Sep 2025 16:21:35 +0200 Subject: [PATCH 6/7] Update Geo Multiscales Attribute Extension schema to use objects for layout representation and enhance field descriptions --- attributes/geo/multiscales/README.md | 32 +++++++++++++----- attributes/geo/multiscales/schema.json | 45 ++++++++++++++++++++++++-- 2 files changed, 67 insertions(+), 10 deletions(-) diff --git a/attributes/geo/multiscales/README.md b/attributes/geo/multiscales/README.md index 7eb5426..27976af 100644 --- a/attributes/geo/multiscales/README.md +++ b/attributes/geo/multiscales/README.md @@ -34,7 +34,7 @@ The `multiscales` key under the `geo` dictionary can be added to Zarr groups to | | Type | Description | Required | Reference | | --------------------- | ---------- | ---------------------------------------------------- | -------- | 
--------------------------------------------------------------------------- | | **version** | `string` | Multiscales metadata version | ✓ Yes | [geo -> multiscales.version](#geo---multiscalesversion) | -| **layout** | `[string]` | Array of group names representing the pyramid layout | ✓ Yes | [geo -> multiscales.layout](#geo---multiscaleslayout) | +| **layout** | `[object]` | Array of objects representing the pyramid layout | ✓ Yes | [geo -> multiscales.layout](#geo---multiscaleslayout) | | **resampling_method** | `string` | Resampling method used for downsampling | No | [geo -> multiscales.resampling_method](#geo---multiscalesresampling_method) | ### Field Details @@ -51,12 +51,19 @@ Multiscales metadata version #### geo -> multiscales.layout -Array of group names representing the pyramid layout +Array of objects representing the pyramid layout and decimation relationships -* **Type**: array of `string` +* **Type**: array of `object` * **Required**: Yes -This field SHALL describe the pyramid hierarchy with an array of strings representing the group names for each resolution level, ordered from highest to lowest resolution. +This field SHALL describe the pyramid hierarchy with an array of objects representing each resolution level, ordered from highest to lowest resolution. Each object contains: + +- **`group`** (required): Group name for this resolution level +- **`from_group`** (optional): Source group used to generate this level +- **`factors`** (optional): Array of decimation factors per axis (e.g., `[2, 2]` for 2x decimation in X and Y) +- **`resampling_method`** (optional): Resampling method for this specific level + +The first level typically contains only the `group` field (native resolution), while subsequent levels include derivation information. 
#### geo -> multiscales.resampling_method @@ -159,8 +166,12 @@ The multiscales metadata enables complete discovery of the multiscale collection }, "multiscales": { "version": "0.1.0", - "layout": ["0", "1", "2", "3"], - "resampling_method": "average" + "layout": [ + {"group": "0"}, + {"group": "1", "from_group": "0", "factors": [2, 2], "resampling_method": "average"}, + {"group": "2", "from_group": "1", "factors": [2, 2], "resampling_method": "average"}, + {"group": "3", "from_group": "2", "factors": [2, 2], "resampling_method": "average"} + ] } } }, @@ -258,8 +269,13 @@ This example shows a multiscale pyramid that follows the OGC WebMercatorQuad Til }, "multiscales": { "version": "0.1.0", - "layout": ["18", "17", "16", "15", "14"], - "resampling_method": "average" + "layout": [ + {"group": "18"}, + {"group": "17", "from_group": "18", "factors": [2, 2], "resampling_method": "average"}, + {"group": "16", "from_group": "17", "factors": [2, 2], "resampling_method": "average"}, + {"group": "15", "from_group": "16", "factors": [2, 2], "resampling_method": "average"}, + {"group": "14", "from_group": "15", "factors": [2, 2], "resampling_method": "average"} + ] } } }, diff --git a/attributes/geo/multiscales/schema.json b/attributes/geo/multiscales/schema.json index 04649f4..9a80348 100644 --- a/attributes/geo/multiscales/schema.json +++ b/attributes/geo/multiscales/schema.json @@ -13,10 +13,51 @@ "layout": { "type": "array", "items": { - "type": "string" + "type": "object", + "properties": { + "group": { + "type": "string", + "description": "Group name for this resolution level" + }, + "from_group": { + "type": "string", + "description": "Source group used to generate this level" + }, + "factors": { + "type": "array", + "items": { + "type": "number", + "minimum": 1 + }, + "description": "Array of decimation factors per axis (e.g., [2, 2] for 2x decimation in X and Y)" + }, + "resampling_method": { + "type": "string", + "enum": [ + "nearest", + "average", + "bilinear", + 
"cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ], + "description": "Resampling method for this specific level" + } + }, + "required": ["group"], + "additionalProperties": false }, "minItems": 1, - "description": "Array of group names representing the pyramid layout, ordered from highest to lowest resolution" + "description": "Array of objects representing the pyramid layout and decimation relationships, ordered from highest to lowest resolution" }, "resampling_method": { "type": "string", From 101903fc5e0e7a47897820eb8ddca1c1ed05a9f7 Mon Sep 17 00:00:00 2001 From: Emmanuel Mathot Date: Fri, 3 Oct 2025 15:12:47 +0200 Subject: [PATCH 7/7] Add Multiscales Attribute Extension schema and README documentation --- attributes/multiscales/README.md | 286 +++++++++++++++++++++++++++++ attributes/multiscales/schema.json | 101 ++++++++++ 2 files changed, 387 insertions(+) create mode 100644 attributes/multiscales/README.md create mode 100644 attributes/multiscales/schema.json diff --git a/attributes/multiscales/README.md b/attributes/multiscales/README.md new file mode 100644 index 0000000..3d717f1 --- /dev/null +++ b/attributes/multiscales/README.md @@ -0,0 +1,286 @@ +# Multiscales Attribute Extension for Zarr + +- **Extension Name**: Multiscales Attribute Extension +- **Version**: 0.1.0 +- **Extension Type**: Attribute +- **Status**: Proposed +- **Owners**: @emmanuelmathot + +## Description + +This specification defines a JSON object that encodes multiscale pyramid information for data stored in Zarr groups under the `multiscales` key in the attributes of Zarr groups. This is a domain-agnostic specification that describes the hierarchical layout of Zarr groups representing different resolution levels and the transformations between them. + +## Motivation + +- Provides standardized multiscale pyramid encoding applicable across domains (geospatial, bioimaging, etc.) 
+- Supports flexible decimation schemes (factor-of-2, factor-of-3, custom factors) +- Explicitly captures scale and translation transformations induced by downsampling +- Enables optimized data access patterns for visualization and analysis at different scales +- Composable with domain-specific metadata (e.g., geo/proj for geospatial CRS information) + +## Background + +This specification emerged from discussions between the geospatial and bioimaging communities about multiscale data representation in Zarr. While both domains need multiscale pyramids, they differ in how spatial metadata is handled: + +- **Bioimaging** (OME-NGFF): Multiscale metadata includes all spatial transformation information +- **Geospatial**: Coordinate Reference System (CRS) information is typically separate from multiscale metadata + +This generic specification captures the **transformation induced by downsampling** (scale and translation), allowing domain-specific extensions to provide additional spatial metadata as needed. + +## Inheritance Model + +The `multiscales` key is defined at the group level and applies to the hierarchical structure within that group. There is no inheritance of `multiscales` metadata from parent groups to child groups. Each multiscale group defines its own pyramid structure independently. + +## Specification + +The `multiscales` key can be added to Zarr group attributes to define multiscale pyramid information. 
+ + +**`multiscales` Properties** + +| | Type | Description | Required | Reference | +| --------------------- | ---------- | ---------------------------------------------------- | -------- | ------------------------------------------------------------ | +| **version** | `string` | Multiscales metadata version | ✓ Yes | [multiscales.version](#multiscalesversion) | +| **layout** | `[object]` | Array of objects representing the pyramid layout | ✓ Yes | [multiscales.layout](#multiscaleslayout) | +| **resampling_method** | `string` | Resampling method used for downsampling | No | [multiscales.resampling_method](#multiscalesresampling_method) | + +### Field Details + +Additional properties are allowed. + +#### multiscales.version + +Multiscales metadata version + +* **Type**: `string` +* **Required**: ✓ Yes +* **Allowed values**: `0.1.0` + +#### multiscales.layout + +Array of objects representing the pyramid layout and transformation relationships + +* **Type**: array of `object` +* **Required**: Yes + +This field SHALL describe the pyramid hierarchy with an array of objects representing each resolution level, ordered from highest to lowest resolution. Each object contains: + +- **`group`** (required): Group name for this resolution level +- **`from_group`** (optional): Source group used to generate this level +- **`factors`** (optional): Array of decimation factors per axis (e.g., `[2, 2]` for 2x decimation) +- **`scale`** (optional): Array of scale factors per axis describing the resolution change +- **`translation`** (optional): Array of translation offsets per axis in the coordinate space +- **`resampling_method`** (optional): Resampling method for this specific level + +The first level typically contains only the `group` field (native resolution), while subsequent levels include transformation information. + +**Transformation Semantics**: + +The `scale` and `translation` parameters describe how to map from array indices to a coordinate space at each level. 
For downsampling operations: + +- **Scale** represents the multiplicative factor applied to coordinates (e.g., scale of 2.0 means one pixel represents twice the coordinate span) +- **Translation** represents the coordinate offset, useful when downsampling takes a subset of the original sampling grid + +These transformations allow clients to determine the spatial extent of each pyramid level without needing to understand the specific downsampling algorithm. + +#### multiscales.resampling_method + +Resampling method used for downsampling + +* **Type**: `string` +* **Required**: No +* **Allowed values**: `"nearest"`, `"average"`, `"bilinear"`, `"cubic"`, `"cubic_spline"`, `"lanczos"`, `"mode"`, `"max"`, `"min"`, `"med"`, `"sum"`, `"q1"`, `"q3"`, `"rms"`, `"gauss"` +* **Default**: `"nearest"` + +The same method SHALL apply across all levels unless overridden at the level-specific `resampling_method`. + + + +### Hierarchical Layout + +Multiscale datasets SHOULD follow a specific hierarchical structure: + +1. **Multiscale Group**: Contains `multiscales` metadata +2. **Resolution Level Groups**: Child groups containing data at different resolutions + +``` +multiscales/ # Group with `multiscales` metadata +├── 0/ # First resolution level (highest resolution) +│ ├── data # Data variable +│ └── ... +├── 1/ # Second resolution level +│ ├── data # Data at lower resolution +│ └── ... +└── 2/ # Third resolution level + ├── data + └── ... +``` + +All levels SHOULD be stored in child groups with names matching the `group` values of the `layout` entries (e.g., `0`, `1`, `2`, or custom names). 
+ +### Group Discovery Methods + +The multiscales metadata enables complete discovery of the multiscale collection structure through the layout mechanism: + +- The `layout` definition specifies the exact set of resolution levels through the `group` field of each object in its array +- Each group name corresponds to a child group in the multiscale hierarchy +- Variable discovery within each level follows standard Zarr metadata conventions + +### Consolidated Metadata + +**Consolidated metadata is HIGHLY RECOMMENDED for multiscale groups** to ensure complete discoverability of pyramid structure and metadata without requiring individual access to each child dataset. + +#### Requirements + +1. **Zarr Consolidated Metadata**: The multiscale group SHOULD use Zarr's consolidated metadata feature to expose metadata from all child groups and arrays at the group level. + +2. **Variable Discovery**: The consolidated metadata SHOULD include complete variable listings for all resolution levels, enabling clients to understand the full pyramid structure without traversing child groups. 
+ +### Validation Rules + +- **Level Consistency**: Resolution level group names SHALL match children group path values in the `layout` array +- **Transformation Consistency**: If both `factors` and `scale` are provided, they SHOULD be consistent with each other + +## Examples + +### Example 1: Simple Power-of-2 Pyramid + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "multiscales": { + "version": "0.1.0", + "layout": [ + { + "group": "0" + }, + { + "group": "1", + "from_group": "0", + "factors": [2, 2], + "scale": [2.0, 2.0], + "translation": [0.0, 0.0], + "resampling_method": "average" + }, + { + "group": "2", + "from_group": "1", + "factors": [2, 2], + "scale": [4.0, 4.0], + "translation": [0.0, 0.0], + "resampling_method": "average" + } + ], + "resampling_method": "average" + } + } +} +``` + +### Example 2: Custom Pyramid Levels + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "multiscales": { + "version": "0.1.0", + "layout": [ + { + "group": "full" + }, + { + "group": "half", + "from_group": "full", + "scale": [2.0, 2.0] + }, + { + "group": "quarter", + "from_group": "half", + "scale": [4.0, 4.0] + }, + { + "group": "eighth", + "from_group": "quarter", + "scale": [8.0, 8.0] + } + ] + } + } +} +``` + +## Composition with Domain-Specific Metadata + +This generic multiscales specification is designed to be composed with domain-specific metadata: + +### Geospatial Data + +For geospatial data, combine with `geo/proj` attributes to specify the Coordinate Reference System: + +```json +{ + "zarr_format": 3, + "node_type": "group", + "attributes": { + "multiscales": { + "version": "0.1.0", + "layout": [ + {"group": "0"}, + {"group": "1", "from_group": "0", "factors": [2, 2], "scale": [2.0, 2.0]} + ] + }, + "geo": { + "proj": { + "version": "0.1", + "code": "EPSG:32633", + "transform": [10.0, 0.0, 500000.0, 0.0, -10.0, 5000000.0], + "bbox": [500000.0, 4900000.0, 600000.0, 5000000.0] + } + } + } +} +``` + +At 
each resolution level, the `geo/proj` metadata would specify the appropriate transform for that resolution. + +### Bioimaging Data + +For bioimaging, spatial metadata can be integrated directly at each level following OME-NGFF conventions while using this specification for the pyramid structure. + +## Versioning and Compatibility + +This specification uses semantic versioning (SemVer) for version management: + +- **Major version** changes indicate backward-incompatible changes to the attribute schema +- **Minor version** changes add new optional fields while maintaining backward compatibility +- **Patch version** changes fix documentation, clarify behavior, or make other non-breaking updates + +### Compatibility Guarantees + +- Parsers MUST support all fields defined in their major version +- Parsers SHOULD gracefully handle unknown optional fields from newer minor versions +- Producers SHOULD include the `version` field to indicate specification compliance level + +## Implementation Notes + +### Scale and Translation Parameters + +The `scale` and `translation` parameters explicitly capture the transformation induced by downsampling. This approach has several advantages: + +1. **Explicit vs. Implicit**: Clients don't need to infer transformations from decimation factors +2. **Flexibility**: Supports arbitrary downsampling schemes beyond simple decimation +3. **Composability**: Domain-specific coordinate systems can build upon these transformations + +### Relationship to Decimation Factors + +The `factors` field is provided for convenience and documentation purposes. The `scale` field is the authoritative source for the resolution relationship. When both are present, they should be consistent. 
+ +## References + +- [OME-NGFF Multiscale Specification](https://ngff.openmicroscopy.org/latest/#multiscale-md) +- [Zarr Specifications Discussion on Multiscales](https://github.com/zarr-developers/zarr-specs/issues/50) +- [GeoZarr Specification](https://github.com/zarr-developers/geozarr-spec) diff --git a/attributes/multiscales/schema.json b/attributes/multiscales/schema.json new file mode 100644 index 0000000..ae062d4 --- /dev/null +++ b/attributes/multiscales/schema.json @@ -0,0 +1,101 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://eopf-explorer.github.io/data-model/attributes/multiscales/schema.json", + "title": "Multiscales Attribute Extension", + "description": "JSON schema for the multiscales attribute in Zarr groups", + "type": "object", + "properties": { + "version": { + "type": "string", + "enum": ["0.1.0"], + "description": "Multiscales metadata version" + }, + "layout": { + "type": "array", + "items": { + "type": "object", + "properties": { + "group": { + "type": "string", + "description": "Group name for this resolution level" + }, + "from_group": { + "type": "string", + "description": "Source group used to generate this level" + }, + "factors": { + "type": "array", + "items": { + "type": "number", + "minimum": 1 + }, + "description": "Array of decimation factors per axis (e.g., [2, 2] for 2x decimation)" + }, + "scale": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Array of scale factors per axis describing the resolution change" + }, + "translation": { + "type": "array", + "items": { + "type": "number" + }, + "description": "Array of translation offsets per axis in the coordinate space" + }, + "resampling_method": { + "type": "string", + "enum": [ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ], + "description": "Resampling method for this specific level" + } + }, + 
"required": ["group"], + "additionalProperties": false + }, + "minItems": 1, + "description": "Array of objects representing the pyramid layout and transformation relationships, ordered from highest to lowest resolution" + }, + "resampling_method": { + "type": "string", + "enum": [ + "nearest", + "average", + "bilinear", + "cubic", + "cubic_spline", + "lanczos", + "mode", + "max", + "min", + "med", + "sum", + "q1", + "q3", + "rms", + "gauss" + ], + "default": "nearest", + "description": "Resampling method used for downsampling" + } + }, + "required": ["version", "layout"], + "additionalProperties": true +}