From bc9f3d4ff193948071fa4199258bb886b7b09c36 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 25 Apr 2024 23:54:16 +0200 Subject: [PATCH 1/8] Propose version 2 of this extension. Fixes #16, #21, #22, #23 --- CHANGELOG.md | 21 ++++- README.md | 70 +++++++++----- examples/catalog-link.json | 33 +++++++ examples/item-naip.json | 120 ++++++++++++++++-------- examples/item-nsl.json | 78 +++++++++------- json-schema/schema.json | 181 +++++++++++++++++++++++++++---------- package.json | 6 +- 7 files changed, 363 insertions(+), 146 deletions(-) create mode 100644 examples/catalog-link.json diff --git a/CHANGELOG.md b/CHANGELOG.md index fdf8250..d0bd782 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,9 +16,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +## [v2.0.0] - 2021-06-23 + +### Added + +- `storage:schemes`, `storage:refs` and Storage Scheme Object +- Support the storage extension in Links +- Support for the Alternate Assets Extension +- Support for other storage providers, including custom S3 hosts + +### Changed + +- The storage providers are grouped in `storage:schemes` and located in the Item Properties, Collections or Catalog metadata +- Assets and Links reference the storage schemes by key in `storage:refs` + +### Removed + +- `storage:platform`, `storage:region`, `storage:requester_pays` and `storage:tier` + ## [v1.0.0] - 2021-06-23 Initial release -[Unreleased]: +[Unreleased]: +[v2.0.0]: [v1.0.0]: diff --git a/README.md b/README.md index 4d6d45c..570053b 100644 --- a/README.md +++ b/README.md @@ -1,19 +1,20 @@ # Storage Extension Specification - **Title:** Storage -- **Identifier:** +- **Identifier:** - **Field Name Prefix:** storage -- **Scope:** Item, Collection +- **Scope:** Item, Catalog, Collection - **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Pilot - **Owner**: @davidraleigh @matthewhanson This document 
explains the Storage Extension to the [SpatioTemporal Asset Catalog](https://github.com/radiantearth/stac-spec) (STAC) specification. -It allows adding details related to cloud storage access and costs to be associated with STAC Assets. +It allows adding details related to cloud object storage access and costs to be associated with STAC Assets. This extension does not cover NFS solutions provided by PaaS cloud companies. - Examples: - - [Item example 1](examples/item-naip.json): Shows the basic usage of the extension in a STAC Item. - - [Item example 2](examples/item-nsl.json): Another example of basic usage. + - [NAIP Item](examples/item-naip.json): Shows the usage of the extension in combination with the alternate asset extension. + - [NSL Item](examples/item-nsl.json): Shows a mixture of storage providers, including custom S3 hosts. + - [Catalog with Link](examples/catalog-link.json): Shows the usage of the extension on a link in a STAC Catalog. - [JSON Schema](json-schema/schema.json) - [Changelog](./CHANGELOG.md) @@ -21,26 +22,47 @@ This extension does not cover NFS solutions provided by PaaS cloud companies. The fields in the table below can be used in these parts of STAC documents: +- [x] Catalogs +- [x] Collections +- [x] Item Properties (incl. Summaries in Collections) +- [ ] Assets (for both Collections and Items, incl. Item Asset Definitions in Collections) +- [ ] Links + +| Field Name | Type | Description | +| ----------------- | ------------------------------------------------------------ | ----------- | +| `storage:schemes` | Map | **REQUIRED.** A property that contains all of the storage schemes used by Assets and Links in the STAC Item, Catalog or Collection. | + +--- + +The fields in the table below can be used in these parts of STAC documents: + - [ ] Catalogs - [ ] Collections -- [x] Item Properties (incl. Summaries in Collections) +- [ ] Item Properties (incl. Summaries in Collections) - [x] Assets (for both Collections and Items, incl. 
Item Asset Definitions in Collections) -- [ ] Links +- [x] Links +- [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) + +| Field Name | Type | Description | +| -------------- | ---------- | ----------- | +| `storage:refs` | \[string\] | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | + +### Storage Scheme Object -| Field Name | Type | Description | -| ---------------------- | --------- | ----------- | -| storage:platform | string | The [cloud provider](#providers) where data is stored | -| storage:region | string | The region where the data is stored. Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | -| storage:requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. *Defaults to false* | -| storage:tier | string | The title for the tier type (as defined by PaaS provider) | +| Field Name | Type | Description | +| -------------- | ------- | ----------- | +| platform | string | **REQUIRED.** The [cloud provider](#platforms) where data is stored. | +| region | string | The region where the data is stored. Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | +| requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. Defaults to `false` | +| tier | string | The title for the tier type (as defined by PaaS provider) | -While these are all valid properties on an Item, they will typically be defined per-asset. If a field applies equally -to all assets (e.g., storage:platform=AWS if all assets are on AWS), then it should be specified in Item properties. +The properties `title` and `description` as defined in Common Metadata can be used as well. 
-### Additional Field Information +#### Platforms -#### Providers -Currently this document is arranged to support object storage users of the following PaaS solutions: +The `platform` field identifies the cloud provider where the data is stored. + +There are a couple of pre-defined values for common providers: - Alibaba Cloud (Aliyun): `ALIBABA` - Amazon AWS: `AWS` @@ -48,11 +70,17 @@ Currently this document is arranged to support object storage users of the follo - Google Cloud Platform: `GCP` - IBM Cloud: `IBM` - Oracle Cloud: `ORACLE` -- All other PaaS solutions: `OTHER` -The upper-cased values are meant to be used for `storage:platform`. +All other PaaS solutions must use a unique URL to the service. + +In case an `href` contains a non-HTTP URL that is not directly resolvable, +the `platform` property must identify the host so that the URL can be resolved without further information. +This is especially useful to provide the endpoint URL for custom S3 providers. +In this case the `platform` is effectively the endpoint URL. 
+ +#### Tiers -#### Cloud Provider Storage Tiers +Recommended values for the `tier` field: | Minimum Duration | [Google Cloud Platform](https://cloud.google.com/storage/docs/storage-classes) | [Amazon AWS](https://aws.amazon.com/s3/storage-classes/) | [Microsoft Azure](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers) | [IBM Cloud](https://cloud.ibm.com/objectstorage/create#pricing) | [Oracle Cloud](https://www.oracle.com/cloud/storage/pricing.html) | [Alibaba Cloud](https://www.alibabacloud.com/product/oss/pricing) | | ------------- | --------- | ------------------------ | ------- |---------- | ----------------- | ----------------- | diff --git a/examples/catalog-link.json b/examples/catalog-link.json new file mode 100644 index 0000000..d3a8de9 --- /dev/null +++ b/examples/catalog-link.json @@ -0,0 +1,33 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/storage/v2.0.0/schema.json" + ], + "type": "Catalog", + "id": "20190822T183518Z_746_POM1_ST2_P", + "title": "Example Catalog", + "description": "An example catalog with a link to documentation on object storage.", + "storage:schemes": { + "aws": { + "platform": "AWS", + "region": "us-west-2", + "requester_pays": true, + "tier": "Standard" + } + }, + "links": [ + { + "href": "https://example.com/examples/catalog-link.json", + "rel": "self" + }, + { + "title": "Documentation", + "href": "s3://mybucket/project/documentation.pdf", + "type": "application/pdf", + "rel": "about", + "storage:refs": [ + "aws" + ] + } + ] +} \ No newline at end of file diff --git a/examples/item-naip.json b/examples/item-naip.json index 0ddff93..c9650bb 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -1,10 +1,10 @@ { "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + 
"https://stac-extensions.github.io/version/v1.2.0/schema.json" ], "id": "m_3009743_sw_14_1_20160928_20161129", - "collection": "NAIP_MOSAIC", "bbox": [ -97.75, 30.25, @@ -43,65 +43,107 @@ "datetime": "2016-09-28T00:00:00+00:00", "mission": "NAIP", "platform": "UNKNOWN_PLATFORM", - "gsd": 1 + "gsd": 1, + "storage:schemes": { + "az-wus2-arc": { + "platform": "AZURE", + "region": "westus2", + "tier": "archive" + }, + "gs-cld": { + "platform": "GCP", + "region": "us-central1", + "requester_pays": true, + "tier": "COLDLINE" + }, + "aws-std": { + "platform": "AWS", + "region": "us-west-2", + "requester_pays": true, + "tier": "Standard" + }, + "az-weu-hot": { + "platform": "AZURE", + "region": "westeurope", + "requester_pays": false, + "tier": "hot" + }, + "az-eus-hot": { + "platform": "AZURE", + "region": "eastus", + "requester_pays": false, + "tier": "hot", + "deprecated": true + }, + "minio": { + "platform": "https://play.min.io:9000" + } + } }, "assets": { + "CO_GEOTIFF_RGB": { + "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "minio" + ] + }, + "CO_GEOTIFF_AWS_RGB": { + "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "gs-std" + ] + }, "GEOTIFF_AZURE_RGBIR": { "href": "https://naip-nsl.blob.core.windows.net/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "AZURE", - "storage:region": "westus2", - "storage:tier": "archive" + "type": "image/tiff; application=geotiff", + "storage:refs": [ + "az-wus2-ar" + ] }, "CO_GEOTIFF_GCP_RGB": { "href": "gs://naip-data/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "GCP", - "storage:region": "us-central1", - 
"storage:requester_pays": true, - "storage:tier": "COLDLINE" - }, - "CO_GEOTIFF_AWS_RGB": { - "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AWS", - "storage:region": "us-west-2", - "storage:requester_pays": true, - "storage:tier": "Standard" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "gs-cld" + ] }, "CO_GEOTIFF_AZURE_RGB": { "href": "https://naipeuwest.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AZURE", - "storage:region": "westeurope", - "storage:requester_pays": false, - "storage:tier": "hot" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "az-weu-hot" + ] }, "CO_GEOTIFF_AZURE_RGB_DEPRECATED": { "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/vnd.stac.geotiff; cloud-optimized=true", - "storage:platform": "AZURE", - "storage:region": "eastus", - "storage:requester_pays": false, - "storage:tier": "hot" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "az-eus-hot" + ], + "deprecated": true + }, + "THUMBNAIL": { + "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", + "type": "image/jpeg", + "storage:refs": [ + "minio" + ] }, "THUMBNAIL_AZURE_DEPRECATED": { "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", "type": "image/jpeg", - "storage:platform": "AZURE", - "storage:region": "eastus", - "storage:requester_pays": false, - "storage:tier": "hot" + "storage:refs": [ + "az-eus-hot" + ], + "deprecated": true } }, "links": [ { - "href": 
"https://example.com/examples/item.json", + "href": "https://example.com/examples/item-naip.json", "rel": "self" - }, - { - "href": "https://example.com/examples/item.json", - "rel": "collection" } ] } \ No newline at end of file diff --git a/examples/item-nsl.json b/examples/item-nsl.json index 9802fd0..9b0c996 100644 --- a/examples/item-nsl.json +++ b/examples/item-nsl.json @@ -1,10 +1,10 @@ { "stac_version": "1.0.0", "stac_extensions": [ - "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "https://stac-extensions.github.io/alternate-assets/v1.0.0/schema.json" ], "id": "20190822T183518Z_746_POM1_ST2_P", - "collection": "NSL_SCENE", "type": "Feature", "bbox": [ -97.7466867683867, @@ -44,50 +44,58 @@ "mission": "SWIFT", "platform": "SWIFT_2", "instrument": "POM_1", - "gsd": 0.20000000298023224 + "gsd": 0.20000000298023224, + "storage:schemes": { + "gcp-std": { + "platform": "GCP", + "region": "us-central1", + "requester_pays": true, + "tier": "STANDARD" + }, + "aws-glc": { + "platform": "AWS", + "region": "us-central-1", + "requester_pays": true, + "tier": "Glacier" + } + } }, "assets": { - "GEOTIFF_RGB_GCP": { + "GEOTIFF_RGB": { "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "GCP", - "storage:region": "us-central1", - "storage:requester_pays": true, - "storage:tier": "STANDARD" + "type": "image/tiff; application=geotiff; profile=cloud-optimized", + "storage:refs": [ + "gcp-std" + ], + "alternate": { + "aws": { + "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", + "storage:refs": [ + "aws-std" + ] + } + } }, - "THUMBNAIL_RGB_GCP": { + "THUMBNAIL_RGB": { "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", 
"type": "image/png", - "storage:platform": "GCP", - "storage:region": "us-central1", - "storage:requester_pays": true, - "storage:tier": "STANDARD" - }, - "GEOTIFF_RGB_AWS": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/vnd.stac.geotiff", - "storage:platform": "AWS", - "storage:region": "us-central-1", - "storage:requester_pays": true, - "storage:tier": "Glacier" - }, - "THUMBNAIL_RGB_AWS": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "type": "image/png", - "storage:platform": "AWS", - "storage:region": "us-central-1", - "storage:requester_pays": true, - "storage:tier": "Standard" + "storage:refs": [ + "gcp-std" + ], + "alternate": { + "aws": { + "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", + "storage:refs": [ + "aws-std" + ] + } + } } }, "links": [ { - "href": "https://example.com/examples/item.json", + "href": "https://example.com/examples/item-nsl.json", "rel": "self" - }, - { - "href": "https://example.com/examples/item.json", - "rel": "collection" } ] } \ No newline at end of file diff --git a/json-schema/schema.json b/json-schema/schema.json index ca9d7ad..5087247 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -1,8 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": "https://stac-extensions.github.io/storage/v1.0.0/schema.json", - "title": "Storage Extension", - "description": "STAC Storage Extension to a STAC Item and STAC Assets.", + "$id": "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "title": "STAC Storage Extension", "type": "object", "required": [ "stac_extensions" @@ -11,7 +10,7 @@ "stac_extensions": { "type": "array", "contains": { - "const": "https://stac-extensions.github.io/storage/v1.0.0/schema.json" + "const": 
"https://stac-extensions.github.io/storage/v2.0.0/schema.json" } } }, @@ -21,21 +20,20 @@ "type": "object", "required": [ "type", - "properties", - "assets" + "properties" ], "properties": { "type": { "const": "Feature" }, "properties": { - "$ref": "#/definitions/fields" + "$ref": "#/definitions/schemes_field" }, "assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" + }, + "links": { + "$ref": "#/definitions/links" } } }, @@ -50,51 +48,140 @@ "const": "Collection" }, "assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" }, "item_assets": { - "type": "object", - "additionalProperties": { - "$ref": "#/definitions/fields" - } + "$ref": "#/definitions/assets" + }, + "links": { + "$ref": "#/definitions/links" } - } + }, + "allOf": [ + { + "$ref": "#/definitions/schemes_field" + } + ] + }, + { + "$comment": "This is the schema for STAC Catalogs", + "type": "object", + "required": [ + "type" + ], + "properties": { + "type": { + "const": "Catalog" + }, + "links": { + "$ref": "#/definitions/links" + } + }, + "allOf": [ + { + "$ref": "#/definitions/schemes_field" + } + ] } ], "definitions": { - "fields": { + "schemes_field": { "type": "object", + "required": [ + "storage:schemes" + ], "properties": { - "storage:platform": { - "title": "Platform", - "type": "string", - "enum": [ - "OTHER", - "AWS", - "GCP", - "AZURE", - "IBM", - "ALIBABA", - "ORACLE" - ], - "default": "OTHER" - }, - "storage:region": { - "title": "Region", - "type": "string" - }, - "storage:requester_pays": { - "type": "boolean", - "title": "Requester pays", - "default": false - }, - "storage:tier": { - "title": "Tier", - "type": "string" + "storage:schemes": { + "type": "object", + "patternProperties": { + "^.{1,}$": { + "required": [ + "platform" + ], + "properties": { + "platform": { + "title": "Platform", + "oneOf": [ + { + "type": "string", + "enum": [ + 
"AWS", + "GCP", + "AZURE", + "IBM", + "ALIBABA", + "ORACLE" + ] + }, + { + "type": "string", + "format": "iri", + "pattern": "^[\\w\\+.-]+://" + } + ] + }, + "region": { + "title": "Region", + "type": "string" + }, + "requester_pays": { + "type": "boolean", + "title": "Requester pays", + "default": false + }, + "tier": { + "title": "Tier", + "type": "string" + } + } + } + }, + "additionalProperties": false } + }, + "patternProperties": { + "^(?!storage:)": {} + }, + "additionalProperties": false + }, + "refs_field": { + "type": "object", + "properties": { + "storage:refs": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + }, + "patternProperties": { + "^(?!storage:)": {} + }, + "additionalProperties": false + }, + "assets": { + "type": "object", + "additionalProperties": { + "allOf": [ + { + "$ref": "#/definitions/refs_field" + }, + { + "type": "object", + "properties": { + "alternate": { + "$ref": "#/definitions/refs_field" + } + } + } + ] + } + }, + "links": { + "type": "array", + "items": { + "$ref": "#/definitions/refs_field" } } } diff --git a/package.json b/package.json index b83f5fd..948eb39 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,11 @@ { "name": "stac-extensions", - "version": "1.0.0", + "version": "2.0.0", "scripts": { "test": "npm run check-markdown && npm run check-examples", "check-markdown": "remark . -f -r .github/remark.yaml", - "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/storage/v1.0.0/schema.json=./json-schema/schema.json", - "format-examples": "stac-node-validator . --format --schemaMap https://stac-extensions.github.io/storage/v1.0.0/schema.json=./json-schema/schema.json" + "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json", + "format-examples": "stac-node-validator . 
--format --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json" }, "dependencies": { "remark-cli": "^8.0.0", From e57da48fe0fb14d610a7d066cefa0e790e2b32a8 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Sat, 27 Apr 2024 11:04:24 +0200 Subject: [PATCH 2/8] Improve examples --- README.md | 7 ++-- examples/collection.json | 76 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100644 examples/collection.json diff --git a/README.md b/README.md index 570053b..ff5762b 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,12 @@ It allows adding details related to cloud object storage access and costs to be This extension does not cover NFS solutions provided by PaaS cloud companies. - Examples: - - [NAIP Item](examples/item-naip.json): Shows the usage of the extension in combination with the alternate asset extension. - - [NSL Item](examples/item-nsl.json): Shows a mixture of storage providers, including custom S3 hosts. + - [NAIP Item](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts. + - [NSL Item](examples/item-nsl.json): Shows the usage of the extension in combination with the + [alternate asset extension](https://github.com/stac-extensions/alternate-assets). - [Catalog with Link](examples/catalog-link.json): Shows the usage of the extension on a link in a STAC Catalog. + - [Collection with Auth](examples/catalog-link.json): Shows the usage of the extension in a STAC Collecion in combination with the + [authentication extension](https://github.com/stac-extensions/authentication). 
- [JSON Schema](json-schema/schema.json) - [Changelog](./CHANGELOG.md) diff --git a/examples/collection.json b/examples/collection.json new file mode 100644 index 0000000..da32b39 --- /dev/null +++ b/examples/collection.json @@ -0,0 +1,76 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/storage/v2.0.0/schema.json", + "https://stac-extensions.github.io/authentication/v1.1.0/schema.json" + ], + "type": "Collection", + "id": "20190822T183518Z_746_POM1_ST2_P", + "title": "Example Collection", + "description": "An example collection with a link to documentation on object storage.", + "license": "CC-0", + "storage:schemes": { + "aws": { + "platform": "AWS", + "region": "us-west-2", + "requester_pays": true, + "tier": "Standard" + } + }, + "auth:schemes": { + "aws": { + "type": "s3" + } + }, + "assets": { + "stac-items": { + "title": "STAC Items as GeoParquet", + "href": "s3://mybucket/project/items.parquet", + "type": "application/vnd.apache.parquet", + "storage:refs": [ + "aws" + ], + "auth:refs": [ + "aws" + ] + } + }, + "links": [ + { + "href": "https://example.com/examples/collection.json", + "rel": "self" + }, + { + "title": "Documentation", + "href": "s3://mybucket/project/documentation.pdf", + "type": "application/pdf", + "rel": "about", + "storage:refs": [ + "aws" + ], + "auth:refs": [ + "aws" + ] + } + ], + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -56, + 180, + 83 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2015-06-23T00:00:00Z", + null + ] + ] + } + } +} \ No newline at end of file From 25e38f1f6471089b7be9164198b81944504301b7 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Thu, 2 May 2024 21:24:55 +0200 Subject: [PATCH 3/8] Update examples/item-naip.json Co-authored-by: Phil Varner --- examples/item-naip.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/item-naip.json b/examples/item-naip.json index c9650bb..63eb687 100644 --- a/examples/item-naip.json +++ 
b/examples/item-naip.json @@ -92,7 +92,7 @@ "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", "storage:refs": [ - "gs-std" + "aws-std" ] }, "GEOTIFF_AZURE_RGBIR": { From 91969ba5cd0820bab026754eaffd271914ce125f Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 12 Aug 2024 11:21:56 +0200 Subject: [PATCH 4/8] General framework --- CHANGELOG.md | 7 +-- CONTRIBUTING.md | 33 ++++++++++++ README.md | 99 +++++++++++++----------------------- examples/catalog-link.json | 10 ++-- examples/collection.json | 11 ++-- examples/item-naip.json | 92 ++++++--------------------------- examples/item-nsl.json | 101 ------------------------------------- json-schema/schema.json | 37 +++----------- platforms/aws-s3.md | 7 +++ platforms/ms-azure.md | 6 +++ platforms/s3.md | 5 ++ 11 files changed, 122 insertions(+), 286 deletions(-) create mode 100644 CONTRIBUTING.md delete mode 100644 examples/item-nsl.json create mode 100644 platforms/aws-s3.md create mode 100644 platforms/ms-azure.md create mode 100644 platforms/s3.md diff --git a/CHANGELOG.md b/CHANGELOG.md index d0bd782..a935e61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,19 +16,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed -## [v2.0.0] - 2021-06-23 +## [v2.0.0] - 2024-08-30 ### Added -- `storage:schemes`, `storage:refs` and Storage Scheme Object +- `storage:schemes`, `storage:ref` and Storage Scheme Object - Support the storage extension in Links - Support for the Alternate Assets Extension - Support for other storage providers, including custom S3 hosts ### Changed +- The extension is a framework for storage providers, it doesn't strictly define the individual providers. 
- The storage providers are grouped in `storage:schemes` and located in the Item Properties, Collections or Catalog metadata -- Assets and Links reference the storage schemes by key in `storage:refs` +- Assets and Links reference the storage schemes by key in `storage:ref` ### Removed diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..be7bbc6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,33 @@ +# Contributing + +All contributions are subject to the +[STAC Specification Code of Conduct](https://github.com/radiantearth/stac-spec/blob/master/CODE_OF_CONDUCT.md). +For contributions, please follow the +[STAC specification contributing guide](https://github.com/radiantearth/stac-spec/blob/master/CONTRIBUTING.md) Instructions +for running tests are copied here for convenience. + +## Running tests + +The same checks that run as checks on PR's are part of the repository and can be run locally to verify that changes are valid. +To run tests locally, you'll need `npm`, which is a standard part of any [node.js installation](https://nodejs.org/en/download/). + +First you'll need to install everything with npm once. Just navigate to the root of this repository and on +your command line run: + +```bash +npm install +``` + +Then to check markdown formatting and test the examples against the JSON schema, you can run: + +```bash +npm test +``` + +This will spit out the same texts that you see online, and you can then go and fix your markdown or examples. + +If the tests reveal formatting problems with the examples, you can fix them with: + +```bash +npm run format-examples +``` diff --git a/README.md b/README.md index ff5762b..02e4ccd 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,8 @@ It allows adding details related to cloud object storage access and costs to be This extension does not cover NFS solutions provided by PaaS cloud companies. 
- Examples: - - [NAIP Item](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts. - - [NSL Item](examples/item-nsl.json): Shows the usage of the extension in combination with the - [alternate asset extension](https://github.com/stac-extensions/alternate-assets). + - [NAIP Item with Alternate Assets](examples/item-naip.json): Shows a mixture of storage providers, including custom S3 hosts + and the [alternate assets extension](https://github.com/stac-extensions/alternate-assets). - [Catalog with Link](examples/catalog-link.json): Shows the usage of the extension on a link in a STAC Catalog. - [Collection with Auth](examples/catalog-link.json): Shows the usage of the extension in a STAC Collecion in combination with the [authentication extension](https://github.com/stac-extensions/authentication). @@ -46,82 +45,56 @@ The fields in the table below can be used in these parts of STAC documents: - [x] Links - [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) -| Field Name | Type | Description | -| -------------- | ---------- | ----------- | -| `storage:refs` | \[string\] | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | +| Field Name | Type | Description | +| ------------- | ------- | ----------- | +| `storage:ref` | string | A property that specifies which scheme in `storage:schemes` may be used to access an Asset or Link. The value must be one of the keys defined in `storage:schemes`. | ### Storage Scheme Object | Field Name | Type | Description | | -------------- | ------- | ----------- | -| platform | string | **REQUIRED.** The [cloud provider](#platforms) where data is stored. | -| region | string | The region where the data is stored. 
Relevant to speed of access and inter region egress costs (as defined by PaaS provider) | -| requester_pays | boolean | Is the data requester pays or is it data manager/cloud provider pays. Defaults to `false` | -| tier | string | The title for the tier type (as defined by PaaS provider) | +| platform | string | **REQUIRED.** The cloud provider where data is stored as URI or URI template to the API. | +| region | string | The region where the data is stored. Relevant to speed of access and inter region egress costs (as defined by PaaS provider). | +| requester_pays | boolean | Is the data "requester pays" (`true`) or is it "data manager/cloud provider pays" (`false`). Defaults to `false`. | +| ... | ... | Additional properties as defined in the URL template or in the platform specific documents. | -The properties `title` and `description` as defined in Common Metadata can be used as well. +The properties `title` and `description` as defined in Common Metadata should be used as well. -#### Platforms +#### platform -The `platform` field identifies the cloud provider where the data is stored. +The `platform` field identifies the cloud provider where the data is stored as URI or URI template to the API of the service. -There are a couple of pre-defined values for common providers: +If a URI template is provided, all variables must be defined in the Storage Scheme Object as a property with the same name. +For example, the URI template `https://{bucket}.{region}.example.com` must have at least the properties +`bucket` and `region` defined: -- Alibaba Cloud (Aliyun): `ALIBABA` -- Amazon AWS: `AWS` -- Microsoft Azure: `AZURE` -- Google Cloud Platform: `GCP` -- IBM Cloud: `IBM` -- Oracle Cloud: `ORACLE` - -All other PaaS solutions must use a unique URL to the service. 
+```json +{ + "platform": "https://{bucket}.{region}.example.com", + "region": "eu-fr", + "bucket": "john-doe-stac", + "requester_pays": true +} +``` In case an `href` contains a non-HTTP URL that is not directly resolvable, the `platform` property must identify the host so that the URL can be resolved without further information. -This is especially useful to provide the endpoint URL for custom S3 providers. -In this case the `platform` is effectively the endpoint URL. - -#### Tiers - -Recommended values for the `tier` field: - -| Minimum Duration | [Google Cloud Platform](https://cloud.google.com/storage/docs/storage-classes) | [Amazon AWS](https://aws.amazon.com/s3/storage-classes/) | [Microsoft Azure](https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers) | [IBM Cloud](https://cloud.ibm.com/objectstorage/create#pricing) | [Oracle Cloud](https://www.oracle.com/cloud/storage/pricing.html) | [Alibaba Cloud](https://www.alibabacloud.com/product/oss/pricing) | -| ------------- | --------- | ------------------------ | ------- |---------- | ----------------- | ----------------- | -| 0 (Auto-Tier) | | Intelligent-Tiering | | Smart Tier | -| 0 days | STANDARD | Standard | hot | Standard | Standard | Standard | -| 30 days | NEARLINE | Standard-IA, One Zone-IA | cool | Vault | Infrequent Access | Infrequent Access | -| 60 days | | | | | | Archive | -| 90 days | COLDLINE | Glacier | | Cold Vault | Archive | | -| 180 days | | Glacier Deep Archive | archive | | | Cold Archive | -| 365 days | ARCHIVE | | | | | | - -## Contributing - -All contributions are subject to the -[STAC Specification Code of Conduct](https://github.com/radiantearth/stac-spec/blob/master/CODE_OF_CONDUCT.md). -For contributions, please follow the -[STAC specification contributing guide](https://github.com/radiantearth/stac-spec/blob/master/CONTRIBUTING.md) Instructions -for running tests are copied here for convenience. 
+For example, this is especially useful to provide the endpoint URL for custom S3 providers. +In this case the `platform` could effectively provide the endpoint URL. -### Running tests +We try to collect pre-defined templates and best pratices for as many providers as possible +in this repository, but be aware that these are not part of the official extension releases +and are not validated. This extension just provides the framework, the provider best pratices +may change at any time without a new version of this extension being released. -The same checks that run as checks on PR's are part of the repository and can be run locally to verify that changes are valid. -To run tests locally, you'll need `npm`, which is a standard part of any [node.js installation](https://nodejs.org/en/download/). +The following providers have defined best pratices at this point: -First you'll need to install everything with npm once. Just navigate to the root of this repository and on -your command line run: -```bash -npm install -``` +- [AWS S3](platforms/aws-s3.md) +- [Generic S3 (non-AWS)](platforms/s3.md) +- [Microsoft Azure](platforms/ms-azure.md) -Then to check markdown formatting and test the examples against the JSON schema, you can run: -```bash -npm test -``` +Feel encouraged to submit additional platform specifications via Pull Requests. -This will spit out the same texts that you see online, and you can then go and fix your markdown or examples. +## Contributing -If the tests reveal formatting problems with the examples, you can fix them with: -```bash -npm run format-examples -``` +See the [Contributor documentation](CONTRIBUTING.md) for details. 
diff --git a/examples/catalog-link.json b/examples/catalog-link.json index d3a8de9..0bb9141 100644 --- a/examples/catalog-link.json +++ b/examples/catalog-link.json @@ -9,10 +9,10 @@ "description": "An example catalog with a link to documentation on object storage.", "storage:schemes": { "aws": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "mybucket", "region": "us-west-2", - "requester_pays": true, - "tier": "Standard" + "requester_pays": true } }, "links": [ @@ -25,9 +25,7 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:refs": [ - "aws" - ] + "storage:ref": "aws" } ] } \ No newline at end of file diff --git a/examples/collection.json b/examples/collection.json index da32b39..270e660 100644 --- a/examples/collection.json +++ b/examples/collection.json @@ -11,7 +11,8 @@ "license": "CC-0", "storage:schemes": { "aws": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "mybucket", "region": "us-west-2", "requester_pays": true, "tier": "Standard" @@ -27,9 +28,7 @@ "title": "STAC Items as GeoParquet", "href": "s3://mybucket/project/items.parquet", "type": "application/vnd.apache.parquet", - "storage:refs": [ - "aws" - ], + "storage:ref": "aws", "auth:refs": [ "aws" ] @@ -45,9 +44,7 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:refs": [ - "aws" - ], + "storage:ref": "aws", "auth:refs": [ "aws" ] diff --git a/examples/item-naip.json b/examples/item-naip.json index 63eb687..6af3f26 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -2,7 +2,8 @@ "stac_version": "1.0.0", "stac_extensions": [ "https://stac-extensions.github.io/storage/v2.0.0/schema.json", - "https://stac-extensions.github.io/version/v1.2.0/schema.json" + "https://stac-extensions.github.io/version/v1.2.0/schema.json", + 
"https://stac-extensions.github.io/alternate-assets/v1.1.0/schema.json" ], "id": "m_3009743_sw_14_1_20160928_20161129", "bbox": [ @@ -45,35 +46,16 @@ "platform": "UNKNOWN_PLATFORM", "gsd": 1, "storage:schemes": { - "az-wus2-arc": { - "platform": "AZURE", - "region": "westus2", - "tier": "archive" - }, - "gs-cld": { - "platform": "GCP", - "region": "us-central1", - "requester_pays": true, - "tier": "COLDLINE" + "az-wus2-ar": { + "platform": "https://{account}.blob.core.windows.net", + "account": "jon-doe-123", + "region": "westus2" }, "aws-std": { - "platform": "AWS", + "platform": "https://{bucket}.s3.{region}.amazonaws.com", + "bucket": "naip-visualization", "region": "us-west-2", - "requester_pays": true, - "tier": "Standard" - }, - "az-weu-hot": { - "platform": "AZURE", - "region": "westeurope", - "requester_pays": false, - "tier": "hot" - }, - "az-eus-hot": { - "platform": "AZURE", - "region": "eastus", - "requester_pays": false, - "tier": "hot", - "deprecated": true + "requester_pays": true }, "minio": { "platform": "https://play.min.io:9000" @@ -82,62 +64,18 @@ }, "assets": { "CO_GEOTIFF_RGB": { - "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "minio" - ] - }, - "CO_GEOTIFF_AWS_RGB": { "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "aws-std" - ] + "storage:ref": "aws-std", + "alternate": { + "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", + "storage:ref": "minio" + } }, "GEOTIFF_AZURE_RGBIR": { "href": "https://naip-nsl.blob.core.windows.net/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff", - "storage:refs": [ - "az-wus2-ar" - ] - }, - "CO_GEOTIFF_GCP_RGB": { - "href": 
"gs://naip-data/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "gs-cld" - ] - }, - "CO_GEOTIFF_AZURE_RGB": { - "href": "https://naipeuwest.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "az-weu-hot" - ] - }, - "CO_GEOTIFF_AZURE_RGB_DEPRECATED": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "az-eus-hot" - ], - "deprecated": true - }, - "THUMBNAIL": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", - "type": "image/jpeg", - "storage:refs": [ - "minio" - ] - }, - "THUMBNAIL_AZURE_DEPRECATED": { - "href": "https://naipblobs.blob.core.windows.net/naip/v002/tx/2016/tx_100cm_2016/30097/m_3009743_sw_14_1_20160928.200.jpg", - "type": "image/jpeg", - "storage:refs": [ - "az-eus-hot" - ], - "deprecated": true + "storage:ref": "az-wus2-ar" } }, "links": [ diff --git a/examples/item-nsl.json b/examples/item-nsl.json deleted file mode 100644 index 9b0c996..0000000 --- a/examples/item-nsl.json +++ /dev/null @@ -1,101 +0,0 @@ -{ - "stac_version": "1.0.0", - "stac_extensions": [ - "https://stac-extensions.github.io/storage/v2.0.0/schema.json", - "https://stac-extensions.github.io/alternate-assets/v1.0.0/schema.json" - ], - "id": "20190822T183518Z_746_POM1_ST2_P", - "type": "Feature", - "bbox": [ - -97.7466867683867, - 30.278398961994966, - -97.72990596574927, - 30.288621181865743 - ], - "geometry": { - "type": "Polygon", - "coordinates": [ - [ - [ - -97.7466867683867, - 30.28754662370266 - ], - [ - -97.74555747279238, - 30.278398961994966 - ], - [ - -97.72990596574927, - 30.27972380176124 - ], - [ - 
-97.73085242627444, - 30.288621181865743 - ], - [ - -97.7466867683867, - 30.28754662370266 - ] - ] - ] - }, - "properties": { - "datetime": "2019-08-22T18:35:18+00:00", - "mission": "SWIFT", - "platform": "SWIFT_2", - "instrument": "POM_1", - "gsd": 0.20000000298023224, - "storage:schemes": { - "gcp-std": { - "platform": "GCP", - "region": "us-central1", - "requester_pays": true, - "tier": "STANDARD" - }, - "aws-glc": { - "platform": "AWS", - "region": "us-central-1", - "requester_pays": true, - "tier": "Glacier" - } - } - }, - "assets": { - "GEOTIFF_RGB": { - "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:refs": [ - "gcp-std" - ], - "alternate": { - "aws": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.tif", - "storage:refs": [ - "aws-std" - ] - } - } - }, - "THUMBNAIL_RGB": { - "href": "gs://swiftera-processed-data/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "type": "image/png", - "storage:refs": [ - "gcp-std" - ], - "alternate": { - "aws": { - "href": "s3://nsl-data-archive/20190822T162258Z_TRAVIS_COUNTY/Published/REGION_0/20190822T183518Z_746_POM1_ST2_P.png", - "storage:refs": [ - "aws-std" - ] - } - } - } - }, - "links": [ - { - "href": "https://example.com/examples/item-nsl.json", - "rel": "self" - } - ] -} \ No newline at end of file diff --git a/json-schema/schema.json b/json-schema/schema.json index 5087247..9ab8a25 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -101,24 +101,9 @@ "properties": { "platform": { "title": "Platform", - "oneOf": [ - { - "type": "string", - "enum": [ - "AWS", - "GCP", - "AZURE", - "IBM", - "ALIBABA", - "ORACLE" - ] - }, - { - "type": "string", - "format": "iri", - "pattern": "^[\\w\\+.-]+://" - } - ] + "type": "string", + "format": 
"uri-template", + "pattern": "^[\\w\\+.-]+://" }, "region": { "title": "Region", @@ -128,12 +113,9 @@ "type": "boolean", "title": "Requester pays", "default": false - }, - "tier": { - "title": "Tier", - "type": "string" } - } + }, + "additionalProperties": true } }, "additionalProperties": false @@ -147,12 +129,9 @@ "refs_field": { "type": "object", "properties": { - "storage:refs": { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - } + "storage:ref": { + "type": "string", + "minLength": 1 } }, "patternProperties": { diff --git a/platforms/aws-s3.md b/platforms/aws-s3.md new file mode 100644 index 0000000..93fde52 --- /dev/null +++ b/platforms/aws-s3.md @@ -0,0 +1,7 @@ +# AWS S3 + +This defines the Amazon Web Services (AWS) S3 interface. + +- `platform`: `https://{bucket}.s3.{region}.amazonaws.com` +- `bucket`: The bucket name +- `region`: One of the S3 regions (lowercase) diff --git a/platforms/ms-azure.md b/platforms/ms-azure.md new file mode 100644 index 0000000..e7c968d --- /dev/null +++ b/platforms/ms-azure.md @@ -0,0 +1,6 @@ +# Microsoft Azure + +This defines the Microsoft Azure interface. + +- `platform`: `https://{account}.blob.core.windows.net` +- `account`: The Microsoft account identifier diff --git a/platforms/s3.md b/platforms/s3.md new file mode 100644 index 0000000..7809afc --- /dev/null +++ b/platforms/s3.md @@ -0,0 +1,5 @@ +# S3 (non-AWS) + +This defines the S3 interface for providers other than AWS (e.g. minio-based). + +- `platform`: The API URL, must be the endpoint URL that can be used for the AWS CLI for example. 
From 1f7b7e29b6f8886b877c780dedf90594992be583 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Mon, 12 Aug 2024 12:07:31 +0200 Subject: [PATCH 5/8] Revert to storage:refs --- CHANGELOG.md | 4 ++-- README.md | 6 +++--- examples/catalog-link.json | 4 +++- examples/collection.json | 8 ++++++-- examples/item-naip.json | 12 +++++++++--- json-schema/schema.json | 9 ++++++--- 6 files changed, 29 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a935e61..4eb8a43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added -- `storage:schemes`, `storage:ref` and Storage Scheme Object +- `storage:schemes`, `storage:refs` and Storage Scheme Object - Support the storage extension in Links - Support for the Alternate Assets Extension - Support for other storage providers, including custom S3 hosts @@ -29,7 +29,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - The extension is a framework for storage providers, it doesn't strictly define the individual providers. - The storage providers are grouped in `storage:schemes` and located in the Item Properties, Collections or Catalog metadata -- Assets and Links reference the storage schemes by key in `storage:ref` +- Assets and Links reference the storage schemes by key in `storage:refs` ### Removed diff --git a/README.md b/README.md index 02e4ccd..eeeb223 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,9 @@ The fields in the table below can be used in these parts of STAC documents: - [x] Links - [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) -| Field Name | Type | Description | -| ------------- | ------- | ----------- | -| `storage:ref` | string | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. 
Each value must be one of the keys defined in `storage:schemes`. | +| Field Name | Type | Description | +| -------------- | ------- | ----------- | +| `storage:refs` | string | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | ### Storage Scheme Object diff --git a/examples/catalog-link.json b/examples/catalog-link.json index 0bb9141..442189a 100644 --- a/examples/catalog-link.json +++ b/examples/catalog-link.json @@ -25,7 +25,9 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:ref": "aws" + "storage:refs": [ + "aws" + ] } ] } \ No newline at end of file diff --git a/examples/collection.json b/examples/collection.json index 270e660..26b606e 100644 --- a/examples/collection.json +++ b/examples/collection.json @@ -28,7 +28,9 @@ "title": "STAC Items as GeoParquet", "href": "s3://mybucket/project/items.parquet", "type": "application/vnd.apache.parquet", - "storage:ref": "aws", + "storage:refs": [ + "aws" + ], "auth:refs": [ "aws" ] @@ -44,7 +46,9 @@ "href": "s3://mybucket/project/documentation.pdf", "type": "application/pdf", "rel": "about", - "storage:ref": "aws", + "storage:refs": [ + "aws" + ], "auth:refs": [ "aws" ] diff --git a/examples/item-naip.json b/examples/item-naip.json index 6af3f26..26a2727 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -66,16 +66,22 @@ "CO_GEOTIFF_RGB": { "href": "s3://naip-visualization/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff; profile=cloud-optimized", - "storage:ref": "aws-std", + "storage:refs": [ + "aws-std" + ], "alternate": { "href": "s3://mybucket/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", - "storage:ref": "minio" + "storage:refs": [ + "minio" + ] } }, "GEOTIFF_AZURE_RGBIR": { "href": 
"https://naip-nsl.blob.core.windows.net/tx/2016/100cm/rgb/30097/m_3009743_sw_14_1_20160928.tif", "type": "image/tiff; application=geotiff", - "storage:ref": "az-wus2-ar" + "storage:refs": [ + "az-wus2-ar" + ] } }, "links": [ diff --git a/json-schema/schema.json b/json-schema/schema.json index 9ab8a25..44c265f 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -129,9 +129,12 @@ "refs_field": { "type": "object", "properties": { - "storage:ref": { - "type": "string", - "minLength": 1 + "storage:refs": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } } }, "patternProperties": { From f02f2db6a4b2b173b838e3bb5a6767c691539dcd Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 18 Oct 2024 14:19:31 +0200 Subject: [PATCH 6/8] Minor adjustments --- CHANGELOG.md | 1 + README.md | 8 ++++---- platforms/{s3.md => custom-s3.md} | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) rename platforms/{s3.md => custom-s3.md} (88%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4eb8a43..81b63cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - `storage:platform`, `storage:region`, `storage:requester_pays` and `storage:tier` + (moved to Storage Scheme Object, except for the tier) ## [v1.0.0] - 2021-06-23 diff --git a/README.md b/README.md index eeeb223..1be2faf 100644 --- a/README.md +++ b/README.md @@ -45,9 +45,9 @@ The fields in the table below can be used in these parts of STAC documents: - [x] Links - [x] [Alternate Assets Object](https://github.com/stac-extensions/alternate-assets?tab=readme-ov-file#alternate-asset-object) -| Field Name | Type | Description | -| -------------- | ------- | ----------- | -| `storage:refs` | string | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. 
| +| Field Name | Type | Description | +| -------------- | ---------- | ----------- | +| `storage:refs` | \[string\] | A property that specifies which schemes in `storage:schemes` may be used to access an Asset or Link. Each value must be one of the keys defined in `storage:schemes`. | ### Storage Scheme Object @@ -90,7 +90,7 @@ may change at any time without a new version of this extension being released. The following providers have defined best pratices at this point: - [AWS S3](platforms/aws-s3.md) -- [Generic S3 (non-AWS)](platforms/s3.md) +- [Generic S3 (non-AWS)](platforms/custom-s3.md) - [Microsoft Azure](platforms/ms-azure.md) Feel encouraged to submit additional platform specifications via Pull Requests. diff --git a/platforms/s3.md b/platforms/custom-s3.md similarity index 88% rename from platforms/s3.md rename to platforms/custom-s3.md index 7809afc..c6c5ebd 100644 --- a/platforms/s3.md +++ b/platforms/custom-s3.md @@ -1,4 +1,4 @@ -# S3 (non-AWS) +# Generic S3 (non-AWS) This defines the S3 interface for providers other than AWS (e.g. minio-based). 
From b001730be94b806f50e248cbd6a63ca7ac249434 Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 18 Oct 2024 14:51:40 +0200 Subject: [PATCH 7/8] Add type field and validation --- README.md | 24 ++++++++++++++++------- examples/catalog-link.json | 1 + examples/collection.json | 1 + examples/item-naip.json | 4 +++- json-schema/platforms/aws-s3.json | 29 ++++++++++++++++++++++++++++ json-schema/platforms/custom-s3.json | 16 +++++++++++++++ json-schema/platforms/ms-azure.json | 23 ++++++++++++++++++++++ json-schema/schema.json | 16 +++++++++++++++ package.json | 6 +++--- validator-config.json | 8 ++++++++ 10 files changed, 117 insertions(+), 11 deletions(-) create mode 100644 json-schema/platforms/aws-s3.json create mode 100644 json-schema/platforms/custom-s3.json create mode 100644 json-schema/platforms/ms-azure.json create mode 100644 validator-config.json diff --git a/README.md b/README.md index 1be2faf..f002a29 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ - **Field Name Prefix:** storage - **Scope:** Item, Catalog, Collection - **Extension [Maturity Classification](https://github.com/radiantearth/stac-spec/tree/master/extensions/README.md#extension-maturity):** Pilot -- **Owner**: @davidraleigh @matthewhanson +- **Owner**: @matthewhanson @m-mohr This document explains the Storage Extension to the [SpatioTemporal Asset Catalog](https://github.com/radiantearth/stac-spec) (STAC) specification. It allows adding details related to cloud object storage access and costs to be associated with STAC Assets. @@ -53,6 +53,7 @@ The fields in the table below can be used in these parts of STAC documents: | Field Name | Type | Description | | -------------- | ------- | ----------- | +| type | string | **REQUIRED.** Type identifier for the platform, see below. | | platform | string | **REQUIRED.** The cloud provider where data is stored as URI or URI template to the API. | | region | string | The region where the data is stored. 
Relevant to speed of access and inter region egress costs (as defined by PaaS provider). | | requester_pays | boolean | Is the data "requester pays" (`true`) or is it "data manager/cloud provider pays" (`false`). Defaults to `false`. | @@ -65,11 +66,12 @@ The properties `title` and `description` as defined in Common Metadata should be The `platform` field identifies the cloud provider where the data is stored as URI or URI template to the API of the service. If a URI template is provided, all variables must be defined in the Storage Scheme Object as a property with the same name. -For example, the URI template `https://{bucket}.{region}.example.com` must have at least the properties +For example, the URI template `https://{bucket}.{region}.example.com` must have at least the properties `bucket` and `region` defined: ```json { + "type": "example", "platform": "https://{bucket}.{region}.example.com", "region": "eu-fr", "bucket": "john-doe-stac", @@ -82,19 +84,27 @@ the `platform` property must identify the host so that the URL can be resolved w For example, this is especially useful to provide the endpoint URL for custom S3 providers. In this case the `platform` could effectively provide the endpoint URL. +#### type + We try to collect pre-defined templates and best pratices for as many providers as possible -in this repository, but be aware that these are not part of the official extension releases -and are not validated. This extension just provides the framework, the provider best pratices +in this repository, but be aware that these are not part of the official extension releases. +This extension just provides the framework, the provider best practices may change at any time without a new version of this extension being released. 
The following providers have defined best pratices at this point: -- [AWS S3](platforms/aws-s3.md) -- [Generic S3 (non-AWS)](platforms/custom-s3.md) -- [Microsoft Azure](platforms/ms-azure.md) +| `type` | Provider and Documentation | +| ----------- | -------------------------- | +| `aws-s3` | [AWS S3](platforms/aws-s3.md) | +| `custom-s3` | [Generic S3 (non-AWS)](platforms/custom-s3.md) | +| `ms-azure` | [Microsoft Azure](platforms/ms-azure.md) | Feel encouraged to submit additional platform specifications via Pull Requests. +The `type` field can be any value chosen by the implementor, +but the types defined in the table above should be used as defined in the best practices. +This ensures proper schema validation. + ## Contributing See the [Contributor documentation](CONTRIBUTING.md) for details. diff --git a/examples/catalog-link.json b/examples/catalog-link.json index 442189a..8094a2c 100644 --- a/examples/catalog-link.json +++ b/examples/catalog-link.json @@ -9,6 +9,7 @@ "description": "An example catalog with a link to documentation on object storage.", "storage:schemes": { "aws": { + "type": "aws-s3", "platform": "https://{bucket}.s3.{region}.amazonaws.com", "bucket": "mybucket", "region": "us-west-2", diff --git a/examples/collection.json b/examples/collection.json index 26b606e..4ca6980 100644 --- a/examples/collection.json +++ b/examples/collection.json @@ -11,6 +11,7 @@ "license": "CC-0", "storage:schemes": { "aws": { + "type": "aws-s3", "platform": "https://{bucket}.s3.{region}.amazonaws.com", "bucket": "mybucket", "region": "us-west-2", diff --git a/examples/item-naip.json b/examples/item-naip.json index 26a2727..2458121 100644 --- a/examples/item-naip.json +++ b/examples/item-naip.json @@ -43,21 +43,23 @@ "properties": { "datetime": "2016-09-28T00:00:00+00:00", "mission": "NAIP", - "platform": "UNKNOWN_PLATFORM", "gsd": 1, "storage:schemes": { "az-wus2-ar": { + "type": "ms-azure", "platform": "https://{account}.blob.core.windows.net", "account": 
"jon-doe-123", "region": "westus2" }, "aws-std": { + "type": "aws-s3", "platform": "https://{bucket}.s3.{region}.amazonaws.com", "bucket": "naip-visualization", "region": "us-west-2", "requester_pays": true }, "minio": { + "type": "custom-s3", "platform": "https://play.min.io:9000" } } diff --git a/json-schema/platforms/aws-s3.json b/json-schema/platforms/aws-s3.json new file mode 100644 index 0000000..3721741 --- /dev/null +++ b/json-schema/platforms/aws-s3.json @@ -0,0 +1,29 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://stac-extensions.github.io/storage/v2.0.0/platforms/aws-s3.json", + "title": "AWS S3", + "type": "object", + "if": { + "properties": { + "type": { + "const": "aws-s3" + } + } + }, + "then": { + "properties": { + "platform": { + "const": "https://{bucket}.s3.{region}.amazonaws.com" + }, + "bucket": { + "$comment": "See https://docs.aws.amazon.com/AmazonS3/latest/userguide/bucketnamingrules.html", + "type": "string", + "pattern": "^[a-z0-9][a-z0-9-.]{1,61}[a-z0-9]$" + }, + "region": { + "type": "string", + "pattern": "^[a-z0-9-]+$" + } + } + } +} \ No newline at end of file diff --git a/json-schema/platforms/custom-s3.json b/json-schema/platforms/custom-s3.json new file mode 100644 index 0000000..25a89b7 --- /dev/null +++ b/json-schema/platforms/custom-s3.json @@ -0,0 +1,16 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": "https://stac-extensions.github.io/storage/v2.0.0/platforms/custom-s3.json", + "title": "Generic S3", + "type": "object", + "if": { + "properties": { + "type": { + "const": "custom-s3" + } + } + }, + "then": { + "$comment": "No specific validation rules apply" + } +} \ No newline at end of file diff --git a/json-schema/platforms/ms-azure.json b/json-schema/platforms/ms-azure.json new file mode 100644 index 0000000..44132da --- /dev/null +++ b/json-schema/platforms/ms-azure.json @@ -0,0 +1,23 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "$id": 
"https://stac-extensions.github.io/storage/v2.0.0/platforms/ms-azure.json", + "title": "Microsoft Azure", + "type": "object", + "if": { + "properties": { + "type": { + "const": "ms-azure" + } + } + }, + "then": { + "properties": { + "platform": { + "const": "https://{account}.blob.core.windows.net" + }, + "account": { + "type": "string" + } + } + } +} \ No newline at end of file diff --git a/json-schema/schema.json b/json-schema/schema.json index 44c265f..1716e31 100644 --- a/json-schema/schema.json +++ b/json-schema/schema.json @@ -96,9 +96,14 @@ "patternProperties": { "^.{1,}$": { "required": [ + "type", "platform" ], "properties": { + "type": { + "title": "Type identifier", + "type": "string" + }, "platform": { "title": "Platform", "type": "string", @@ -115,6 +120,17 @@ "default": false } }, + "allOf": [ + { + "$ref": "./platforms/aws-s3.json" + }, + { + "$ref": "./platforms/custom-s3.json" + }, + { + "$ref": "./platforms/ms-azure.json" + } + ], "additionalProperties": true } }, diff --git a/package.json b/package.json index 948eb39..09ae103 100644 --- a/package.json +++ b/package.json @@ -4,8 +4,8 @@ "scripts": { "test": "npm run check-markdown && npm run check-examples", "check-markdown": "remark . -f -r .github/remark.yaml", - "check-examples": "stac-node-validator . --lint --verbose --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json", - "format-examples": "stac-node-validator . 
--format --schemaMap https://stac-extensions.github.io/storage/v2.0.0/schema.json=./json-schema/schema.json" + "check-examples": "stac-node-validator examples --verbose --lint --config ./validator-config.json", + "format-examples": "stac-node-validator examples --format --config ./validator-config.json" }, "dependencies": { "remark-cli": "^8.0.0", @@ -15,6 +15,6 @@ "remark-preset-lint-markdown-style-guide": "^3.0.0", "remark-preset-lint-recommended": "^4.0.0", "remark-validate-links": "^10.0.0", - "stac-node-validator": "^1.1.0" + "stac-node-validator": "^2.0.0-beta.12" } } diff --git a/validator-config.json b/validator-config.json new file mode 100644 index 0000000..e083d05 --- /dev/null +++ b/validator-config.json @@ -0,0 +1,8 @@ +{ + "schemaMap": { + "https://stac-extensions.github.io/storage/v2.0.0/schema.json": "./json-schema/schema.json", + "https://stac-extensions.github.io/storage/v2.0.0/platforms/aws-s3.json": "./json-schema/platforms/aws-s3.json", + "https://stac-extensions.github.io/storage/v2.0.0/platforms/custom-s3.json": "./json-schema/platforms/custom-s3.json", + "https://stac-extensions.github.io/storage/v2.0.0/platforms/ms-azure.json": "./json-schema/platforms/ms-azure.json" + } +} \ No newline at end of file From 274b0f23dde7ecac68888a3a835235c60d1ce0aa Mon Sep 17 00:00:00 2001 From: Matthias Mohr Date: Fri, 18 Oct 2024 14:56:17 +0200 Subject: [PATCH 8/8] Describe how to add a new provider --- CONTRIBUTING.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index be7bbc6..0ea1512 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,3 +31,13 @@ If the tests reveal formatting problems with the examples, you can fix them with ```bash npm run format-examples ``` + +## Adding a new provider + +1. Add documentation in a Markdown file to the folder `platforms` +2. Add the provider to the table in the `README.md`, see chapter "type" +3. Add a JSON Schema to the folder `json-schema/platforms` +4. 
Add the schema to the extension schema in file `json-schema/schema.json` (search for `allOf` below the definition of `storage:schemes`) +5. Add the newly created schema to the `validator-config.json` + +Use the same file names (excluding the extension) for documentation and schema.