Added South Africa and Germany Fieldscapes Converter #43

Draft: wants to merge 4 commits into main.

Changes shown are from 1 commit.
fiboa_cli/datasets/fs_de_bb.py (144 additions, 0 deletions)
@@ -0,0 +1,144 @@
# TEMPLATE FOR A FIBOA CONVERTER
#
# Copy this file and rename it to something sensible.
# The name of the file will be the name of the converter in the cli.
# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.
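#
# For this file the converter name is 'fs_de_bb', so (assuming the CLI works
# as described above; the output flag is an assumption) a run could look like:
#   fiboa convert fs_de_bb -o fs_de_bb.parquet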

from ..convert_utils import convert as convert_

# File to read the data from
# Can read any tabular data format that GeoPandas can read through read_file()
# Supported protocols: HTTP(S), GCS, S3, or the local file system

# Local URI used for the initial conversion; original source: https://beta.source.coop/esa/fusion-competition/
URI = "/home/byteboogie/fieldscapes/germany/fs_de_bb.gpkg"
Contributor commented:

I'm not sure how to proceed with this. It's not really useful for the general public if there's no way to get the source data. Any thoughts from the fieldscapes members?

Author replied:

Chrish asked to keep the local folder for now; later we could maybe change this to a Source URI? Not sure, this needs extensive discussion.


# Unique identifier for the collection
ID = "fs_de_bb"
# Title of the collection
TITLE = "Field boundaries for Germany, Brandenburg"
aninda-ghosh marked this conversation as resolved.
# Description of the collection. Can be multiline and include CommonMark.
DESCRIPTION = """The dataset contains field boundaries for the German state of Brandenburg."""
# Bounding box of the data in WGS84 coordinates
BBOX = [13.635334610075107, 52.41814553442972, 14.35270427904761, 52.849468757681805]

# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
PROVIDER_NAME = "ESA"
# URL to the homepage of the data or the provider, can be None if not applicable
PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
# Attribution, can be None if not applicable
ATTRIBUTION = "© GeoBasis-DE/LGB"

# License of the data, either
# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
LICENSE = "DL-DE->BY-2.0"
Contributor commented:

Suggested change:
-LICENSE = "DL-DE->BY-2.0"
+LICENSE = "dl-de/by-2-0"

# 2. a STAC Link Object with relation type "license"
# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}

# Map original column names to fiboa property names
# You also need to list any column that you may have added in the MIGRATION function (see below).
COLUMNS = {
    "fid": "id",
    "grid_id": "grid_id",
    "SHAPE_AREA": "area",
    "SHAPE_LEN": "perimeter",
    "geometry": "geometry",
    "crop_id": "crop_id",
    "crop_name": "crop_name"
}

# Add columns with constant values.
# The key is the column name, the value is a constant value that's used for all rows.
ADD_COLUMNS = {
    "determination_datetime": "2018-01-01T00:00:00Z"
}

# A list of implemented extension identifiers
EXTENSIONS = []

# Functions to migrate data in columns to match the fiboa specification.
# Example: You have a column area_m in square meters and want to convert
# to hectares as required for the area field in fiboa.
# Function signature:
# func(column: pd.Series) -> pd.Series
COLUMN_MIGRATIONS = {}
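# An illustrative (hypothetical) entry matching the comment above, converting
# a square-meter column to hectares:
#
# COLUMN_MIGRATIONS = {
#     "area_m": lambda column: column * 0.0001
# }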

# Filter columns to only include the ones that are relevant for the collection,
# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
# Lambda function accepts a Pandas Series and returns a Series, or a Tuple with a Series and True to invert the mask.
COLUMN_FILTERS = {}
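# An illustrative (hypothetical) filter matching the comment above; the
# column name is an assumption:
#
# COLUMN_FILTERS = {
#     "land_cover_type": lambda col: col.str.contains("agriculture") & ~col.str.contains("forest")
# }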

# Custom function to migrate the GeoDataFrame if the other options are not sufficient
# This should be the last resort!
# Function signature:
# func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
MIGRATION = None
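# A minimal sketch of such a migration (hypothetical, not used by this
# converter), e.g. dropping rows with empty geometries:
#
# def _drop_empty(gdf):
#     return gdf[~gdf.geometry.is_empty]
# MIGRATION = _drop_empty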

# Schemas for the fields that are not defined in fiboa
# Keys must be the values from the COLUMNS dict, not the keys
MISSING_SCHEMAS = {
    "required": ["grid_id"],  # i.e. non-nullable properties
    "properties": {
        "grid_id": {
            "type": "string"
        }
    }
}


# Conversion function, usually no changes required
def convert(output_file, cache_file=None, source_coop_url=None, collection=False, compression=None):
    """
    Converts the field boundary datasets to fiboa.

    For reference, this is the order in which the conversion steps are applied:
    0. Read GeoDataFrame from file
    1. Run global migration (if provided through MIGRATION)
    2. Run filters to remove rows that shall not be in the final data
       (if provided through COLUMN_FILTERS)
    3. Add columns with constant values
    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
    5. Duplicate columns (if an array is provided as the value in COLUMNS)
    6. Rename columns (as provided in COLUMNS)
    7. Remove columns (if column is not present as value in COLUMNS)
    8. Create the collection
    9. Change data types of the columns based on the provided schemas
       (fiboa spec, extensions, and MISSING_SCHEMAS)
    10. Write the data to the Parquet file

    Parameters:
    output_file (str): Path where the Parquet file shall be stored.
    cache_file (str): Path to a cached file of the data. Default: None.
        Can be used to avoid repetitive downloads from the original data source.
    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
    collection (bool): Additionally, store the collection separate from the Parquet file. Default: False
    compression (str): Compression method for the Parquet file. Default: zstd
    """
    convert_(
        output_file,
        cache_file,
        URI,
        COLUMNS,
        ID,
        TITLE,
        DESCRIPTION,
        BBOX,
        provider_name=PROVIDER_NAME,
        provider_url=PROVIDER_URL,
        source_coop_url=source_coop_url,
        extensions=EXTENSIONS,
        missing_schemas=MISSING_SCHEMAS,
        column_additions=ADD_COLUMNS,
        column_migrations=COLUMN_MIGRATIONS,
        column_filters=COLUMN_FILTERS,
        migration=MIGRATION,
        attribution=ATTRIBUTION,
        store_collection=collection,
        license=LICENSE,
        compression=compression,
    )
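
# A hypothetical invocation from Python (the CLI is the usual entry point):
#
# convert("fs_de_bb.parquet", collection=True)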
fiboa_cli/datasets/fs_za_ct.py (144 additions, 0 deletions)
@@ -0,0 +1,144 @@
# TEMPLATE FOR A FIBOA CONVERTER
#
# Copy this file and rename it to something sensible.
# The name of the file will be the name of the converter in the cli.
# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.

from ..convert_utils import convert as convert_

# File to read the data from
# Can read any tabular data format that GeoPandas can read through read_file()
# Supported protocols: HTTP(S), GCS, S3, or the local file system

# Local URI used for the initial conversion; original source: https://beta.source.coop/esa/fusion-competition/
URI = "/home/byteboogie/fieldscapes/south_africa/fs_za_ct.gpkg"

# Unique identifier for the collection
ID = "fs_za_ct"
# Title of the collection
TITLE = "Field boundaries for Cape Town, South Africa"
aninda-ghosh marked this conversation as resolved.
# Description of the collection. Can be multiline and include CommonMark.
DESCRIPTION = """The dataset contains field boundaries for Cape Town, South Africa."""
# Bounding box of the data in WGS84 coordinates
BBOX = [20.521492384730347, -34.39922362572791, 21.04341451023305, -33.980506187460875]

# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
PROVIDER_NAME = "Planet, Radiant Earth Foundation, Western Cape Department of Agriculture, & German Aerospace Center (DLR)"
# URL to the homepage of the data or the provider, can be None if not applicable
PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
# Attribution, can be None if not applicable
ATTRIBUTION = "ESA Fusion Competition"

# License of the data, either
# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
LICENSE = "CC BY-NC-SA 4.0"
Contributor commented:

According to https://spdx.github.io/spdx-spec/v2.3/SPDX-license-list/ this should be:

Suggested change:
-LICENSE = "CC BY-NC-SA 4.0"
+LICENSE = "CC-BY-NC-SA-4.0"

Author replied:

Yes, I forgot to follow the SPDX format.

# 2. a STAC Link Object with relation type "license"
# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}

# Map original column names to fiboa property names
# You also need to list any column that you may have added in the MIGRATION function (see below).
COLUMNS = {
    "fid": "id",
    "grid_id": "grid_id",
    "SHAPE_AREA": "area",
    "SHAPE_LEN": "perimeter",
    "geometry": "geometry",
    "crop_id": "crop_id",
    "crop_name": "crop_name"
}

# Add columns with constant values.
# The key is the column name, the value is a constant value that's used for all rows.
ADD_COLUMNS = {
    "determination_datetime": "2021-01-01T00:00:00Z"
}

# A list of implemented extension identifiers
EXTENSIONS = []

# Functions to migrate data in columns to match the fiboa specification.
# Example: You have a column area_m in square meters and want to convert
# to hectares as required for the area field in fiboa.
# Function signature:
# func(column: pd.Series) -> pd.Series
COLUMN_MIGRATIONS = {
"area_m": lambda column: column * 0.0001
Contributor commented:

Is this a left-over from the template?

Author replied:

Yes, it was a leftover; fixed now with the latest push.

}

# Filter columns to only include the ones that are relevant for the collection,
# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
# Lambda function accepts a Pandas Series and returns a Series, or a Tuple with a Series and True to invert the mask.
COLUMN_FILTERS = {}

# Custom function to migrate the GeoDataFrame if the other options are not sufficient
# This should be the last resort!
# Function signature:
# func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
MIGRATION = None

# Schemas for the fields that are not defined in fiboa
# Keys must be the values from the COLUMNS dict, not the keys
MISSING_SCHEMAS = {
Contributor commented:

crop_id and crop_name seem to be missing?!

Author replied:

Fixed the issue with the latest push.

"required": ["grid_id"], # i.e. non-nullable properties
"properties": {
"grid_id": {
"type": "string"
}
}
}
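# A hypothetical sketch of the schemas the reviewer asked for; the actual fix
# landed in a later push, and the types here are assumptions:
#
# MISSING_SCHEMAS = {
#     "required": ["grid_id"],
#     "properties": {
#         "grid_id": {"type": "string"},
#         "crop_id": {"type": "string"},
#         "crop_name": {"type": "string"}
#     }
# }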


# Conversion function, usually no changes required
def convert(output_file, cache_file=None, source_coop_url=None, collection=False, compression=None):
    """
    Converts the field boundary datasets to fiboa.

    For reference, this is the order in which the conversion steps are applied:
    0. Read GeoDataFrame from file
    1. Run global migration (if provided through MIGRATION)
    2. Run filters to remove rows that shall not be in the final data
       (if provided through COLUMN_FILTERS)
    3. Add columns with constant values
    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
    5. Duplicate columns (if an array is provided as the value in COLUMNS)
    6. Rename columns (as provided in COLUMNS)
    7. Remove columns (if column is not present as value in COLUMNS)
    8. Create the collection
    9. Change data types of the columns based on the provided schemas
       (fiboa spec, extensions, and MISSING_SCHEMAS)
    10. Write the data to the Parquet file

    Parameters:
    output_file (str): Path where the Parquet file shall be stored.
    cache_file (str): Path to a cached file of the data. Default: None.
        Can be used to avoid repetitive downloads from the original data source.
    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
    collection (bool): Additionally, store the collection separate from the Parquet file. Default: False
    compression (str): Compression method for the Parquet file. Default: zstd
    """
    convert_(
        output_file,
        cache_file,
        URI,
        COLUMNS,
        ID,
        TITLE,
        DESCRIPTION,
        BBOX,
        provider_name=PROVIDER_NAME,
        provider_url=PROVIDER_URL,
        source_coop_url=source_coop_url,
        extensions=EXTENSIONS,
        missing_schemas=MISSING_SCHEMAS,
        column_additions=ADD_COLUMNS,
        column_migrations=COLUMN_MIGRATIONS,
        column_filters=COLUMN_FILTERS,
        migration=MIGRATION,
        attribution=ATTRIBUTION,
        store_collection=collection,
        license=LICENSE,
        compression=compression,
    )