From a4e924907d720a12e23f71097c51c2aa800c743a Mon Sep 17 00:00:00 2001
From: Aninda Ghosh <anindaghosh63@gmail.com>
Date: Thu, 6 Jun 2024 13:03:25 -0700
Subject: [PATCH 1/4] Added South Africa and Germany Fieldscapes Converter

---
 fiboa_cli/datasets/fs_de_bb.py | 144 +++++++++++++++++++++++++++++++++
 fiboa_cli/datasets/fs_za_ct.py | 144 +++++++++++++++++++++++++++++++++
 2 files changed, 288 insertions(+)
 create mode 100644 fiboa_cli/datasets/fs_de_bb.py
 create mode 100644 fiboa_cli/datasets/fs_za_ct.py

diff --git a/fiboa_cli/datasets/fs_de_bb.py b/fiboa_cli/datasets/fs_de_bb.py
new file mode 100644
index 0000000..adf5bbc
--- /dev/null
+++ b/fiboa_cli/datasets/fs_de_bb.py
@@ -0,0 +1,144 @@
+# TEMPLATE FOR A FIBOA CONVERTER
+#
+# Copy this file and rename it to something sensible.
+# The name of the file will be the name of the converter in the cli.
+# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.
+
+from ..convert_utils import convert as convert_
+
+# File to read the data from
+# Can read any tabular data format that GeoPandas can read through read_file()
+# Supported protcols: HTTP(S), GCS, S3, or the local file system
+
+# Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
+URI = "/home/byteboogie/fieldscapes/germany/fs_de_bb.gpkg"
+
+# Unique identifier for the collection
+ID = "fs_de_bb"
+# Title of the collection
+TITLE = "Field boundaries for Germany, Brandenburg"
+# Description of the collection. Can be multiline and include CommonMark.
+DESCRIPTION = """ The dataset contains field boundaries for the German state of Brandenburg."""
+# Bounding box of the data in WGS84 coordinates
+BBOX = [13.635334610075107, 52.41814553442972, 14.35270427904761, 52.849468757681805]
+
+# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
+PROVIDER_NAME = "ESA"
+# URL to the homepage of the data or the provider, can be None if not applicable
+PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
+# Attribution, can be None if not applicable
+ATTRIBUTION = "© GeoBasis-DE/LGB"
+
+# License of the data, either
+# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
+LICENSE = "DL-DE->BY-2.0"
+# 2. a STAC Link Object with relation type "license"
+# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
+
+# Map original column names to fiboa property names
+# You also need to list any column that you may have added in the MIGRATION function (see below).
+COLUMNS = {
+    'fid': 'id',
+    'grid_id': 'grid_id',
+    "SHAPE_AREA": "area",
+    "SHAPE_LEN": "perimeter",
+    'geometry': 'geometry',
+    'crop_id': 'crop_id',
+    'crop_name': 'crop_name'
+}
+
+# Add columns with constant values.
+# The key is the column name, the value is a constant value that's used for all rows.
+ADD_COLUMNS = {
+    "determination_datetime": "2018-01-01T00:00:00Z"
+}
+
+# A list of implemented extension identifiers
+EXTENSIONS = []
+
+# Functions to migrate data in columns to match the fiboa specification.
+# Example: You have a column area_m in square meters and want to convert
+# to hectares as required for the area field in fiboa.
+# Function signature:
+#   func(column: pd.Series) -> pd.Series
+COLUMN_MIGRATIONS = {
+    
+}
+
+# Filter columns to only include the ones that are relevant for the collection,
+# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
+# Lamda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to inverse the mask.
+COLUMN_FILTERS = {
+    
+}
+
+# Custom function to migrate the GeoDataFrame if the other options are not sufficient
+# This should be the last resort!
+# Function signature:
+#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
+MIGRATION = None
+
+# Schemas for the fields that are not defined in fiboa
+# Keys must be the values from the COLUMNS dict, not the keys
+MISSING_SCHEMAS = {
+    "required": ["grid_id"], # i.e. non-nullable properties
+    "properties": {
+        "grid_id": {
+            "type": "string"
+        }
+    }
+}
+
+
+# Conversion function, usually no changes required
+def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
+    """
+    Converts the field boundary datasets to fiboa.
+
+    For reference, this is the order in which the conversion steps are applied:
+    0. Read GeoDataFrame from file
+    1. Run global migration (if provided through MIGRATION)
+    2. Run filters to remove rows that shall not be in the final data
+       (if provided through COLUMN_FILTERS)
+    3. Add columns with constant values
+    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
+    5. Duplicate columns (if an array is provided as the value in COLUMNS)
+    6. Rename columns (as provided in COLUMNS)
+    7. Remove columns (if column is not present as value in COLUMNS)
+    8. Create the collection
+    9. Change data types of the columns based on the provided schemas
+    (fiboa spec, extensions, and MISSING_SCHEMAS)
+    10. Write the data to the Parquet file
+
+    Parameters:
+    output_file (str): Path where the Parquet file shall be stored.
+    cache_file (str): Path to a cached file of the data. Default: None.
+                      Can be used to avoid repetitive downloads from the original data source.
+    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
+    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
+    compression (str): Compression method for the Parquet file. Default: zstd
+    kwargs: Additional keyword arguments for GeoPanda's read_file() or read_parquet() function.
+    """
+    convert_(
+        output_file,
+        cache_file,
+        URI,
+        COLUMNS,
+        ID,
+        TITLE,
+        DESCRIPTION,
+        BBOX,
+        provider_name=PROVIDER_NAME,
+        provider_url=PROVIDER_URL,
+        source_coop_url=source_coop_url,
+        extensions=EXTENSIONS,
+        missing_schemas=MISSING_SCHEMAS,
+        column_additions=ADD_COLUMNS,
+        column_migrations=COLUMN_MIGRATIONS,
+        column_filters=COLUMN_FILTERS,
+        migration=MIGRATION,
+        attribution=ATTRIBUTION,
+        store_collection=collection,
+        license=LICENSE,
+        compression=compression,
+    )
\ No newline at end of file
diff --git a/fiboa_cli/datasets/fs_za_ct.py b/fiboa_cli/datasets/fs_za_ct.py
new file mode 100644
index 0000000..569906b
--- /dev/null
+++ b/fiboa_cli/datasets/fs_za_ct.py
@@ -0,0 +1,144 @@
+# TEMPLATE FOR A FIBOA CONVERTER
+#
+# Copy this file and rename it to something sensible.
+# The name of the file will be the name of the converter in the cli.
+# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.
+
+from ..convert_utils import convert as convert_
+
+# File to read the data from
+# Can read any tabular data format that GeoPandas can read through read_file()
+# Supported protcols: HTTP(S), GCS, S3, or the local file system
+
+# Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
+URI = "/home/byteboogie/fieldscapes/south_africa/fs_za_ct.gpkg"
+
+# Unique identifier for the collection
+ID = "fs_za_ct"
+# Title of the collection
+TITLE = "Field boundaries for Cape Town, South Africa"
+# Description of the collection. Can be multiline and include CommonMark.
+DESCRIPTION = """ The dataset contains field boundaries for the Cape Town, South Africa."""
+# Bounding box of the data in WGS84 coordinates
+BBOX = [20.521492384730347, -34.39922362572791, 21.04341451023305, -33.980506187460875]
+
+# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
+PROVIDER_NAME = "Planet, Radiant Earth Foundation, Western Cape Department of Agriculture, & German Aerospace Center (DLR)"
+# URL to the homepage of the data or the provider, can be None if not applicable
+PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
+# Attribution, can be None if not applicable
+ATTRIBUTION = "ESA Fusion Competition"
+
+# License of the data, either
+# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
+LICENSE = "CC BY-NC-SA 4.0"
+# 2. a STAC Link Object with relation type "license"
+# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
+
+# Map original column names to fiboa property names
+# You also need to list any column that you may have added in the MIGRATION function (see below).
+COLUMNS = {
+    'fid': 'id',
+    'grid_id': 'grid_id',
+    "SHAPE_AREA": "area",
+    "SHAPE_LEN": "perimeter",
+    'geometry': 'geometry',
+    'crop_id': 'crop_id',
+    'crop_name': 'crop_name'
+}
+
+# Add columns with constant values.
+# The key is the column name, the value is a constant value that's used for all rows.
+ADD_COLUMNS = {
+    "determination_datetime": "2021-01-01T00:00:00Z"
+}
+
+# A list of implemented extension identifiers
+EXTENSIONS = []
+
+# Functions to migrate data in columns to match the fiboa specification.
+# Example: You have a column area_m in square meters and want to convert
+# to hectares as required for the area field in fiboa.
+# Function signature:
+#   func(column: pd.Series) -> pd.Series
+COLUMN_MIGRATIONS = {
+    "area_m": lambda column: column * 0.0001
+}
+
+# Filter columns to only include the ones that are relevant for the collection,
+# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
+# Lamda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to inverse the mask.
+COLUMN_FILTERS = {
+    
+}
+
+# Custom function to migrate the GeoDataFrame if the other options are not sufficient
+# This should be the last resort!
+# Function signature:
+#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
+MIGRATION = None
+
+# Schemas for the fields that are not defined in fiboa
+# Keys must be the values from the COLUMNS dict, not the keys
+MISSING_SCHEMAS = {
+    "required": ["grid_id"], # i.e. non-nullable properties
+    "properties": {
+        "grid_id": {
+            "type": "string"
+        }
+    }
+}
+
+
+# Conversion function, usually no changes required
+def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
+    """
+    Converts the field boundary datasets to fiboa.
+
+    For reference, this is the order in which the conversion steps are applied:
+    0. Read GeoDataFrame from file
+    1. Run global migration (if provided through MIGRATION)
+    2. Run filters to remove rows that shall not be in the final data
+       (if provided through COLUMN_FILTERS)
+    3. Add columns with constant values
+    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
+    5. Duplicate columns (if an array is provided as the value in COLUMNS)
+    6. Rename columns (as provided in COLUMNS)
+    7. Remove columns (if column is not present as value in COLUMNS)
+    8. Create the collection
+    9. Change data types of the columns based on the provided schemas
+    (fiboa spec, extensions, and MISSING_SCHEMAS)
+    10. Write the data to the Parquet file
+
+    Parameters:
+    output_file (str): Path where the Parquet file shall be stored.
+    cache_file (str): Path to a cached file of the data. Default: None.
+                      Can be used to avoid repetitive downloads from the original data source.
+    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
+    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
+    compression (str): Compression method for the Parquet file. Default: zstd
+    kwargs: Additional keyword arguments for GeoPanda's read_file() or read_parquet() function.
+    """
+    convert_(
+        output_file,
+        cache_file,
+        URI,
+        COLUMNS,
+        ID,
+        TITLE,
+        DESCRIPTION,
+        BBOX,
+        provider_name=PROVIDER_NAME,
+        provider_url=PROVIDER_URL,
+        source_coop_url=source_coop_url,
+        extensions=EXTENSIONS,
+        missing_schemas=MISSING_SCHEMAS,
+        column_additions=ADD_COLUMNS,
+        column_migrations=COLUMN_MIGRATIONS,
+        column_filters=COLUMN_FILTERS,
+        migration=MIGRATION,
+        attribution=ATTRIBUTION,
+        store_collection=collection,
+        license=LICENSE,
+        compression=compression,
+    )
\ No newline at end of file

From 3c014a85ded2e2f51a0896b85443f9674adaa947 Mon Sep 17 00:00:00 2001
From: Aninda Ghosh <anindaghosh63@gmail.com>
Date: Thu, 13 Jun 2024 17:10:53 -0700
Subject: [PATCH 2/4] Updated Converters for Germany and South Africa
 (Fieldscapes Subset)

---
 .../datasets/fieldscapes_germany_2021.py      | 138 ++++++++++++++++++
 .../datasets/fieldscapes_southafrica_2021.py  | 138 ++++++++++++++++++
 2 files changed, 276 insertions(+)
 create mode 100644 fiboa_cli/datasets/fieldscapes_germany_2021.py
 create mode 100644 fiboa_cli/datasets/fieldscapes_southafrica_2021.py

diff --git a/fiboa_cli/datasets/fieldscapes_germany_2021.py b/fiboa_cli/datasets/fieldscapes_germany_2021.py
new file mode 100644
index 0000000..ccc9327
--- /dev/null
+++ b/fiboa_cli/datasets/fieldscapes_germany_2021.py
@@ -0,0 +1,138 @@
+# Fiboa converter for the Fieldscapes field boundaries of Brandenburg, Germany (2021).
+#
+# The name of this file is the name of the converter in the cli:
+# run `fiboa convert fieldscapes_germany_2021` to use it.
+# Original data source: https://beta.source.coop/esa/fusion-competition/
+
+from ..convert_utils import convert as convert_
+
+# File to read the data from
+# Can read any tabular data format that GeoPandas can read through read_file()
+# Supported protocols: HTTP(S), GCS, S3, or the local file system
+
+# NOTE: local file path used for the initial conversion, not a public URL (TODO: point to a reachable copy). Original source: https://beta.source.coop/esa/fusion-competition/
+URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/germany/boundaries_germany_2021.gpkg"
+
+# Unique identifier for the collection
+ID = "fieldscapes_germany_2021"
+# Title of the collection
+TITLE = "Field boundaries for Germany, Brandenburg (Fieldscapes)"
+# Description of the collection. Can be multiline and include CommonMark.
+DESCRIPTION = """ The dataset contains field boundaries for the German state of Brandenburg."""
+# Bounding box of the data in WGS84 coordinates
+BBOX = [13.635334610075107, 52.41814553442972, 14.35270427904761, 52.849468757681805]
+
+# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
+PROVIDER_NAME = "ESA"
+# URL to the homepage of the data or the provider, can be None if not applicable
+PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
+# Attribution, can be None if not applicable
+ATTRIBUTION = "© GeoBasis-DE/LGB"
+
+# License of the data, either
+# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
+LICENSE = "dl-de/by-2-0"
+# 2. a STAC Link Object with relation type "license"
+# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
+
+# Map original column names to fiboa property names
+# You also need to list any column that you may have added in the MIGRATION function (see below).
+COLUMNS = {
+    "id": "id",
+    "SHAPE_AREA": "area",
+    "SHAPE_LEN": "perimeter",
+    "geometry": "geometry",
+    "crop_id": "crop_id",
+    "crop_name": "crop_name"
+}
+
+# Add columns with constant values.
+# The key is the column name, the value is a constant value that's used for all rows.
+ADD_COLUMNS = {
+    "determination_datetime": "2021-01-01T00:00:00Z"
+}
+
+# A list of implemented extension identifiers
+EXTENSIONS = []
+
+# Functions to migrate data in columns to match the fiboa specification.
+# Example: You have a column area_m in square meters and want to convert
+# to hectares as required for the area field in fiboa.
+# Function signature:
+#   func(column: pd.Series) -> pd.Series
+COLUMN_MIGRATIONS = {
+    
+}
+
+# Filter rows to only include the ones that are relevant for the collection,
+# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
+# Lambda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to invert the mask.
+COLUMN_FILTERS = {
+    
+}
+
+# Custom function to migrate the GeoDataFrame if the other options are not sufficient
+# This should be the last resort!
+# Function signature:
+#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
+MIGRATION = None
+
+# Schemas for the fields that are not defined in fiboa
+# Keys must be the values from the COLUMNS dict, not the keys
+MISSING_SCHEMAS = {
+
+}
+
+
+# Conversion function, usually no changes required
+def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
+    """
+    Converts the field boundary datasets to fiboa.
+
+    For reference, this is the order in which the conversion steps are applied:
+    0. Read GeoDataFrame from file
+    1. Run global migration (if provided through MIGRATION)
+    2. Run filters to remove rows that shall not be in the final data
+       (if provided through COLUMN_FILTERS)
+    3. Add columns with constant values
+    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
+    5. Duplicate columns (if an array is provided as the value in COLUMNS)
+    6. Rename columns (as provided in COLUMNS)
+    7. Remove columns (if column is not present as value in COLUMNS)
+    8. Create the collection
+    9. Change data types of the columns based on the provided schemas
+    (fiboa spec, extensions, and MISSING_SCHEMAS)
+    10. Write the data to the Parquet file
+
+    Parameters:
+    output_file (str): Path where the Parquet file shall be stored.
+    cache_file (str): Path to a cached file of the data. Default: None.
+                      Can be used to avoid repetitive downloads from the original data source.
+    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
+    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
+    compression (str): Compression method for the Parquet file. Default: None, which is passed on unchanged to the shared convert() utility.
+    Note: additional keyword arguments for GeoPandas' read_file() or read_parquet() are not accepted by this function.
+    """
+    convert_(
+        output_file,
+        cache_file,
+        URI,
+        COLUMNS,
+        ID,
+        TITLE,
+        DESCRIPTION,
+        BBOX,
+        provider_name=PROVIDER_NAME,
+        provider_url=PROVIDER_URL,
+        source_coop_url=source_coop_url,
+        extensions=EXTENSIONS,
+        missing_schemas=MISSING_SCHEMAS,
+        column_additions=ADD_COLUMNS,
+        column_migrations=COLUMN_MIGRATIONS,
+        column_filters=COLUMN_FILTERS,
+        migration=MIGRATION,
+        attribution=ATTRIBUTION,
+        store_collection=collection,
+        license=LICENSE,
+        compression=compression,
+    )
\ No newline at end of file
diff --git a/fiboa_cli/datasets/fieldscapes_southafrica_2021.py b/fiboa_cli/datasets/fieldscapes_southafrica_2021.py
new file mode 100644
index 0000000..21cfb8f
--- /dev/null
+++ b/fiboa_cli/datasets/fieldscapes_southafrica_2021.py
@@ -0,0 +1,138 @@
+# Fiboa converter for the Fieldscapes field boundaries of South Africa (2021).
+#
+# The name of this file is the name of the converter in the cli:
+# run `fiboa convert fieldscapes_southafrica_2021` to use it.
+# Original data source: https://beta.source.coop/esa/fusion-competition/
+
+from ..convert_utils import convert as convert_
+
+# File to read the data from
+# Can read any tabular data format that GeoPandas can read through read_file()
+# Supported protocols: HTTP(S), GCS, S3, or the local file system
+
+# NOTE: local file path used for the initial conversion, not a public URL (TODO: point to a reachable copy). Original source: https://beta.source.coop/esa/fusion-competition/
+URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/southafrica/boundaries_southafrica_2021.gpkg"
+
+# Unique identifier for the collection, kept identical to the converter (file) name
+ID = "fieldscapes_southafrica_2021"
+# Title of the collection
+TITLE = "Field boundaries for Cape Town, South Africa"
+# Description of the collection. Can be multiline and include CommonMark.
+DESCRIPTION = """ The dataset contains field boundaries for the Cape Town, South Africa."""
+# Bounding box of the data in WGS84 coordinates
+BBOX = [20.521492384730347, -34.39922362572791, 21.04341451023305, -33.980506187460875]
+
+# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
+PROVIDER_NAME = "Planet, Radiant Earth Foundation, Western Cape Department of Agriculture, & German Aerospace Center (DLR)"
+# URL to the homepage of the data or the provider, can be None if not applicable
+PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
+# Attribution, can be None if not applicable
+ATTRIBUTION = "ESA Fusion Competition"
+
+# License of the data, either
+# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
+LICENSE = "CC-BY-NC-SA-4.0"
+# 2. a STAC Link Object with relation type "license"
+# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
+
+# Map original column names to fiboa property names
+# You also need to list any column that you may have added in the MIGRATION function (see below).
+COLUMNS = {
+    "id": "id",
+    "SHAPE_AREA": "area",
+    "SHAPE_LEN": "perimeter",
+    "geometry": "geometry",
+    "crop_id": "crop_id",
+    "crop_name": "crop_name"
+}
+
+# Add columns with constant values.
+# The key is the column name, the value is a constant value that's used for all rows.
+ADD_COLUMNS = {
+    "determination_datetime": "2021-01-01T00:00:00Z"
+}
+
+# A list of implemented extension identifiers
+EXTENSIONS = []
+
+# Functions to migrate data in columns to match the fiboa specification.
+# Example: You have a column area_m in square meters and want to convert
+# to hectares as required for the area field in fiboa.
+# Function signature:
+#   func(column: pd.Series) -> pd.Series
+COLUMN_MIGRATIONS = {
+
+}
+
+# Filter rows to only include the ones that are relevant for the collection,
+# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
+# Lambda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to invert the mask.
+COLUMN_FILTERS = {
+    
+}
+
+# Custom function to migrate the GeoDataFrame if the other options are not sufficient
+# This should be the last resort!
+# Function signature:
+#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
+MIGRATION = None
+
+# Schemas for the fields that are not defined in fiboa
+# Keys must be the values from the COLUMNS dict, not the keys
+MISSING_SCHEMAS = {
+
+}
+
+
+# Conversion function, usually no changes required
+def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
+    """
+    Converts the field boundary datasets to fiboa.
+
+    For reference, this is the order in which the conversion steps are applied:
+    0. Read GeoDataFrame from file
+    1. Run global migration (if provided through MIGRATION)
+    2. Run filters to remove rows that shall not be in the final data
+       (if provided through COLUMN_FILTERS)
+    3. Add columns with constant values
+    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
+    5. Duplicate columns (if an array is provided as the value in COLUMNS)
+    6. Rename columns (as provided in COLUMNS)
+    7. Remove columns (if column is not present as value in COLUMNS)
+    8. Create the collection
+    9. Change data types of the columns based on the provided schemas
+    (fiboa spec, extensions, and MISSING_SCHEMAS)
+    10. Write the data to the Parquet file
+
+    Parameters:
+    output_file (str): Path where the Parquet file shall be stored.
+    cache_file (str): Path to a cached file of the data. Default: None.
+                      Can be used to avoid repetitive downloads from the original data source.
+    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
+    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
+    compression (str): Compression method for the Parquet file. Default: None, which is passed on unchanged to the shared convert() utility.
+    Note: additional keyword arguments for GeoPandas' read_file() or read_parquet() are not accepted by this function.
+    """
+    convert_(
+        output_file,
+        cache_file,
+        URI,
+        COLUMNS,
+        ID,
+        TITLE,
+        DESCRIPTION,
+        BBOX,
+        provider_name=PROVIDER_NAME,
+        provider_url=PROVIDER_URL,
+        source_coop_url=source_coop_url,
+        extensions=EXTENSIONS,
+        missing_schemas=MISSING_SCHEMAS,
+        column_additions=ADD_COLUMNS,
+        column_migrations=COLUMN_MIGRATIONS,
+        column_filters=COLUMN_FILTERS,
+        migration=MIGRATION,
+        attribution=ATTRIBUTION,
+        store_collection=collection,
+        license=LICENSE,
+        compression=compression,
+    )
\ No newline at end of file

From 58d22465791a00a605b7d58991431198d89e1841 Mon Sep 17 00:00:00 2001
From: Aninda Ghosh <anindaghosh63@gmail.com>
Date: Thu, 13 Jun 2024 17:11:18 -0700
Subject: [PATCH 3/4] Removed older converters

---
 fiboa_cli/datasets/fs_de_bb.py | 144 ---------------------------------
 fiboa_cli/datasets/fs_za_ct.py | 144 ---------------------------------
 2 files changed, 288 deletions(-)
 delete mode 100644 fiboa_cli/datasets/fs_de_bb.py
 delete mode 100644 fiboa_cli/datasets/fs_za_ct.py

diff --git a/fiboa_cli/datasets/fs_de_bb.py b/fiboa_cli/datasets/fs_de_bb.py
deleted file mode 100644
index adf5bbc..0000000
--- a/fiboa_cli/datasets/fs_de_bb.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# TEMPLATE FOR A FIBOA CONVERTER
-#
-# Copy this file and rename it to something sensible.
-# The name of the file will be the name of the converter in the cli.
-# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.
-
-from ..convert_utils import convert as convert_
-
-# File to read the data from
-# Can read any tabular data format that GeoPandas can read through read_file()
-# Supported protcols: HTTP(S), GCS, S3, or the local file system
-
-# Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
-URI = "/home/byteboogie/fieldscapes/germany/fs_de_bb.gpkg"
-
-# Unique identifier for the collection
-ID = "fs_de_bb"
-# Title of the collection
-TITLE = "Field boundaries for Germany, Brandenburg"
-# Description of the collection. Can be multiline and include CommonMark.
-DESCRIPTION = """ The dataset contains field boundaries for the German state of Brandenburg."""
-# Bounding box of the data in WGS84 coordinates
-BBOX = [13.635334610075107, 52.41814553442972, 14.35270427904761, 52.849468757681805]
-
-# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
-PROVIDER_NAME = "ESA"
-# URL to the homepage of the data or the provider, can be None if not applicable
-PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
-# Attribution, can be None if not applicable
-ATTRIBUTION = "© GeoBasis-DE/LGB"
-
-# License of the data, either
-# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
-LICENSE = "DL-DE->BY-2.0"
-# 2. a STAC Link Object with relation type "license"
-# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
-
-# Map original column names to fiboa property names
-# You also need to list any column that you may have added in the MIGRATION function (see below).
-COLUMNS = {
-    'fid': 'id',
-    'grid_id': 'grid_id',
-    "SHAPE_AREA": "area",
-    "SHAPE_LEN": "perimeter",
-    'geometry': 'geometry',
-    'crop_id': 'crop_id',
-    'crop_name': 'crop_name'
-}
-
-# Add columns with constant values.
-# The key is the column name, the value is a constant value that's used for all rows.
-ADD_COLUMNS = {
-    "determination_datetime": "2018-01-01T00:00:00Z"
-}
-
-# A list of implemented extension identifiers
-EXTENSIONS = []
-
-# Functions to migrate data in columns to match the fiboa specification.
-# Example: You have a column area_m in square meters and want to convert
-# to hectares as required for the area field in fiboa.
-# Function signature:
-#   func(column: pd.Series) -> pd.Series
-COLUMN_MIGRATIONS = {
-    
-}
-
-# Filter columns to only include the ones that are relevant for the collection,
-# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
-# Lamda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to inverse the mask.
-COLUMN_FILTERS = {
-    
-}
-
-# Custom function to migrate the GeoDataFrame if the other options are not sufficient
-# This should be the last resort!
-# Function signature:
-#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
-MIGRATION = None
-
-# Schemas for the fields that are not defined in fiboa
-# Keys must be the values from the COLUMNS dict, not the keys
-MISSING_SCHEMAS = {
-    "required": ["grid_id"], # i.e. non-nullable properties
-    "properties": {
-        "grid_id": {
-            "type": "string"
-        }
-    }
-}
-
-
-# Conversion function, usually no changes required
-def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
-    """
-    Converts the field boundary datasets to fiboa.
-
-    For reference, this is the order in which the conversion steps are applied:
-    0. Read GeoDataFrame from file
-    1. Run global migration (if provided through MIGRATION)
-    2. Run filters to remove rows that shall not be in the final data
-       (if provided through COLUMN_FILTERS)
-    3. Add columns with constant values
-    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
-    5. Duplicate columns (if an array is provided as the value in COLUMNS)
-    6. Rename columns (as provided in COLUMNS)
-    7. Remove columns (if column is not present as value in COLUMNS)
-    8. Create the collection
-    9. Change data types of the columns based on the provided schemas
-    (fiboa spec, extensions, and MISSING_SCHEMAS)
-    10. Write the data to the Parquet file
-
-    Parameters:
-    output_file (str): Path where the Parquet file shall be stored.
-    cache_file (str): Path to a cached file of the data. Default: None.
-                      Can be used to avoid repetitive downloads from the original data source.
-    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
-    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
-    compression (str): Compression method for the Parquet file. Default: zstd
-    kwargs: Additional keyword arguments for GeoPanda's read_file() or read_parquet() function.
-    """
-    convert_(
-        output_file,
-        cache_file,
-        URI,
-        COLUMNS,
-        ID,
-        TITLE,
-        DESCRIPTION,
-        BBOX,
-        provider_name=PROVIDER_NAME,
-        provider_url=PROVIDER_URL,
-        source_coop_url=source_coop_url,
-        extensions=EXTENSIONS,
-        missing_schemas=MISSING_SCHEMAS,
-        column_additions=ADD_COLUMNS,
-        column_migrations=COLUMN_MIGRATIONS,
-        column_filters=COLUMN_FILTERS,
-        migration=MIGRATION,
-        attribution=ATTRIBUTION,
-        store_collection=collection,
-        license=LICENSE,
-        compression=compression,
-    )
\ No newline at end of file
diff --git a/fiboa_cli/datasets/fs_za_ct.py b/fiboa_cli/datasets/fs_za_ct.py
deleted file mode 100644
index 569906b..0000000
--- a/fiboa_cli/datasets/fs_za_ct.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# TEMPLATE FOR A FIBOA CONVERTER
-#
-# Copy this file and rename it to something sensible.
-# The name of the file will be the name of the converter in the cli.
-# If you name it 'de_abc' you'll be able to run `fiboa convert de_abc` in the cli.
-
-from ..convert_utils import convert as convert_
-
-# File to read the data from
-# Can read any tabular data format that GeoPandas can read through read_file()
-# Supported protcols: HTTP(S), GCS, S3, or the local file system
-
-# Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
-URI = "/home/byteboogie/fieldscapes/south_africa/fs_za_ct.gpkg"
-
-# Unique identifier for the collection
-ID = "fs_za_ct"
-# Title of the collection
-TITLE = "Field boundaries for Cape Town, South Africa"
-# Description of the collection. Can be multiline and include CommonMark.
-DESCRIPTION = """ The dataset contains field boundaries for the Cape Town, South Africa."""
-# Bounding box of the data in WGS84 coordinates
-BBOX = [20.521492384730347, -34.39922362572791, 21.04341451023305, -33.980506187460875]
-
-# Provider name, can be None if not applicable, must be provided if PROVIDER_URL is provided
-PROVIDER_NAME = "Planet, Radiant Earth Foundation, Western Cape Department of Agriculture, & German Aerospace Center (DLR)"
-# URL to the homepage of the data or the provider, can be None if not applicable
-PROVIDER_URL = "https://beta.source.coop/esa/fusion-competition/"
-# Attribution, can be None if not applicable
-ATTRIBUTION = "ESA Fusion Competition"
-
-# License of the data, either
-# 1. a SPDX license identifier (including "dl-de/by-2-0" / "dl-de/zero-2-0"), or
-LICENSE = "CC BY-NC-SA 4.0"
-# 2. a STAC Link Object with relation type "license"
-# LICENSE = {"title": "CC-BY-4.0", "href": "https://creativecommons.org/licenses/by/4.0/", "type": "text/html", "rel": "license"}
-
-# Map original column names to fiboa property names
-# You also need to list any column that you may have added in the MIGRATION function (see below).
-COLUMNS = {
-    'fid': 'id',
-    'grid_id': 'grid_id',
-    "SHAPE_AREA": "area",
-    "SHAPE_LEN": "perimeter",
-    'geometry': 'geometry',
-    'crop_id': 'crop_id',
-    'crop_name': 'crop_name'
-}
-
-# Add columns with constant values.
-# The key is the column name, the value is a constant value that's used for all rows.
-ADD_COLUMNS = {
-    "determination_datetime": "2021-01-01T00:00:00Z"
-}
-
-# A list of implemented extension identifiers
-EXTENSIONS = []
-
-# Functions to migrate data in columns to match the fiboa specification.
-# Example: You have a column area_m in square meters and want to convert
-# to hectares as required for the area field in fiboa.
-# Function signature:
-#   func(column: pd.Series) -> pd.Series
-COLUMN_MIGRATIONS = {
-    "area_m": lambda column: column * 0.0001
-}
-
-# Filter columns to only include the ones that are relevant for the collection,
-# e.g. only rows that contain the word "agriculture" but not "forest" in the column "land_cover_type".
-# Lamda function accepts a Pandas Series and returns a Series or a Tuple with a Series and True to inverse the mask.
-COLUMN_FILTERS = {
-    
-}
-
-# Custom function to migrate the GeoDataFrame if the other options are not sufficient
-# This should be the last resort!
-# Function signature:
-#   func(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame
-MIGRATION = None
-
-# Schemas for the fields that are not defined in fiboa
-# Keys must be the values from the COLUMNS dict, not the keys
-MISSING_SCHEMAS = {
-    "required": ["grid_id"], # i.e. non-nullable properties
-    "properties": {
-        "grid_id": {
-            "type": "string"
-        }
-    }
-}
-
-
-# Conversion function, usually no changes required
-def convert(output_file, cache_file = None, source_coop_url = None, collection = False, compression = None):
-    """
-    Converts the field boundary datasets to fiboa.
-
-    For reference, this is the order in which the conversion steps are applied:
-    0. Read GeoDataFrame from file
-    1. Run global migration (if provided through MIGRATION)
-    2. Run filters to remove rows that shall not be in the final data
-       (if provided through COLUMN_FILTERS)
-    3. Add columns with constant values
-    4. Run column migrations (if provided through COLUMN_MIGRATIONS)
-    5. Duplicate columns (if an array is provided as the value in COLUMNS)
-    6. Rename columns (as provided in COLUMNS)
-    7. Remove columns (if column is not present as value in COLUMNS)
-    8. Create the collection
-    9. Change data types of the columns based on the provided schemas
-    (fiboa spec, extensions, and MISSING_SCHEMAS)
-    10. Write the data to the Parquet file
-
-    Parameters:
-    output_file (str): Path where the Parquet file shall be stored.
-    cache_file (str): Path to a cached file of the data. Default: None.
-                      Can be used to avoid repetitive downloads from the original data source.
-    source_coop_url (str): URL to the (future) Source Cooperative repository. Default: None
-    collection (bool): Additionally, store the collection separate from Parquet file. Default: False
-    compression (str): Compression method for the Parquet file. Default: zstd
-    kwargs: Additional keyword arguments for GeoPanda's read_file() or read_parquet() function.
-    """
-    convert_(
-        output_file,
-        cache_file,
-        URI,
-        COLUMNS,
-        ID,
-        TITLE,
-        DESCRIPTION,
-        BBOX,
-        provider_name=PROVIDER_NAME,
-        provider_url=PROVIDER_URL,
-        source_coop_url=source_coop_url,
-        extensions=EXTENSIONS,
-        missing_schemas=MISSING_SCHEMAS,
-        column_additions=ADD_COLUMNS,
-        column_migrations=COLUMN_MIGRATIONS,
-        column_filters=COLUMN_FILTERS,
-        migration=MIGRATION,
-        attribution=ATTRIBUTION,
-        store_collection=collection,
-        license=LICENSE,
-        compression=compression,
-    )
\ No newline at end of file

From 52b744b14c6b2dd381a373db91888ad444c82083 Mon Sep 17 00:00:00 2001
From: Aninda Ghosh <anindaghosh63@gmail.com>
Date: Mon, 8 Jul 2024 16:03:28 -0700
Subject: [PATCH 4/4] Updated dataset years and added crop_id/crop_name schemas

---
 ...germany_2021.py => fieldscapes_germany.py} | 19 ++++++++++++++-----
 ...021.py => fieldscapes_southafrica_2018.py} | 16 ++++++++++++----
 2 files changed, 26 insertions(+), 9 deletions(-)
 rename fiboa_cli/datasets/{fieldscapes_germany_2021.py => fieldscapes_germany.py} (93%)
 rename fiboa_cli/datasets/{fieldscapes_southafrica_2021.py => fieldscapes_southafrica_2018.py} (94%)

diff --git a/fiboa_cli/datasets/fieldscapes_germany_2021.py b/fiboa_cli/datasets/fieldscapes_germany.py
similarity index 93%
rename from fiboa_cli/datasets/fieldscapes_germany_2021.py
rename to fiboa_cli/datasets/fieldscapes_germany.py
index ccc9327..9b03266 100644
--- a/fiboa_cli/datasets/fieldscapes_germany_2021.py
+++ b/fiboa_cli/datasets/fieldscapes_germany.py
@@ -11,10 +11,10 @@
 # Supported protcols: HTTP(S), GCS, S3, or the local file system
 
 # Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
-URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/germany/boundaries_germany_2021.gpkg"
+URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/germany/boundaries_germany.gpkg"
 
 # Unique identifier for the collection
-ID = "fieldscapes_germany_2021"
+ID = "fieldscapes_germany"
 # Title of the collection
 TITLE = "Field boundaries for Germany, Brandenburg (Fieldscapes)"
 # Description of the collection. Can be multiline and include CommonMark.
@@ -43,13 +43,14 @@
     "SHAPE_LEN": "perimeter",
     "geometry": "geometry",
     "crop_id": "crop_id",
-    "crop_name": "crop_name"
+    "crop_name": "crop_name",
+    "determination_datetime": "determination_datetime"
 }
 
 # Add columns with constant values.
 # The key is the column name, the value is a constant value that's used for all rows.
 ADD_COLUMNS = {
-    "determination_datetime": "2021-01-01T00:00:00Z"
+
 }
 
 # A list of implemented extension identifiers
@@ -80,7 +81,15 @@
 # Schemas for the fields that are not defined in fiboa
 # Keys must be the values from the COLUMNS dict, not the keys
 MISSING_SCHEMAS = {
-
+    "required": [ "crop_id", "crop_name" ], # i.e. non-nullable properties
+    "properties": {
+        "crop_id": {
+            "type": "int64"
+        },
+        "crop_name": {
+            "type": "string"
+        }
+    }
 }
 
 
diff --git a/fiboa_cli/datasets/fieldscapes_southafrica_2021.py b/fiboa_cli/datasets/fieldscapes_southafrica_2018.py
similarity index 94%
rename from fiboa_cli/datasets/fieldscapes_southafrica_2021.py
rename to fiboa_cli/datasets/fieldscapes_southafrica_2018.py
index 21cfb8f..68ad866 100644
--- a/fiboa_cli/datasets/fieldscapes_southafrica_2021.py
+++ b/fiboa_cli/datasets/fieldscapes_southafrica_2018.py
@@ -11,10 +11,10 @@
 # Supported protcols: HTTP(S), GCS, S3, or the local file system
 
 # Local URI added to the repository for initial conversion, Original Source https://beta.source.coop/esa/fusion-competition/
-URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/southafrica/boundaries_southafrica_2021.gpkg"
+URI = "/home/byteboogie/work/labwork_hkerner/fieldscapes/southafrica/boundaries_southafrica_2018.gpkg"
 
 # Unique identifier for the collection
-ID = "boundaries_southafrica_2021"
+ID = "boundaries_southafrica_2018"
 # Title of the collection
 TITLE = "Field boundaries for Cape Town, South Africa"
 # Description of the collection. Can be multiline and include CommonMark.
@@ -49,7 +49,7 @@
 # Add columns with constant values.
 # The key is the column name, the value is a constant value that's used for all rows.
 ADD_COLUMNS = {
-    "determination_datetime": "2021-01-01T00:00:00Z"
+    "determination_datetime": "2018-03-31T00:00:00Z"
 }
 
 # A list of implemented extension identifiers
@@ -80,7 +80,15 @@
 # Schemas for the fields that are not defined in fiboa
 # Keys must be the values from the COLUMNS dict, not the keys
 MISSING_SCHEMAS = {
-
+    "required": [ "crop_id", "crop_name" ], # i.e. non-nullable properties
+    "properties": {
+        "crop_id": {
+            "type": "int64"
+        },
+        "crop_name": {
+            "type": "string"
+        }
+    }
 }