Skip to content

Commit

Permalink
Converters for AI4smallholders (#83)
Browse files Browse the repository at this point in the history
* Fieldscapes converter for AI4SmallFarms

* updating date and removing calculated area

* update to call (some) source data

* renamed files

* Finalize ai4sf converters

* Merge Cambodia and Vietnam converters, add test

---------

Co-authored-by: snehalchaudhari98 <[email protected]>
Co-authored-by: Matthias Mohr <[email protected]>
  • Loading branch information
3 people authored Jul 25, 2024
1 parent d75363f commit 53962e9
Show file tree
Hide file tree
Showing 5 changed files with 162 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Converter for Planet's Automated Field Boundary (`planet_afb`)
- Converter for Portugal (`pt`)
- Converter for DigiFarm (`digifarm`)
- Converter for AI4SmallFarms in Cambodia and Vietnam (`ai4sf`)
- Further tests

### Changed
Expand Down
153 changes: 153 additions & 0 deletions fiboa_cli/datasets/ai4sf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
from ..convert_utils import convert as convert_

# Download endpoint of the data station hosting the tiles; "{}" is replaced
# by the numeric datafile id of an individual GeoPackage.
_DATAFILE_URL = "https://phys-techsciences.datastations.nl/api/access/datafile/{}?gbrecs=true"

# Local file name given to each downloaded tile: "<tile>_<country>_areas.gpkg".
_TILE_FILENAME = "{}_{}_areas.gpkg"

# (datafile id, tile number) pairs, grouped by country.
# The order of countries and pairs defines the order of the SOURCES entries.
_TILES = {
    "cambodia": (
        (100634, 2),
        (100282, 3),
        (100392, 4),
        (100252, 5),
        (100072, 6),
        (100169, 7),
        (100348, 8),
        (100487, 9),
        (100084, 10),
        (100155, 11),
        (100475, 12),
        (100372, 13),
        (100196, 14),
        (100006, 15),
        (100248, 16),
        (100397, 17),
        (100217, 18),
        (100652, 19),
        (100326, 20),
        (100625, 21),
        (100413, 33),
        (100593, 34),
        (100057, 35),
        (100536, 36),
        (100343, 37),
        (100711, 38),
        (100313, 39),
        (100679, 57),
        (100191, 58),
        (100023, 59),
        (100025, 60),
        (100543, 61),
    ),
    "vietnam": (
        (100297, 0),
        (100636, 1),
        (100574, 22),
        (100095, 23),
        (100398, 24),
        (100187, 25),
        (100065, 26),
        (100425, 27),
        (100589, 28),
        (100021, 29),
        (100043, 30),
        (100562, 31),
        (100437, 32),
        (100549, 40),
        (100039, 41),
        (100427, 42),
        (100466, 43),
        (100014, 44),
        (100464, 45),
        (100416, 46),
        (100115, 47),
        (100510, 48),
        (100459, 49),
        (100340, 50),
        (100119, 51),
        (100086, 52),
        (100467, 53),
        (100176, 54),
        (100145, 55),
        (100492, 56),
    ),
}

# Maps each download URL to the file name of the tile it contains.
SOURCES = {
    _DATAFILE_URL.format(datafile_id): _TILE_FILENAME.format(tile, country)
    for country, tiles in _TILES.items()
    for datafile_id, tile in tiles
}

# Identifier of this converter.
ID = "ai4sf"

# Short and long human-readable names of the dataset.
SHORT_NAME = "Cambodia/Vietnam (AI4SmallFarms)"
TITLE = "Field boundaries for Cambodia and Vietnam (AI4SmallFarms)"

# License identifier for the dataset.
LICENSE = "CC-BY-4.0"

# Citation of the publication describing the dataset.
ATTRIBUTION = (
    "Persello, C., Grift, J., Fan, X., Paris, C., Hansch, R., Koeva, M., & Nelson, A. (2023). "
    "AI4SmallFarms: A Dataset for Crop Field Delineation in Southeast Asian Smallholder Farms. "
    "IEEE Geoscience and Remote Sensing Letters, 20, 1-5. Article 2505705. "
    "https://doi.org/10.1109/LGRS.2023.3323095"
)

# Organisations behind the dataset and the roles they hold.
PROVIDERS = [
    {
        "name": "DATA Archiving and Networked Services (DANS)",
        "url": "https://research.tudelft.nl/en/publications/ai4smallfarms-a-dataset-for-crop-field-delineation-in-southeast-a",
        "roles": ["producer", "licensor"],
    },
]

# Dataset abstract, taken from
# https://research.tudelft.nl/en/publications/ai4smallfarms-a-dataset-for-crop-field-delineation-in-southeast-a
DESCRIPTION = """
Agricultural field polygons within smallholder farming systems are essential to facilitate the collection of geo-spatial data useful for farmers, managers, and policymakers.
However, the limited availability of training labels poses a challenge in developing supervised methods to accurately delineate field boundaries using Earth Observation (EO) data.
This data set allows researchers to test and benchmark machine learning methods to delineate agricultural field boundaries in polygon format.
The large-scale data set consists of 439,001 field polygons divided into 62 tiles of approximately 5×5 km distributed across Vietnam and Cambodia, covering a range of fields and diverse landscape types.
The field polygons have been meticulously digitized from satellite images, following a rigorous multi-step quality control process and topological consistency checks.
Multi-temporal composites of Sentinel-2 (S2) images are provided to ensure cloud-free data.
"""

# Column mapping: key = column name in the (migrated) source data,
# value = column name written to the output.
COLUMNS = {
    # unique identifier generated in migrate()
    'fiboa_id': 'id',
    # the source "id" column is kept under the name "group"
    'id': 'group',
    # columns that keep their name
    '_predicate': '_predicate',
    'country': 'country',
    'geometry': 'geometry',
}

# Constant columns: each key is a column name and each value is the constant
# that is written to that column for every row.
ADD_COLUMNS = {
    "determination_datetime": "2021-08-01T00:00:00Z",
    # NOTE(review): DESCRIPTION says the polygons were "meticulously digitized
    # from satellite images" - confirm that "auto-imagery" is the intended method.
    "determination_method": "auto-imagery",
}

def migrate(gdf):
    """Add a ``fiboa_id`` column with identifiers of the form "xx_xxxxx".

    The identifier joins the value of the ``id`` column (left-padded with
    zeros to at least two characters) and the row's index label (left-padded
    with zeros to at least five characters) with an underscore.

    The data frame is modified in place and returned.
    """
    id_part = gdf['id'].astype(str).str.zfill(2)
    index_part = gdf.index.astype(str).str.zfill(5)
    gdf['fiboa_id'] = id_part + "_" + index_part
    return gdf


# Migration hook handed to the shared converter.
MIGRATION = migrate

# Schema definitions for the output columns declared in this module.
MISSING_SCHEMAS = {
    "properties": {
        "group": {
            "type": "uint8",
        },
        # NOTE(review): COLUMNS defines no "group_id" output column - confirm
        # whether this entry is still needed.
        "group_id": {
            "type": "uint16",
        },
        "_predicate": {
            "type": "string",
            "enum": ["INTERSECTS"],
        },
        "country": {
            "type": "string",
            "enum": ["cambodia", "vietnam"],
        },
    },
}

# Conversion function, usually no changes required
def convert(output_file, input_files=None, cache=None, source_coop_url=None, collection=False, compression=None):
    """Run the shared converter with the AI4SmallFarms dataset definition.

    All dataset-specific settings (sources, column mapping, metadata, schemas,
    constant columns and the migration hook) come from the module-level
    constants; the arguments below are forwarded unchanged to ``convert_``.

    Parameters:
        output_file: Forwarded as the first positional argument
            (presumably the path of the file to write).
        input_files: Forwarded as ``input_files``
            (presumably local files to use instead of downloading SOURCES).
        cache: Forwarded as the second positional argument
            (presumably a cache location for downloads).
        source_coop_url: Forwarded as ``source_coop_url``.
        collection: Forwarded as ``store_collection``.
        compression: Forwarded as ``compression``.
    """
    convert_(
        output_file,
        cache,
        SOURCES,
        COLUMNS,
        ID,
        TITLE,
        DESCRIPTION,
        input_files=input_files,
        # PROVIDERS is declared in this module but was never handed to the
        # shared converter, so the provider metadata could not reach the output.
        providers=PROVIDERS,
        source_coop_url=source_coop_url,
        missing_schemas=MISSING_SCHEMAS,
        column_additions=ADD_COLUMNS,
        migration=MIGRATION,
        attribution=ATTRIBUTION,
        store_collection=collection,
        license=LICENSE,
        compression=compression,
    )
Binary file not shown.
Binary file not shown.
11 changes: 8 additions & 3 deletions tests/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,17 @@ def test_converter(out_file, converter):


@mark.parametrize('args', [
['be_wa', 'tests/data-files/convert/be_wa/SIGEC_PARC_AGRI_ANON__2022_GEOPACKAGE_31370.zip|SIGEC_PARC_AGRI_ANON__2022.gpkg']
['be_wa', ['tests/data-files/convert/be_wa/SIGEC_PARC_AGRI_ANON__2022_GEOPACKAGE_31370.zip|SIGEC_PARC_AGRI_ANON__2022.gpkg']],
['ai4sf', ['tests/data-files/convert/ai4sf/1_vietnam_areas.gpkg', 'tests/data-files/convert/ai4sf/4_cambodia_areas.gpkg']]
])
def test_converter_with_input(out_file, args):
converter, input_file = args
converter, input_files = args
runner = CliRunner()
result = runner.invoke(convert, [converter, '-o', out_file.name, '-i', input_file])
args = [converter, '-o', out_file.name]
for input_file in input_files:
args.append('-i')
args.append(input_file)
result = runner.invoke(convert, args)
assert result.exit_code == 0, result.output
error = re.search('Skipped - |No schema defined', result.output)
if error:
Expand Down

0 comments on commit 53962e9

Please sign in to comment.