Skip to content

Commit

Permalink
Landcover based sampling strategy. (#29)
Browse files Browse the repository at this point in the history
* Add landcover based sampling scripts

Closes #28

* Drop duplicates, fix typo, uncomment compute_stats function.

* Fix comment that was out of sync with code
  • Loading branch information
yellowcap authored Nov 16, 2023
1 parent 120f8b1 commit d525d59
Show file tree
Hide file tree
Showing 2 changed files with 233 additions and 0 deletions.
206 changes: 206 additions & 0 deletions scripts/landcover.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
from pathlib import Path

import click
import fiona
import geopandas
import numpy
import pandas
import rasterio
from fiona.crs import CRS
from rasterio.windows import from_bounds
from shapely.geometry import shape

# Coordinate reference system (EPSG:4326) assigned to the stats file
# written by compute_stats.
WGS84 = CRS.from_epsg(4326)
# Raster value of pixels without data; these are dropped before counting.
NODATA = 0
# Worldcover class code of permanent water bodies.
WATER = 80
# Exclusive bounds on the water fraction of a tile for it to be eligible
# for the water sample.
WATER_LOWER_TH = 0.2
WATER_UPPER_TH = 0.7
# Seed used for all random sampling so that runs are repeatable.
RANDOM_SEED = 42
# Worldcover class codes and their human readable names.
CLASSES = {
    10: "Tree cover",
    20: "Shrubland",
    30: "Grassland",
    40: "Cropland",
    50: "Built-up",
    60: "Bare / sparse vegetation",
    70: "Snow and Ice",
    80: "Permanent water bodies",
    90: "Herbaceous wetland",
    95: "Mangroves",
    100: "Moss and lichen",
}
# Schema of the stats file: tile name, number of distinct classes in the
# tile, and one integer pixel count column per landcover class.
SCHEMA = {
    "geometry": "MultiPolygon",
    "properties": {
        "name": "str",
        "count": "int",
        **{classname: "int" for classname in CLASSES.values()},
    },
}


@click.command()
# Working directory: the stats file and the sample file are written here.
@click.option("--wd", required=True, type=str)
# Path of the Worldcover raster that is opened with rasterio.
@click.option("--worldcover", required=True, type=str)
# Path of the MGRS tiles file that is opened with fiona.
@click.option("--mgrs", required=True, type=str)
def process(wd, worldcover, mgrs):
    """
    Run statistics and sampling.
    """
    # First write the per-tile landcover statistics into the working
    # directory, then draw the sample from that statistics file.
    compute_stats(wd, worldcover, mgrs)
    sample(wd)


def compute_stats(wd, worldcover, mgrs):
    """
    Compute statistics of Worldcover data over MGRS tiles.

    For every tile in the MGRS file, the Worldcover pixels inside the
    bounding box of each tile polygon are read and counted per landcover
    class. One feature per tile is written to ``<wd>/mgrs_stats.fgb`` with
    the tile name, the pixel count of every class and the number of distinct
    classes found. Tiles that contain only nodata pixels or only permanent
    water are left out.

    Args:
        wd: Working directory the stats file is written to.
        worldcover: Path of the Worldcover raster.
        mgrs: Path of the MGRS tiles vector file.

    Raises:
        ValueError: If the raster and the tiles do not share the same EPSG
            code.
    """
    result = []
    with rasterio.open(worldcover) as cover, fiona.open(mgrs, "r") as tiles:
        # Read windows are computed from tile coordinates, so both datasets
        # have to be in the same CRS. This is raised explicitly because an
        # assert statement is removed when Python runs with -O.
        cover_epsg = cover.crs.to_epsg()
        tiles_epsg = tiles.crs.to_epsg()
        if cover_epsg != tiles_epsg:
            raise ValueError(
                f"CRS mismatch: worldcover has EPSG {cover_epsg}, "
                f"mgrs has EPSG {tiles_epsg}"
            )

        for tile in tiles:
            name = tile.properties.get("Name")
            print(name)
            # Split polygons in parts, mgrs tiles at the dateline
            # are split into two parts in the mgrs source file.
            parts = shape(tile["geometry"]).geoms
            pixels = numpy.hstack(
                [
                    cover.read(
                        1,
                        window=from_bounds(*polygon.bounds, cover.transform),
                    ).ravel()
                    for polygon in parts
                ]
            )
            pixels = pixels[pixels != NODATA]

            # Skip tiles without any data and tiles that are purely water.
            if not pixels.size or numpy.all(pixels == WATER):
                continue

            props = {
                str(classname): int(numpy.sum(pixels == key))
                for key, classname in CLASSES.items()
            }
            props["name"] = name
            # Number of distinct landcover classes present in the tile.
            props["count"] = int(len(numpy.unique(pixels)))

            result.append(
                {
                    "geometry": dict(tile["geometry"]),
                    "properties": props,
                }
            )

    with fiona.open(
        Path(wd, "mgrs_stats.fgb"),
        "w",
        driver="FlatGeobuf",
        crs=WGS84,
        schema=SCHEMA,
    ) as colxn:
        colxn.writerecords(result)


def split_highest(data, column, size, pool=1000, seed=RANDOM_SEED):
    """
    Draw a random sample from the rows with the highest values of a column.

    Args:
        data: DataFrame to sample from. It is not modified.
        column: Name of the column the rows are ranked by.
        size: Number of rows to draw.
        pool: Number of top ranked rows the sample is drawn from.
        seed: Random state handed to the sampling call.

    Returns:
        DataFrame with ``size`` rows drawn from the ``pool`` highest rows.

    Raises:
        ValueError: Raised by pandas if ``size`` is larger than the number
            of rows available in the pool.
    """
    # Sort into a new frame instead of in place, so the row order of the
    # caller's frame does not change as a side effect of sampling.
    ranked = data.sort_values(column, ascending=False)
    return ranked[:pool].sample(size, random_state=seed)


def percentages(data):
    """
    Normalize all numerical columns to row-wise fractions.

    Every numerical value is divided by the sum of the numerical values of
    its row, so the numerical columns of each row add up to 1. Columns that
    are not numerical are carried over unchanged.

    Args:
        data: DataFrame holding the columns to normalize.

    Returns:
        A new frame with the normalized columns; the input is left untouched.
    """
    data_num = data.select_dtypes(include="number")
    data_norm = data_num.div(data_num.sum(axis=1), axis=0)

    # Work on a copy: callers may pass a slice of a larger frame, and writing
    # into it in place would modify their data and can trigger the pandas
    # SettingWithCopyWarning.
    result = data.copy()
    result[data_norm.columns] = data_norm

    return result


def sample(wd):
    """
    Sample the mgrs tiles based on landcover statistics.

    Reads ``<wd>/mgrs_stats.fgb``, converts the per-class pixel counts to
    fractions of each tile and combines the following samples (up to 950
    tiles before duplicates are removed):

    - 200 samples from the 2000 most diverse tiles (highest number of
      distinct landcover classes)
    - 200 samples from the 1000 tiles with the highest built-up fraction
    - 50 samples from the 1000 highest tiles for every other class except
      water
    - 100 samples from all tiles with a water fraction between
      WATER_LOWER_TH and WATER_UPPER_TH, both exclusive (making sure we
      capture some, but exclude purely water tiles so we catch coasts)

    Tiles drawn more than once are kept only once. The result is written to
    ``<wd>/mgrs_sample.geojson``.

    Args:
        wd: Working directory holding the stats file; the sample file is
            written to the same directory.

    Raises:
        ValueError: Raised by pandas if one of the groups holds fewer tiles
            than the number of samples requested from it.
    """
    data = geopandas.read_file(Path(wd, "mgrs_stats.fgb"))

    # Normalize the class columns only; "count" holds the number of distinct
    # classes and has to stay an absolute number.
    data_norm = percentages(data.loc[:, data.columns != "count"])
    data[data_norm.columns] = data_norm

    samples = [
        split_highest(data, "count", 200, 2000),
        split_highest(data, "Built-up", 200),
    ]
    # The order is kept stable because the first occurrence wins when
    # duplicates are dropped below.
    other_classes = (
        "Herbaceous wetland",
        "Mangroves",
        "Moss and lichen",
        "Cropland",
        "Tree cover",
        "Shrubland",
        "Grassland",
        "Bare / sparse vegetation",
        "Snow and Ice",
    )
    samples.extend(split_highest(data, name, 50) for name in other_classes)

    selector = numpy.logical_and(
        data["Permanent water bodies"] > WATER_LOWER_TH,
        data["Permanent water bodies"] < WATER_UPPER_TH,
    )
    samples.append(data[selector].sample(100, random_state=RANDOM_SEED))

    result = pandas.concat(samples)

    result = result.drop_duplicates(subset=["name"])

    # The driver is an argument of to_file, not of Path. Path ignores
    # keyword arguments on older Python versions and rejects them on newer
    # ones.
    result.to_file(Path(wd, "mgrs_sample.geojson"), driver="GeoJSON")


# Script entry point: click parses the command line options and passes
# them to process.
if __name__ == "__main__":
    process()
27 changes: 27 additions & 0 deletions scripts/landcover.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Prepare the Worldcover and MGRS inputs and run the landcover sampling.
# Every step depends on the output of the previous one, so stop at the
# first failing command or unset variable.
set -eu

wd=/datadisk
kml=S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml

# Download Worldcover layer
aws s3 sync s3://esa-worldcover/v200/2021/map "$wd/esa-worldcover-v200-2021-map" --no-sign-request

# Download MGRS grid kml and convert to fgb
# --fail keeps an HTTP error page from being saved as if it were the kml.
curl --fail -o "$wd/$kml" "https://hls.gsfc.nasa.gov/wp-content/uploads/2016/03/$kml"
ogr2ogr \
    -overwrite \
    "$wd/mgrs.fgb" \
    "$wd/$kml" \
    -nlt multipolygon \
    -sql "select Name, ExtractMultiPolygon(geometry) as geometry from Features" \
    -dialect sqlite

# Reduce resolution and merge
# gdal_translate does not create the output directory, so make sure it exists.
mkdir -p "$wd/tmp"
counter=0
for file in "$wd"/esa-worldcover-v200-2021-map/*.tif; do
    counter=$((counter+1))
    echo "$counter $file"
    gdal_translate -ovr 3 "$file" "$wd/tmp/tmp_$counter.tif"
done

gdal_merge.py -o "$wd/worldcover.tif" "$wd"/tmp/*.tif

# Intersect worldcover with mgrs and sample based on landcover
python landcover.py --wd="$wd" --worldcover="$wd/worldcover.tif" --mgrs="$wd/mgrs.fgb"

0 comments on commit d525d59

Please sign in to comment.