Skip to content

Commit

Permalink
Merge pull request #40 from EcoExtreML/fapar-lai
Browse files Browse the repository at this point in the history
Add fapar-lai dataset
  • Loading branch information
BSchilperoort authored Nov 17, 2023
2 parents 9d24eed + 0ab8f0a commit 24781ea
Show file tree
Hide file tree
Showing 14 changed files with 517 additions and 0 deletions.
5 changes: 5 additions & 0 deletions docs/available_datasets.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,8 @@ You can add these yourself by creating a pull request, or open an issue to reque
- `land_cover`

For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover).

=== "FAPAR Leaf Area Index"
- `leaf_area_index`

For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar).
4 changes: 4 additions & 0 deletions src/zampy/conventions/ALMA.json
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@
"variable": "CO2air",
"units": "kilogram/kilogram"
},
"leaf_area_index": {
"variable": "LAI",
"units": "fraction"
},
"land_cover": {
"variable": "land_cover",
"units": ""
Expand Down
2 changes: 2 additions & 0 deletions src/zampy/datasets/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from zampy.datasets.era5 import ERA5
from zampy.datasets.era5 import ERA5Land
from zampy.datasets.eth_canopy_height import EthCanopyHeight
from zampy.datasets.fapar_lai import FaparLAI
from zampy.datasets.land_cover import LandCover
from zampy.datasets.prism_dem import PrismDEM30
from zampy.datasets.prism_dem import PrismDEM90
Expand All @@ -18,5 +19,6 @@
"eth_canopy_height": EthCanopyHeight,
"prism_dem_30": PrismDEM30,
"prism_dem_90": PrismDEM90,
"fapar_lai": FaparLAI,
"land_cover": LandCover,
}
1 change: 1 addition & 0 deletions src/zampy/datasets/cds_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
SERVER_API = {
"era5": "cdsapi",
"era5-land": "cdsapi",
"fapar-lai": "cdsapi",
"cams": "adsapi",
"land-cover": "cdsapi",
}
Expand Down
292 changes: 292 additions & 0 deletions src/zampy/datasets/fapar_lai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,292 @@
"""Implementation of the FAPAR LAI dataset."""

import calendar
import shutil
import tempfile
import zipfile
from pathlib import Path
from typing import Any

import cdsapi
import numpy as np
import pandas as pd
import xarray as xr
import xarray_regrid
from tqdm import tqdm

from zampy.datasets import cds_utils
from zampy.datasets import converter
from zampy.datasets import validation
from zampy.datasets.dataset_protocol import SpatialBounds
from zampy.datasets.dataset_protocol import TimeBounds
from zampy.datasets.dataset_protocol import Variable
from zampy.datasets.dataset_protocol import copy_properties_file
from zampy.datasets.dataset_protocol import write_properties_file
from zampy.reference.variables import VARIABLE_REFERENCE_LOOKUP
from zampy.reference.variables import unit_registry


## Ignore missing class/method docstrings: they are implemented in the Dataset class.
# ruff: noqa: D102


class FaparLAI: # noqa: D101
    """FAPAR Leaf Area Index dataset, retrieved from the Copernicus Climate Data Store."""

    # Dataset identifier; also used as the subfolder name under the
    # download/ingest directories.
    name = "fapar-lai"
    # Temporal extent of the 10-daily satellite product.
    time_bounds = TimeBounds(np.datetime64("1998-04-10"), np.datetime64("2020-06-30"))
    # Global extent, given as (north, east, south, west).
    spatial_bounds = SpatialBounds(90, 180, -90, -180)
    crs = "EPSG:4326"

    # "lai" is the variable name inside the raw netCDF files; it is renamed to
    # the zampy standard name "leaf_area_index" during ingest (see ingest_ncfile).
    raw_variables = [Variable(name="lai", unit=unit_registry.fraction)]
    variable_names = ["leaf_area_index"]
    variables = [VARIABLE_REFERENCE_LOOKUP[var] for var in variable_names]

    license = "non-standard"
    license_url = (
        "https://cds.climate.copernicus.eu/api/v2/terms/static/vito-proba-v.pdf"
    )

    bib = """
    @misc{https://doi.org/10.24381/cds.7e59b01a,
        doi = {10.24381/CDS.7E59B01A},
        url = {https://cds.climate.copernicus.eu/doi/10.24381/cds.7e59b01a},
        author = {{Copernicus Climate Change Service}},
        title = {Leaf area index and fraction absorbed of photosynthetically active radiation 10-daily gridded data from 1981 to present},
        publisher = {ECMWF},
        year = {2018}
    }
    """ # noqa: E501

    def __init__(self) -> None:
        """Init."""
        pass

    def download(
        self,
        download_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        variable_names: list[str],
        overwrite: bool = False,
    ) -> bool:
        """Download the FAPAR LAI data as one zip archive per month.

        Args:
            download_dir: Root download folder; a subfolder named after this
                dataset is created inside it.
            time_bounds: Start and end time of the data to download.
            spatial_bounds: Spatial bounds of the data to download.
            variable_names: Which variables to download (only
                "leaf_area_index" exists for this dataset).
            overwrite: If True, re-download archives that already exist.

        Returns:
            True when the download completed.
        """
        validation.validate_download_request(
            self,
            download_dir,
            time_bounds,
            spatial_bounds,
            variable_names,
        )

        download_folder = download_dir / self.name
        download_folder.mkdir(parents=True, exist_ok=True)

        # Credentials are read from the user's CDS API configuration.
        url, api_key = cds_utils.cds_api_key("fapar-lai")

        client = cdsapi.Client(
            url=url,
            key=api_key,
            verify=True,
            quiet=True,
        )

        # One CDS request (zip archive) per (year, month) pair; tqdm shows a
        # progress bar over the months.
        years_months = get_year_month_pairs(time_bounds)
        for year, month in tqdm(years_months):
            print(f"Downloading FAPAR LAI data for {year}-{month}...")
            download_fapar_lai(
                client, year, month, download_folder, spatial_bounds, overwrite
            )

        # Record the request's bounds and variables next to the downloaded data.
        write_properties_file(
            download_folder, spatial_bounds, time_bounds, variable_names
        )

        return True

    def ingest(
        self,
        download_dir: Path,
        ingest_dir: Path,
        overwrite: bool = False,
    ) -> bool:
        """Unpack the downloaded zip archives and standardize the netCDF files.

        Args:
            download_dir: Root folder the zip archives were downloaded into.
            ingest_dir: Root ingest folder; a subfolder named after this
                dataset is created inside it.
            overwrite: If True, re-ingest files that already exist.

        Returns:
            True when ingestion completed.
        """
        download_folder = download_dir / self.name
        ingest_folder = ingest_dir / self.name
        ingest_folder.mkdir(parents=True, exist_ok=True)

        zip_file_pattern = "*.zip"
        zip_files = sorted(download_folder.glob(zip_file_pattern))

        # Create tmp dir. See https://github.com/EcoExtreML/zampy/issues/36
        # TODO: Fix in a nice way.
        tmp_path = download_dir.parent / "tmp"
        tmp_path.mkdir(parents=True, exist_ok=True)

        # netCDF files follow CF-1.6, only unpacking the archives is required.
        for file in zip_files:
            with tempfile.TemporaryDirectory(dir=tmp_path) as _tmpdir:
                tmpdir = Path(_tmpdir)

                extract_fapar_zip(
                    zip_file=file,
                    ingest_folder=ingest_folder,
                    extract_dir=tmpdir,
                    overwrite=overwrite,
                )

                # Rename variables/coordinates and recompress each extracted file.
                for ncfile in tmpdir.glob("*.nc"):
                    ingest_ncfile(ncfile, ingest_folder)

        copy_properties_file(download_folder, ingest_folder)

        return True

    def load(
        self,
        ingest_dir: Path,
        time_bounds: TimeBounds,
        spatial_bounds: SpatialBounds,
        resolution: float,
        regrid_method: str, # should be deprecated.
        variable_names: list[str],
    ) -> xr.Dataset:
        """Load the ingested data, sliced to the time bounds and regridded.

        Note: `regrid_method` and `variable_names` are accepted for interface
        compatibility but are not used in this body (regridding here is
        always linear, and the dataset has a single variable).

        Returns:
            The dataset regridded to the requested bounds and resolution.
        """
        files = list((ingest_dir / self.name).glob("*.nc"))

        ds = xr.open_mfdataset(files, parallel=True)
        ds = ds.sel(time=slice(time_bounds.start, time_bounds.end))

        # Linearly interpolate onto a regular grid covering the requested
        # bounds at the requested resolution (xarray-regrid's .regrid accessor).
        target_dataset = create_regridding_ds(spatial_bounds, resolution)
        ds = ds.regrid.linear(target_dataset)

        return ds

    def convert( # Will be removed, see issue #43.
        self,
        ingest_dir: Path,
        convention: str | Path,
    ) -> bool:
        """Convert the ingested files to the given unit convention.

        NOTE(review): the converted dataset is never written back to disk, so
        the loop only validates that conversion succeeds — confirm intended
        (slated for removal, see issue #43). Also: the glob pattern assumes
        ingested files are prefixed with `fapar-lai_`, but ingest_ncfile keeps
        the archive's original file names — verify the pattern matches.
        """
        converter.check_convention(convention)
        ingest_folder = ingest_dir / self.name

        data_file_pattern = f"{self.name}_*.nc"

        data_files = list(ingest_folder.glob(data_file_pattern))

        for file in data_files:
            # start conversion process
            print(f"Start processing file `{file.name}`.")
            ds = xr.open_dataset(file, chunks={"latitude": 2000, "longitude": 2000})
            ds = converter.convert(ds, dataset=self, convention=convention)

        return True


def create_regridding_ds(
    spatial_bounds: SpatialBounds, resolution: float
) -> xr.Dataset:
    """Build the target dataset for use with xarray-regrid regridding.

    Args:
        spatial_bounds: Spatial bounds of the new dataset.
        resolution: Latitude and longitude resolution of the new dataset.

    Returns:
        The dataset ready to be used in regridding.
    """
    # Describe the target grid, then let xarray-regrid construct the matching
    # dataset that the source data will be interpolated onto.
    target_grid = xarray_regrid.Grid(
        west=spatial_bounds.west,
        south=spatial_bounds.south,
        east=spatial_bounds.east,
        north=spatial_bounds.north,
        resolution_lat=resolution,
        resolution_lon=resolution,
    )
    return xarray_regrid.create_regridding_dataset(target_grid)


def get_year_month_pairs(time_bounds: "TimeBounds") -> list[tuple[int, int]]:
    """Get the (year, month) pairs covering the input time bounds.

    Args:
        time_bounds: Start and end time of the data request.

    Returns:
        All (year, month) pairs between the start and end time (both months
        inclusive), in chronological order.
    """
    # pd.period_range yields one monthly period per month touched by the
    # [start, end] interval — exactly the set of months to be downloaded.
    months = pd.period_range(
        start=pd.to_datetime(time_bounds.start),
        end=pd.to_datetime(time_bounds.end),
        freq="M",
    )
    return [(period.year, period.month) for period in months]


def get_lai_days(year: int, month: int) -> list[str]:
    """Get the days that the FAPAR LAI is available for a certain year and month.

    The product is generated every 10 days: nominally on the 10th, the 20th,
    and the last day of the month.

    Args:
        year: Year of the data request.
        month: Month of the data request (1-12).

    Returns:
        The valid nominal-day strings for that month.
    """
    if month == 2:
        # February's last nominal day depends on leap years. Use calendar.isleap
        # instead of a hard-coded list of years, so any year is handled correctly.
        last_day = 29 if calendar.isleap(year) else 28
    elif month in (1, 3, 5, 7, 8, 10, 12):
        last_day = 31
    else:
        last_day = 30
    return ["10", "20", str(last_day)]


def download_fapar_lai(
    client: cdsapi.Client,
    year: int,
    month: int,
    download_dir: Path,
    spatial_bounds: SpatialBounds | None = None,
    overwrite: bool = False,
) -> None:
    """Download one month of FAPAR LAI data from CDS as a zip archive.

    Args:
        client: Authenticated CDS API client.
        year: Year of the data to download.
        month: Month of the data to download.
        download_dir: Folder the zip archive is written into.
        spatial_bounds: Optional bounds to spatially subset the request.
        overwrite: If True, overwrite a previously downloaded archive.
    """
    # Before 2014 the record comes from the SPOT satellite; from 2014 onward
    # it continues on PROBA (both with the VGT sensor).
    satellite = "spot" if year < 2014 else "proba"

    request: dict[str, Any] = {
        "format": "zip",
        "variable": "lai",
        "horizontal_resolution": "1km",
        "product_version": "V3",
        "satellite": satellite,
        "sensor": "vgt",
        "month": f"{month:0>2}",
        "nominal_day": get_lai_days(year, month),
        "year": f"{year}",
    }

    if spatial_bounds is not None:
        # The CDS "area" key is ordered [north, west, south, east].
        request["area"] = [
            spatial_bounds.north,
            spatial_bounds.west,
            spatial_bounds.south,
            spatial_bounds.east,
        ]

    target_file = download_dir / f"satellite-lai-fapar_{year}-{month}.zip"
    retrieval = client.retrieve("satellite-lai-fapar", request)
    cds_utils._check_and_download(retrieval, target_file, overwrite)


def ingest_ncfile(ncfile: Path, ingest_folder: Path) -> None:
    """Ingest the 'raw' netCDF file to the Zampy standard format.

    Renames the raw variable/coordinate names to zampy's standard names and
    writes the (compressed) result into the ingest folder, keeping the
    original file name.
    """
    print(f"Converting file {ncfile.name}...")
    name_map = {
        "LAI": "leaf_area_index",
        "lat": "latitude",
        "lon": "longitude",
    }
    raw = xr.open_dataset(ncfile, decode_times=False, chunks={"lat": 5000, "lon": 5000})
    standardized = raw.rename(name_map)
    standardized["leaf_area_index"].attrs["units"] = "fraction"
    compression = {"leaf_area_index": {"zlib": True, "complevel": 3}}
    standardized[["leaf_area_index"]].to_netcdf(
        path=ingest_folder / ncfile.name,
        encoding=compression,
    )


def extract_fapar_zip(
    zip_file: Path, ingest_folder: Path, extract_dir: Path, overwrite: bool
) -> None:
    """Extract the FAPAR LAI zip file, if not all files already exist.

    Args:
        zip_file: Path to the downloaded zip archive.
        ingest_folder: Folder holding already-ingested files; used to decide
            whether extraction can be skipped.
        extract_dir: (Temporary) directory the archive is extracted into.
        overwrite: If True, always extract, even when all members exist.
    """
    with zipfile.ZipFile(zip_file, "r") as zf:
        file_list = zf.namelist()
        if all((ingest_folder / fname).exists() for fname in file_list) and not overwrite:
            print(f"Files '{file_list}' already exist, skipping...")
        else:
            # Extract while the archive is already open, instead of re-opening
            # it a second time via shutil.unpack_archive.
            zf.extractall(path=extract_dir)
1 change: 1 addition & 0 deletions src/zampy/reference/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def unit_registration() -> UnitRegistry:
Variable("latitude", unit=unit_registry.degree_north),
Variable("longitude", unit=unit_registry.degree_east),
Variable("elevation", unit=unit_registry.meter),
Variable("leaf_area_index", unit=unit_registry.fraction),
Variable("land_cover", unit=unit_registry.dimensionless),
)

Expand Down
Empty file.
Binary file not shown.
Loading

0 comments on commit 24781ea

Please sign in to comment.