From 7ad91da3dcb18ebdbd3b0dc4508189ed1accfeed Mon Sep 17 00:00:00 2001 From: Yang Date: Mon, 11 Dec 2023 15:51:33 +0100 Subject: [PATCH] Improve readme and documentation (#45) * restructure readme * restructure mkdocs * add contributing doc * please linter * add readme for demo notebooks * add code of conduct * add readme for developer * add an empty changelog * fix link to test recipe * Apply suggestions from code review Co-authored-by: Bart Schilperoort * address more comments from Bart * adress more comments from Bart * fix mkdocs content tabs --------- Co-authored-by: Bart Schilperoort --- CITATION.cff | 30 ++++++ README.md | 120 +++++++++++++++++------- demo/README.md | 11 +++ demo/cams_co2_dataset_demo.ipynb | 4 +- demo/era5-land_dataset_demo.ipynb | 4 +- demo/era5_dataset_demo.ipynb | 4 +- demo/eth_dataset_demo.ipynb | 4 +- demo/land_cover_dataset_demo.ipynb | 2 +- demo/prism_dem_demo.ipynb | 16 ++-- docs/CHANGELOG.md | 13 +++ docs/CODE_OF_CONDUCT.md | 71 ++++++++++++++ docs/CONTRIBUTING.md | 43 +++++++++ docs/README.dev.md | 107 +++++++++++++++++++++ docs/available_datasets.md | 11 ++- docs/configuration.md | 54 +++++++++++ docs/index.md | 74 ++++++++++++++- docs/using_zampy.md | 64 ------------- mkdocs.yml | 9 ++ src/zampy/datasets/eth_canopy_height.py | 2 +- src/zampy/datasets/prism_dem.py | 2 +- 20 files changed, 518 insertions(+), 127 deletions(-) create mode 100644 CITATION.cff create mode 100644 demo/README.md create mode 100644 docs/CHANGELOG.md create mode 100644 docs/CODE_OF_CONDUCT.md create mode 100644 docs/CONTRIBUTING.md create mode 100644 docs/README.dev.md create mode 100644 docs/configuration.md delete mode 100644 docs/using_zampy.md diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..e19c4c2 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,30 @@ +# YAML 1.2 +--- +cff-version: "1.1.0" +title: "zampy" +authors: + - + affiliation: "Netherlands eScience Center" + family-names: Schilperoort + given-names: Bart + orcid: 
"https://orcid.org/0000-0003-4487-9822" + - + affiliation: "Netherlands eScience Center" + family-names: Alidoost + given-names: Sarah + orcid: "https://orcid.org/0000-0001-8407-6472" + - + affiliation: "Netherlands eScience Center" + family-names: Liu + given-names: Yang + orcid: "https://orcid.org/0000-0002-1966-8460" + +date-released: +doi: +version: "0.0.1" +repository-code: "https://github.com/EcoExtreML/zampy" +keywords: + - data preparation + - land surface modelling +message: "If you use this software, please cite it using these metadata." +license: Apache-2.0 \ No newline at end of file diff --git a/README.md b/README.md index f714470..ab57d83 100644 --- a/README.md +++ b/README.md @@ -1,64 +1,112 @@ # zampy -Tool for downloading Land Surface Model input data +Tool for downloading Land Surface Model (LSM) input data. + +Named after *Zam*; [the Avestan language term for the Zoroastrian concept of "earth"](https://en.wikipedia.org/wiki/Zam). [![github license badge](https://img.shields.io/github/license/EcoExtreML/zampy)](https://github.com/EcoExtreML/zampy) +[![Documentation Status](https://readthedocs.org/projects/zampy/badge/?version=latest)](https://zampy.readthedocs.io/en/latest/?badge=latest) [![build](https://github.com/EcoExtreML/zampy/actions/workflows/build.yml/badge.svg)](https://github.com/EcoExtreML/zampy/actions/workflows/build.yml) [![workflow scc badge](https://sonarcloud.io/api/project_badges/measure?project=EcoExtreML_zampy&metric=coverage)](https://sonarcloud.io/dashboard?id=EcoExtreML_zampy) -## Tool outline: +## Outline +`zampy` is designed to retrieve data for LSM model input. It can help you prepare the data within the following steps: +1. **Download** the data for the specified location(s) / geographical area. +2. **Ingest** data into unified (zampy) format. +3. **Load** the variables in a standardized way (standardized names & standardized units). +4. 
**Convert** the data to standard formats: + - ALMA / PLUMBER2's ALMA formatted netCDF. + - *CMOR formatted netCDF*. + +(Note: items in *italic* will not be worked on for now/low priority, but we want to allow space for these in the future.) + +## Getting started + +### Installation +[![workflow pypi badge](https://img.shields.io/pypi/v/zampy.svg?colorB=blue)](https://pypi.python.org/project/zampy/) +[![supported python versions](https://img.shields.io/pypi/pyversions/zampy)](https://pypi.python.org/project/zampy/) + +To install the latest release of `zampy`, do: +```console +python3 -m pip install zampy +``` + +To install the in-development version from the GitHub repository, do: - - Goal is to retrieve data for LSM model input. - 1. First **download** the data for the specified location(s) / geographical area. - 2. Be able to **load** the variables in a standardized way (standardized names & standardized units). - 3. **Output** the data to standard formats: - - ALMA / PLUMBER2's ALMA formatted netCDF. - - *CMOR formatted netCDF*. - - User-interaction should go through recipes. For example, see [springtime](https://github.com/phenology/springtime/blob/main/tests/recipes/daymet.yaml). - - Recipes define: - - data folder (where data should be downloaded to) - - time extent. - - spatial location / bounding box. - - datasets to be used - - variables within datasets - - Load recipes using Pydantic ([for example](https://github.com/phenology/springtime/blob/main/src/springtime/datasets/daymet.py)). - - Support both a CLI & Python API. +```console +python3 -m pip install git+https://github.com/EcoExtreML/zampy.git +``` -Note: items in *italic* will not be worked on for now/low priority, but we want to allow space for these in the future. +### Configuration +`Zampy` needs to be configured with a simple configuration file. -## Instructions for CDS datasets (e.g. 
ERA5) -To download the following datasets, users need access to CDS via cdsapi: +You need to create this file under your user home directory: -- ERA5 -- ERA5 land -- LAI -- land cover +`~/.config/zampy/zampy_config.yml` -First, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome). +The configuration file should contain the `working_directory`, for instance: +```yaml +working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy +``` -Before submitting any request with `zampy`, please configure your zampy config file `zampy_config.yml` and put your `cdsapi` credentials in `zampy_config.yml`. Here is a short [instruction](https://cds.climate.copernicus.eu/api-how-to) about how to find your CDS API key. You can skip the steps related to `.cdsapirc` and instead create `zampy_config.yml` file under your _*user home*_/.config directory i.e. `~/.config/zampy/zampy_config.yml`. Then, add the key as: +If you need access to data on CDS or ADS server, you should add your CDS or ADS credentials to `zampy_config.yml`: ```yaml cdsapi: url: # for example https://cds.climate.copernicus.eu/api/v2 key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 +adsapi: + url: # for example https://ads.atmosphere.copernicus.eu/api/v2 + key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 ``` -## Instructions for ADS datasets (e.g. CAMS EGG4) -To download the following datasets, users need access to ADS via cdsapi: +About how to create CDS or ADS credentials, check the section below. -- CAMS EGG4 (e.g. co2) +### How to use `zampy` -First, you need to be a registered user on *ADS* via the [registration page](https://ads.atmosphere.copernicus.eu/user/register?destination=%2F%23!%2Fhome). +We recommend our users to use `zampy` with recipes. 
-Before submitting any request with `zampy`, please configure your zampy config file `zampy_config.yml` and put your `cdsapi` credentials in `zampy_config.yml`. Here is a short [instruction](https://cds.climate.copernicus.eu/api-how-to) about how to find your CDS API key. You can skip the steps related to `.cdsapirc` and instead create `zampy_config.yml` file under your _*user home*_/.config directory i.e. `~/.config/zampy/zampy_config.yml`. Then, add the key as: +A "recipe" is a file with a yml extension; it defines: +- data downloading + - time extent. + - spatial location / bounding box. + - datasets to be downloaded + - variables within datasets +- data conversion + - convert to desired [conventions](./src/zampy/conventions/) + - output frequency + - output resolution -```yaml -adsapi: - url: # for example https://ads.atmosphere.copernicus.eu/api/v2 - key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 +A sample recipe can be found in the [documentation](https://zampy.readthedocs.io/en/latest/#how-to-use-zampy). + +When you have created your recipe and saved it to disk, you can execute it by running the following command in your shell: + +```console +zampy /path_to_recipe/sample_recipe.yml ``` +We also provide a Python API for you to interact with `zampy`. You can find the example notebooks for each supported dataset [here](./demo/). + +## Instructions for CDS/ADS datasets + +To download the following datasets, users need access to CDS/ADS via `cdsapi`/`adsapi`: +- CDS + - ERA5 + - ERA5 land + - LAI + - land cover +- ADS + - CAMS EGG4 (e.g. co2) + +To generate these API keys, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome), or on *ADS* via the [registration page](https://ads.atmosphere.copernicus.eu/user/register?destination=%2F%23!%2Fhome). + +Before submitting any request with `zampy`, please put your `cdsapi`/`adsapi` credentials in `zampy_config.yml`. 
Here is a short [instruction](https://cds.climate.copernicus.eu/api-how-to) about how to find your CDS/ADS API key. You can skip the steps related to `.cdsapirc` and simply add the key to `zampy_config.yml`. + ### Agree to the Terms of Use on CDS/ADS -When downloading a dataset for the first time, it is **necessary to agree to the Terms of Use of every datasets that you intend to download**. This can only be done via the CDS website. When you try to download these datasets, you will be prompted to go to the terms of use and accept them. \ No newline at end of file +When downloading a dataset for the first time, it is **necessary to agree to the Terms of Use of every dataset that you intend to download**. This can only be done via the CDS/ADS website. When you try to download these datasets, you will be prompted to go to the terms of use and accept them. + + +## Acknowledgements + +This package was developed by the Netherlands eScience Center. Development was supported by the Netherlands eScience Center under grant number NLESC.ASDI.2020.026. \ No newline at end of file diff --git a/demo/README.md b/demo/README.md new file mode 100644 index 0000000..16dd118 --- /dev/null +++ b/demo/README.md @@ -0,0 +1,11 @@ +# Using `zampy` in notebooks + +It is possible to interact with `zampy` via Python APIs. 
Here are some demo notebooks showing how to use `zampy` inside a Jupyter notebook environment for each supported dataset: + + - [ERA5](./era5_dataset_demo.ipynb) + - [ERA5-land](./era5-land_dataset_demo.ipynb) + - [ETH canopy height](./eth_dataset_demo.ipynb) + - [PRISM DEM](./prism_dem_demo.ipynb) + - [CAMS EGG4](./cams_co2_dataset_demo.ipynb) + - [Land cover classification gridded maps](./land_cover_dataset_demo.ipynb) + \ No newline at end of file diff --git a/demo/cams_co2_dataset_demo.ipynb b/demo/cams_co2_dataset_demo.ipynb index 5854e1a..b1394a6 100644 --- a/demo/cams_co2_dataset_demo.ipynb +++ b/demo/cams_co2_dataset_demo.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "### Handle CAMS EGG4 dataset with Zampy\n", - "Demo notebook for developers." + "Demo notebook for users and developers." ] }, { @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2003-01-02\"), np.datetime64(\"2003-01-31\"))\n", diff --git a/demo/era5-land_dataset_demo.ipynb b/demo/era5-land_dataset_demo.ipynb index c8d8323..357b015 100644 --- a/demo/era5-land_dataset_demo.ipynb +++ b/demo/era5-land_dataset_demo.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "### Handle ERA5 land dataset with Zampy\n", - "Demo notebook for developers." + "Demo notebook for users and developers." 
] }, { @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2010-01-31T23:00:00\"))\n", diff --git a/demo/era5_dataset_demo.ipynb b/demo/era5_dataset_demo.ipynb index b068e8b..a31f621 100644 --- a/demo/era5_dataset_demo.ipynb +++ b/demo/era5_dataset_demo.ipynb @@ -5,7 +5,7 @@ "metadata": {}, "source": [ "### Handle ERA5 dataset with Zampy\n", - "Demo notebook for developers." + "Demo notebook for users and developers." ] }, { @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2010-01-31T23:00:00\"))\n", diff --git a/demo/eth_dataset_demo.ipynb b/demo/eth_dataset_demo.ipynb index d7c1c38..3865491 100644 --- a/demo/eth_dataset_demo.ipynb +++ b/demo/eth_dataset_demo.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "source": [ "### Handle ETH Canopy Height dataset with Zampy\n", - "Demo notebook for developers." + "Demo notebook for users and developers." 
] }, { @@ -28,7 +28,7 @@ "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", "from pathlib import Path\n", "\n", - "work_dir = Path(\"/home/bart/Zampy\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2020-01-01\"), np.datetime64(\"2020-12-31\"))\n", diff --git a/demo/land_cover_dataset_demo.ipynb b/demo/land_cover_dataset_demo.ipynb index fc6533f..3862943 100644 --- a/demo/land_cover_dataset_demo.ipynb +++ b/demo/land_cover_dataset_demo.ipynb @@ -35,7 +35,7 @@ "metadata": {}, "outputs": [], "source": [ - "work_dir = Path(\"/home/yangliu/EcoExtreML/temp\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2010-01-01T00:00:00\"), np.datetime64(\"2011-01-31T23:00:00\"))\n", diff --git a/demo/prism_dem_demo.ipynb b/demo/prism_dem_demo.ipynb index 13e47c3..c2f1f44 100644 --- a/demo/prism_dem_demo.ipynb +++ b/demo/prism_dem_demo.ipynb @@ -6,15 +6,11 @@ "metadata": {}, "source": [ "### Download the Prism DEM dataset with Zampy\n", - "Demo notebook for developers.\n", + "Demo notebook for users and developers.\n", "\n", - "\n", - "Note:\n", - "```\n", - " two versions of this dataset are available, a 30 m and a 90 m resolution one.\n", - " These are considered to be different datasets, and can be downloaded by using\n", - " either the PrismDEM30 or the PrismDEM90 class.\n", - "```" + "Note:
\n", + "Two versions of this dataset are available, a 30 m and a 90 m resolution one.
\n", + "These are considered to be different datasets, and can be downloaded by using either the PrismDEM30 or the PrismDEM90 class." ] }, { @@ -36,7 +32,7 @@ "from zampy.datasets.dataset_protocol import TimeBounds, SpatialBounds\n", "from pathlib import Path\n", "\n", - "work_dir = Path(\"/home/bart/Zampy\")\n", + "work_dir = Path(\"/path_to_work_directory\")\n", "download_dir = work_dir / \"download\"\n", "ingest_dir = work_dir / \"ingest\"\n", "times = TimeBounds(np.datetime64(\"2020-01-01\"), np.datetime64(\"2020-12-31\"))\n", @@ -653,7 +649,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.0" }, "orig_nbformat": 4 }, diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md new file mode 100644 index 0000000..34bf39f --- /dev/null +++ b/docs/CHANGELOG.md @@ -0,0 +1,13 @@ +# Changelog +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [Unreleased] + +### Added: + +### Removed: + +### Changed: diff --git a/docs/CODE_OF_CONDUCT.md b/docs/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..376b381 --- /dev/null +++ b/docs/CODE_OF_CONDUCT.md @@ -0,0 +1,71 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as +contributors and maintainers pledge to making participation in our project and +our community a harassment-free experience for everyone, regardless of age, body +size, disability, ethnicity, gender identity and expression, level of experience, +education, socio-economic status, nationality, personal appearance, race, +religion, or sexual identity and orientation. 
+ +## Our Standards + +Examples of behavior that contributes to creating a positive environment +include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or + advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic + address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable +behavior and are expected to take appropriate and fair corrective action in +response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or +reject comments, commits, code, wiki edits, issues, and other contributions +that are not aligned to this Code of Conduct, or to ban temporarily or +permanently any contributor for other behaviors that they deem inappropriate, +threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces +when an individual is representing the project or its community. Examples of +representing a project or community include using an official project e-mail +address, posting via an official social media account, or acting as an appointed +representative at an online or offline event. Representation of a project may be +further defined and clarified by project maintainers. 
+ +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported by contacting the project team at f.alidoost@esciencecenter.nl. All +complaints will be reviewed and investigated and will result in a response that +is deemed necessary and appropriate to the circumstances. The project team is +obligated to maintain confidentiality with regard to the reporter of an incident. +Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good +faith may face temporary or permanent repercussions as determined by other +members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant](https://www.contributor-covenant.org), version 1.4, +available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md new file mode 100644 index 0000000..a22e567 --- /dev/null +++ b/docs/CONTRIBUTING.md @@ -0,0 +1,43 @@ +# Contributing Guide + +We welcome any kind of contribution to `zampy`, from simple comment or question to a full fledged pull request. Please read and follow our [Code of Conduct](CODE_OF_CONDUCT.md). + +A contribution can be one of the following cases: + +1. you have a question; +1. you think you may have found a bug (including unexpected behavior); +1. you want to make some kind of change to the code base (e.g. to fix a bug, to add a new feature, to update documentation); +1. you want to make a new release of the code base. + +The sections below outline the steps in each case. + +## You have a question + +1. use the search functionality [here](https://github.com/EcoExtreML/zampy/issues) to see if someone already filed the same issue; +2. if your issue search did not yield any relevant results, make a new issue; +3. apply the "Question" label; apply other labels when relevant. 
+ +## You think you may have found a bug + +1. use the search functionality [here](https://github.com/EcoExtreML/zampy/issues) to see if someone already filed the same issue; +1. if your issue search did not yield any relevant results, make a new issue, making sure to provide enough information to the rest of the community to understand the cause and context of the problem. Depending on the issue, you may want to include: + - the SHA hashcode of the commit that is causing your problem; + - some identifying information (name and version number) for dependencies you're using; + - information about the operating system; +1. apply relevant labels to the newly created issue. + +## You want to make some kind of change to the code base + +If you want to make some kind of change to the code base, for instance, adding more datasets to the collection and supporting more conventions, please follow the steps below: + +1. (**important**) announce your plan to the rest of the community *before you start working*. This announcement should be in the form of a (new) issue; +1. (**important**) wait until some kind of consensus is reached about your idea being a good idea; +1. if needed, fork the repository to your own GitHub profile and create your own feature branch off of the latest commit on the `main` branch. While working on your feature branch, make sure to stay up to date with the `main` branch by pulling in changes, possibly from the 'upstream' repository; +1. make sure the existing tests still work by running ``pytest``; +1. add your own tests (if necessary); +1. update or expand the documentation; +1. update the `CHANGELOG.md` file with your change; +1. push your feature branch to (your fork of) the `zampy` repository on GitHub; +1. create the pull request. + +In case you feel like you've made a valuable contribution, but you don't know how to write or run tests for it, or how to generate the documentation: don't let this discourage you from making the pull request; we can help you! 
Just go ahead and submit the pull request, but keep in mind that you might be asked to append additional commits to your pull request. diff --git a/docs/README.dev.md b/docs/README.dev.md new file mode 100644 index 0000000..6d3a444 --- /dev/null +++ b/docs/README.dev.md @@ -0,0 +1,107 @@ +# `zampy` developer documentation + +If you're looking for user documentation, go [here](index.md). + +## Development install + +```shell +# Create a virtual environment, e.g. with +python3 -m venv env + +# activate virtual environment +source env/bin/activate + +# make sure to have a recent version of pip and hatch +python3 -m pip install --upgrade pip hatch + +# (from the project root directory) +# install zampy as an editable package +python3 -m pip install --no-cache-dir --editable . +# install development dependencies +python3 -m pip install --no-cache-dir --editable .[dev] +``` + +Afterwards check that the install directory is present in the `PATH` environment variable. + +## Running the tests + +Running tests has been configured using `hatch`, and can be started by running: + +```shell +hatch run test +``` + +The second is to use `tox`, which can be installed separately (e.g. with `pip install tox`), i.e. not necessarily inside the virtual environment you use for installing `zampy`, but then builds the necessary virtual environments itself by simply running: + +### Test coverage + +In addition to just running the tests to see if they pass, they can be used for coverage statistics, i.e. to determine how much of the package's code is actually executed during tests. +Inside the package directory, run: + +```shell +hatch run coverage +``` + +This runs tests and prints the results to the command line, as well as storing the result in a `coverage.xml` file (for analysis by, e.g. SonarCloud). + +## Running linters locally + +For linting we will use `flake8`, `black` and `isort`. We additionally use `mypy` to check the type hints. 
+All tools can simply be run by doing: + +```shell +# linter +hatch run lint +``` + +To easily comply with `black` and `isort`, you can also run: + +```shell +hatch run format +``` + +This will apply the `black` and `isort` formatting, and then check the code style. + + +## Generating the documentation +To generate the documentation, simply run the following command. This will also test the documentation code snippets. Note that you might need to install [`pandoc`](https://pandoc.org/) to be able to generate the documentation. + +```shell +hatch run docs:build +``` + +The documentation will be in `docs/_build/html`. + +## Versioning + +Bumping the version across all files is done with [bump-my-version](https://github.com/callowayproject/bump-my-version), e.g. + +```shell +bumpversion bump major +bumpversion bump minor +bumpversion bump patch +``` + +## Making a release + +This section describes how to make a release in 3 parts: preparation, release and validation. + +### Preparation + +1. Update the `CHANGELOG.md` (don't forget to update links at bottom of page) +2. Verify that the information in `CITATION.cff` is correct, and that `.zenodo.json` contains equivalent data +3. Make sure the [version has been updated](#versioning). +4. Run the unit tests with `hatch run test` + +### Making the GitHub release + +Make a release and tag on GitHub.com. This will: + + - trigger Zenodo into making a snapshot of your repository and sticking a DOI on it. + - start a GitHub action that builds and uploads the new version to [PyPI](https://pypi.org/project/zampy/). + +### Validation + +After making the release, you should check that: + +- The [publishing action](https://github.com/EcoExtreML/zampy/actions/workflows/publish.yml) ran successfully, and that `pip install zampy` installs the new version. 
diff --git a/docs/available_datasets.md b/docs/available_datasets.md index 2d1bcaf..375b7e0 100644 --- a/docs/available_datasets.md +++ b/docs/available_datasets.md @@ -12,7 +12,7 @@ You can add these yourself by creating a pull request, or open an issue to reque Note: all hours in a day are covered and all days for the given month are included for downloading. - Fore more information, see [the ECMWF website](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels). + For more information, see [the ECMWF website](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-single-levels). === "ERA5-land" - `2m_temperature` @@ -20,7 +20,7 @@ You can add these yourself by creating a pull request, or open an issue to reque Note: all hours in a day are covered and all days for the given month are included for downloading. - Fore more information, see [the ECMWF website](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land). + For more information, see [the ECMWF website](https://cds.climate.copernicus.eu/cdsapp#!/dataset/reanalysis-era5-land). === "ETH canopy height" - `height_of_vegetation` @@ -40,12 +40,15 @@ You can add these yourself by creating a pull request, or open an issue to reque For more information, see [their webpage](https://ads.atmosphere.copernicus.eu/cdsapp#!/dataset/cams-global-ghg-reanalysis-egg4). -=== "Land cover classification gridded maps" +=== "Land cover classification maps" - `land_cover` For more information, see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-land-cover). -=== FAPAR Leaf Area Index +=== "FAPAR Leaf Area Index" - `leaf_area_index` For more info see [their webpage](https://cds.climate.copernicus.eu/cdsapp#!/dataset/satellite-lai-fapar). + + +If you want to add more datasets to the collection, or support more conventions, please check the [contributing guide](CONTRIBUTING.md) and [readme for developers](README.dev.md) before starting. 
diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 0000000..5455114 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,54 @@ +## Installation +Zampy can be installed by doing: + +```console +python3 -m pip install zampy +``` + +To install the in-development version from the GitHub repository, do: + +```console +python3 -m pip install git+https://github.com/EcoExtreML/zampy.git +``` + +## Configuration +`Zampy` needs to be configured with a simple configuration file. + +You need to create this file under your user home directory: + +`~/.config/zampy/zampy_config.yml` + +The configuration file should contain the `working_directory`, for instance: +```yaml +working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy +``` + +If you need access to data on CDS or ADS server, you should add your CDS or ADS credentials to `zampy_config.yml`: + +```yaml +cdsapi: + url: # for example https://cds.climate.copernicus.eu/api/v2 + key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 +adsapi: + url: # for example https://ads.atmosphere.copernicus.eu/api/v2 + key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 +``` + +## Instructions for CDS/ADS datasets + +To download the following datasets, users need access to CDS/ADS via `cdsapi`/`adsapi`: +- CDS + - ERA5 + - ERA5 land + - LAI + - land cover +- ADS + - CAMS EGG4 (e.g. co2) + +To generate these API keys, you need to be a registered user on *CDS* via the [registration page](https://cds.climate.copernicus.eu/user/register?destination=%2F%23!%2Fhome), or on *ADS* via the [registration page](https://ads.atmosphere.copernicus.eu/user/register?destination=%2F%23!%2Fhome). + +Before submitting any request with `zampy`, please put your `cdsapi`/`adsapi` credentials in `zampy_config.yml`. Here is a short [instruction](https://cds.climate.copernicus.eu/api-how-to) about how to find your CDS/ADS API key. 
You can skip the steps related to `.cdsapirc` and simply add the key to `zampy_config.yml`. + +### Agree to the Terms of Use on CDS/ADS + +When downloading a dataset for the first time, it is **necessary to agree to the Terms of Use of every dataset that you intend to download**. This can only be done via the CDS/ADS website. When you try to download these datasets, you will be prompted to go to the terms of use and accept them. diff --git a/docs/index.md b/docs/index.md index 9f051f5..ec3a971 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,8 +1,78 @@ # Zampy -A tool for downloading Land Surface Model input data. +A tool for downloading Land Surface Model (LSM) input data. Named after *Zam*; [the Avestan language term for the Zoroastrian concept of "earth"](https://en.wikipedia.org/wiki/Zam). +## Why use Zampy +`zampy` is designed to retrieve data for LSM model input. It can help you prepare the data within the following steps: + +1. **Download** the data for the specified location(s) / geographical area. +2. **Ingest** data into unified (zampy) format. +3. **Load** the variables in a standardized way (standardized names & standardized units). +4. **Convert** the data to standard formats: + - ALMA / PLUMBER2's ALMA formatted netCDF. + - *CMOR formatted netCDF*. + +(Note: items in *italic* will not be worked on for now/low priority, but we want to allow space for these in the future.) + ## How to use Zampy -See the section ["using Zampy"](using_zampy.md). Information on the available datasets and their variables is shown [here](available_datasets.md). + +To install and configure `zampy`, first check [this guide](configuration.md) before continuing. Please also make sure that you have properly configured it following the guidelines. + +We recommend our users to use `zampy` with recipes. 
+ +A "recipe" is a file with a `yml` extension; it defines: + +- which data to download: + - the time extent + - a spatial bounding box + - the datasets to be downloaded + - the variables within each dataset +- and data conversion to the desired: + - [convention](https://github.com/EcoExtreML/zampy/tree/main/src/zampy/conventions) + - time frequency + - spatial resolution + +A sample recipe is shown below: + +```yaml +name: "test_recipe" + +download: + time: [2020-01-01, 2020-01-31] # must follow the numpy.datetime64 format. + bbox: [54, 6, 50, 3] # NESW + + datasets: + era5: + variables: + - eastward_component_of_wind + - surface_pressure + + cams: + variables: + - co2_concentration + +convert: + convention: ALMA + frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. + resolution: 0.5 # output resolution in degrees. +``` + +You can specify multiple datasets and multiple variables per dataset. Information on the available datasets and their variables is shown [here](available_datasets.md). + +When you have your recipe created and saved on your disk, you can execute your recipe by running the following code in your shell: + +```bash +zampy /path_to_recipe/sample_recipe.yml +``` + +### Interact with `zampy` in notebooks + +It is possible to use `zampy` directly in Python via its Python API. This is not recommended, as it is more difficult to reproduce the workflow if there is no recipe. +As it is an internal API, Python code can break without warning on new versions of Zampy. +An example notebook for each supported dataset is available [here](https://github.com/EcoExtreML/zampy/tree/main/demo). + +## Acknowledgements + +This package was developed by the Netherlands eScience Center. Development was supported by the Netherlands eScience Center under grant number NLESC.ASDI.2020.026. 
\ No newline at end of file diff --git a/docs/using_zampy.md b/docs/using_zampy.md deleted file mode 100644 index a12fca5..0000000 --- a/docs/using_zampy.md +++ /dev/null @@ -1,64 +0,0 @@ -# Using Zampy - -## Installing Zampy -Zampy can be installed by doing: -```bash -pip install zampy git+https://github.com/EcoExtreML/zampy -``` - -## Configuration -Zampy needs to be configured with a simple configuration file. - -You need to create this file under your -*user's home*-/.config directory: `~/.config/zampy/zampy_config.yml`, and should contain the following: - -```yaml -working_directory: /path_to_a_working_directory/ #for example: /home/bart/Zampy -``` - -If you need access to data on CDS or ADS server, you should add your CDS or ADS credentials to `zampy_config.yml`: - -```yaml -cdsapi: - url: # for example https://cds.climate.copernicus.eu/api/v2 - key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 -adsapi: - url: # for example https://ads.atmosphere.copernicus.eu/api/v2 - key: # for example 12345:xhashd-232jcsha-dsaj429-cdjajd29319 -``` - -## Formulating a recipe -A "recipe" is a file with `yml` extension and has the following structure: - -```yaml -name: "test_recipe" - -download: - time: [2020-01-01, 2020-01-31] # must follow the numpy.datetime64 format. - bbox: [54, 6, 50, 3] # NESW - - datasets: - era5: - variables: - - eastward_component_of_wind - - surface_pressure - - cams: - variables: - - co2_concentration - -convert: - convention: ALMA - frequency: 1H # outputs at 1 hour frequency. Pandas-like freq-keyword. - resolution: 0.5 # output resolution in degrees. -``` - -You can specify multiple datasets and multiple variables per dataset. - -## Running a recipe -Save this recipe to disk and run the following code in your shell: - -```bash -zampy /home/username/path_to_file/simple_recipe.yml -``` - -This will execute the recipe (i.e. download, ingest, convert, resample and save the data). 
diff --git a/mkdocs.yml b/mkdocs.yml index 6c9fb19..58cb8a5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,4 +1,13 @@ site_name: Zampy Documentation +repo_url: https://github.com/EcoExtreML/zampy +repo_name: zampy + +nav: + - Getting started: index.md + - Configuration: configuration.md + - Available datasets: available_datasets.md + - For developers : README.dev.md + - Contributing : CONTRIBUTING.md theme: name: material diff --git a/src/zampy/datasets/eth_canopy_height.py b/src/zampy/datasets/eth_canopy_height.py index c82e686..a1cb518 100644 --- a/src/zampy/datasets/eth_canopy_height.py +++ b/src/zampy/datasets/eth_canopy_height.py @@ -202,7 +202,7 @@ def get_filenames(bounds: SpatialBounds, sd_file: bool = False) -> list[str]: def get_valid_filenames(filenames: list[str]) -> list[str]: - """Returns a new list with only the valid filenames.""" + """Return a new list with only the valid filenames.""" valid_name_file = ( Path(__file__).parent / "assets" / "h_canopy_filenames_compressed.txt.gz" ) diff --git a/src/zampy/datasets/prism_dem.py b/src/zampy/datasets/prism_dem.py index 3d37e7d..55ca59d 100644 --- a/src/zampy/datasets/prism_dem.py +++ b/src/zampy/datasets/prism_dem.py @@ -314,7 +314,7 @@ def get_archive_filenames( def get_valid_filenames(filenames: list[str]) -> list[str]: - """Returns a new list with only the valid filenames.""" + """Return a new list with only the valid filenames.""" valid_filenames = "" for valid_name_file in VALID_NAME_FILES: