From 87db21f13e1c64844d8d674f732bf96373e16a6d Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Fri, 18 Feb 2022 16:04:08 +0000 Subject: [PATCH] Version datasets individually (#18) Instead of using the entire data bundle and versioning all datasets based on the module name, we can now version them individually. To do so, we split up the data archives into separate repositories and Zenodo releases of https://github.com/fatiando-data. This means that functions won't have to be repeated and updating one dataset doesn't mean copying all of the others along with it (since the collection would be new). Versions are now specified as a required `version` argument in all `fetch_*` functions. When updating a dataset, functions for the others don't need to be repeated in a new module. We also only need 2 environment variables for setting the cache location and the data source (instead of 2 per version). A downside is that we can no longer accept a variable to set a custom data source URL since each dataset has a different one. The new environment variable only controls whether the data are fetched from GitHub or not. There was considerable refactoring of the code to make this work. 
--- .github/workflows/docs.yml | 14 +- .github/workflows/test.yml | 8 +- README.rst | 10 +- doc/api/index.rst | 51 +- doc/compatibility.rst | 30 +- doc/conf.py | 4 +- doc/gallery_src/README.txt | 17 + doc/gallery_src/{v1 => }/alps-gps-velocity.py | 8 +- doc/gallery_src/{v1 => }/britain-magnetic.py | 6 +- .../{v1 => }/british-columbia-lidar.py | 4 +- .../{v1 => }/caribbean-bathymetry.py | 8 +- doc/gallery_src/{v1 => }/earth-geoid.py | 8 +- doc/gallery_src/{v1 => }/earth-gravity.py | 8 +- doc/gallery_src/{v1 => }/earth-topography.py | 8 +- .../{v1 => }/southern-africa-gravity.py | 4 +- doc/gallery_src/v1/README.txt | 22 - doc/index.rst | 8 +- doc/tutorial_src/developers.py | 56 +- doc/tutorial_src/using.py | 41 +- ensaio/__init__.py | 11 + ensaio/_fetchers.py | 546 ++++++++++++++++++ ensaio/_utils.py | 36 -- ensaio/tests/test_fetchers.py | 80 +++ ensaio/tests/test_utils.py | 47 -- ensaio/tests/test_v1.py | 34 -- ensaio/v1.py | 346 ----------- env/requirements-docs.txt | 3 +- environment.yml | 3 +- 28 files changed, 778 insertions(+), 643 deletions(-) create mode 100644 doc/gallery_src/README.txt rename doc/gallery_src/{v1 => }/alps-gps-velocity.py (95%) rename doc/gallery_src/{v1 => }/britain-magnetic.py (94%) rename doc/gallery_src/{v1 => }/british-columbia-lidar.py (95%) rename doc/gallery_src/{v1 => }/caribbean-bathymetry.py (90%) rename doc/gallery_src/{v1 => }/earth-geoid.py (87%) rename doc/gallery_src/{v1 => }/earth-gravity.py (89%) rename doc/gallery_src/{v1 => }/earth-topography.py (88%) rename doc/gallery_src/{v1 => }/southern-africa-gravity.py (96%) delete mode 100644 doc/gallery_src/v1/README.txt create mode 100644 ensaio/_fetchers.py delete mode 100644 ensaio/_utils.py create mode 100644 ensaio/tests/test_fetchers.py delete mode 100644 ensaio/tests/test_utils.py delete mode 100644 ensaio/tests/test_v1.py delete mode 100644 ensaio/v1.py diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 83f0654..dedbd7e 100644 --- 
a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -111,19 +111,19 @@ jobs: - name: Install the package run: python -m pip install dist/*.whl - - name: Cache the v1 datasets + - name: Cache the datasets if: github.event_name == 'pull_request' uses: actions/cache@v2 with: - path: ${{ runner.temp }}/cache/ensaio/v1 - key: ensaio-data-v1-${{ hashFiles('ensaio/v1.py') }} + path: ${{ runner.temp }}/cache/ensaio + key: ensaio-data-${{ hashFiles('ensaio/_fetchers.py') }} - - name: Cache the v1 sphinx-gallery runs + - name: Cache the sphinx-gallery runs if: github.event_name == 'pull_request' uses: actions/cache@v2 with: - path: doc/gallery/v1 - key: gallery-v1-${{ hashFiles('doc/gallery_src/v1/*.py') }} + path: doc/gallery/ + key: gallery-${{ hashFiles('doc/gallery_src/*.py') }} - name: Cache the tutorial sphinx-gallery runs if: github.event_name == 'pull_request' @@ -136,7 +136,7 @@ jobs: run: make -C doc all env: # Define directory where sample data will be stored - ENSAIO_V1_DATA_DIR: ${{ runner.temp }}/cache/ensaio/v1 + ENSAIO_DATA_DIR: ${{ runner.temp }}/cache/ensaio/ # Store the docs as a build artifact so we can deploy it later - name: Upload HTML documentation as an artifact diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 5295fd4..54703a7 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -144,17 +144,17 @@ jobs: - name: List installed packages run: python -m pip freeze - - name: Cache the v1 datasets + - name: Cache the datasets if: matrix.cached uses: actions/cache@v2 with: - path: ${{ runner.temp }}/cache/ensaio/v1 - key: ensaio-data-v1-${{ hashFiles('ensaio/v1.py') }} + path: ${{ runner.temp }}/cache/ensaio + key: ensaio-data-${{ hashFiles('ensaio/_fetchers.py') }} - name: Run the tests run: make test env: - ENSAIO_V1_DATA_DIR: ${{ runner.temp }}/cache/ensaio/v1 + ENSAIO_DATA_DIR: ${{ runner.temp }}/cache/ensaio - name: Convert coverage report to XML for codecov run: coverage xml diff --git 
a/README.rst b/README.rst index 8c9b1e5..599c5e2 100644 --- a/README.rst +++ b/README.rst @@ -26,8 +26,8 @@ About **Ensaio** (Portuguese for "rehearsal") is a Python package for downloading open-access sample datasets for Geoscience. -It taps into the curated collection from `fatiando/data -`__ that is designed for use in tutorials, +It taps into the `Fatiando a Terra FAIR data collection +`__ that is designed for use in tutorials, documentation, and teaching. It uses `Pooch `__ to manage downloading and @@ -43,9 +43,9 @@ Project goals * Only download and let the user load the data. This helps make tutorials and examples more easily extended to a user's own data. * Be fully backwards compatible. We achieve this by separating **data** - versions from **Ensaio** versions. Major releases of the data get separate - modules in Ensaio: `ensaio.v1`, `ensaio.v2`, etc. Major releases of Ensaio - will be few and far between (if any). + versions from **Ensaio** versions. Data fetching functions allow you to + choose any data version that is older than the version of Ensaio that's + installed. Major releases of Ensaio will be few and far between (if any). Contacting Us ------------- diff --git a/doc/api/index.rst b/doc/api/index.rst index 240cd49..02663aa 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -3,46 +3,17 @@ List of functions and classes (API) =================================== -Functions and variables used to download the datasets and cache them locally. -Use the respective module to access the datasets in each major version of the -data release. - -.. tip:: - - The best way to use Ensaio is to ``import ensaio.v1 as ensaio`` or likewise - with other versions that are available. This way your code will continue to - work even when Ensaio updates to include newer incompatible dataset - versions. See :ref:`compatibility`. - .. automodule:: ensaio -.. currentmodule:: ensaio - -``ensaio.v1`` -------------- - -.. automodule:: ensaio.v1 - -Functions: .. 
autosummary:: - :toctree: generated/ - - ensaio.v1.locate - ensaio.v1.fetch_alps_gps - ensaio.v1.fetch_britain_magnetic - ensaio.v1.fetch_british_columbia_lidar - ensaio.v1.fetch_caribbean_bathymetry - ensaio.v1.fetch_earth_geoid - ensaio.v1.fetch_earth_gravity - ensaio.v1.fetch_earth_topography - ensaio.v1.fetch_southern_africa_gravity - -Module variables: - -.. autosummary:: - :toctree: generated/ - - ensaio.v1.DOI - ensaio.v1.URL - ensaio.v1.ENVIRONMENT_VARIABLE_URL - ensaio.v1.ENVIRONMENT_VARIABLE_CACHE + :toctree: generated/ + + ensaio.locate + ensaio.fetch_alps_gps + ensaio.fetch_britain_magnetic + ensaio.fetch_british_columbia_lidar + ensaio.fetch_caribbean_bathymetry + ensaio.fetch_earth_geoid + ensaio.fetch_earth_gravity + ensaio.fetch_earth_topography + ensaio.fetch_southern_africa_gravity diff --git a/doc/compatibility.rst b/doc/compatibility.rst index 15616d5..906e21c 100644 --- a/doc/compatibility.rst +++ b/doc/compatibility.rst @@ -22,20 +22,22 @@ major releases sparingly and with ample warning.** Source data releases -------------------- -New releases of Ensaio will tend to accompany releases of the source data -collection in the `fatiando/data `__ -repository. -However, the **version numbers will not necessarily match**. - -A major release of the data collection will result in a **new module being -added to Ensaio** (for example, the data release ``2.0.0`` will prompt an -Ensaio release with the ``ensaio.v2`` module added). -The ``1.*.*`` data will still be accessible through the ``ensaio.v1`` module. -The modules for previous releases will not be removed unless absolutely -necessary. - -This means that upgrading Ensaio should almost always be safe and documentation -using ``1.*.*`` data should still work after ``2.*.*`` data is released. +New releases of Ensaio will tend to accompany releases of new datasets or new +versions of existing data in the +`Fatiando a Terra Datasets `__ collection. 
+ +Older versions of each dataset will still remain available (as much as +possible) and can be accessed by setting the ``version`` argument of the +``fetch_*`` functions accordingly. +This means that **upgrading Ensaio should almost always be safe**. +Documentation using version ``1`` of a dataset will still use the same data +(and hopefully produce the same results) after version ``2`` is included in +Ensaio. + +.. seealso:: + + See :ref:`developers` for more tips and tricks. + .. _python-versions: diff --git a/doc/conf.py b/doc/conf.py index 0264760..cb08ff1 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -79,9 +79,9 @@ # ----------------------------------------------------------------------------- sphinx_gallery_conf = { # path to your examples scripts - "examples_dirs": ["gallery_src/v1", "tutorial_src"], + "examples_dirs": ["gallery_src", "tutorial_src"], # path where to save gallery generated examples - "gallery_dirs": ["gallery/v1", "tutorial"], + "gallery_dirs": ["gallery", "tutorial"], "filename_pattern": r"\.py", # Remove the "Download all examples" button from the top level gallery "download_all_examples": False, diff --git a/doc/gallery_src/README.txt b/doc/gallery_src/README.txt new file mode 100644 index 0000000..061832a --- /dev/null +++ b/doc/gallery_src/README.txt @@ -0,0 +1,17 @@ +.. _gallery: + +Available datasets +================== + +Use the functions in the :mod:`ensaio` module to download and cache (store) +each dataset on your computer. +See the :ref:`api` for more information about each dataset, the original data +sources, and their licenses. +The datasets are prepared for use in Ensaio in the repositories of the +`Fatiando a Terra Datasets `__ GitHub +organization. + +.. tip:: + + Click on the images for examples of fetching, loading, and plotting each + dataset. 
diff --git a/doc/gallery_src/v1/alps-gps-velocity.py b/doc/gallery_src/alps-gps-velocity.py similarity index 95% rename from doc/gallery_src/v1/alps-gps-velocity.py rename to doc/gallery_src/alps-gps-velocity.py index 5fcb554..920b972 100644 --- a/doc/gallery_src/v1/alps-gps-velocity.py +++ b/doc/gallery_src/alps-gps-velocity.py @@ -5,8 +5,8 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Alpine 3-component GPS velocities ---------------------------------- +GPS velocities (3-component) for the Alps +----------------------------------------- This is a compilation of 3D GPS velocities for the Alps. The horizontal velocities are reference to the Eurasian frame. All velocity components and @@ -21,11 +21,11 @@ import pandas as pd import pygmt -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk -fname = ensaio.fetch_alps_gps() +fname = ensaio.fetch_alps_gps(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/britain-magnetic.py b/doc/gallery_src/britain-magnetic.py similarity index 94% rename from doc/gallery_src/v1/britain-magnetic.py rename to doc/gallery_src/britain-magnetic.py index a8547fa..6b13499 100644 --- a/doc/gallery_src/v1/britain-magnetic.py +++ b/doc/gallery_src/britain-magnetic.py @@ -5,7 +5,7 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Airborne magnetic survey of Britain +Magnetic airborne survey of Britain ----------------------------------- This is a digitization of an airborne magnetic survey of Britain. 
Data are @@ -26,11 +26,11 @@ import pandas as pd import pygmt -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk -fname = ensaio.fetch_britain_magnetic() +fname = ensaio.fetch_britain_magnetic(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/british-columbia-lidar.py b/doc/gallery_src/british-columbia-lidar.py similarity index 95% rename from doc/gallery_src/v1/british-columbia-lidar.py rename to doc/gallery_src/british-columbia-lidar.py index 9aab48b..0078885 100644 --- a/doc/gallery_src/v1/british-columbia-lidar.py +++ b/doc/gallery_src/british-columbia-lidar.py @@ -20,11 +20,11 @@ import pandas as pd import pygmt -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk -fname = ensaio.fetch_british_columbia_lidar() +fname = ensaio.fetch_british_columbia_lidar(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/caribbean-bathymetry.py b/doc/gallery_src/caribbean-bathymetry.py similarity index 90% rename from doc/gallery_src/v1/caribbean-bathymetry.py rename to doc/gallery_src/caribbean-bathymetry.py index 33184bc..3a61fe9 100644 --- a/doc/gallery_src/v1/caribbean-bathymetry.py +++ b/doc/gallery_src/caribbean-bathymetry.py @@ -5,8 +5,8 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Single-beam bathymetry of the Caribbean ---------------------------------------- +Bathymetry single-beam surveys of the Caribbean +----------------------------------------------- This dataset is a compilation of several public domain single-beam bathymetry surveys of the ocean in the Caribbean. 
The data display a wide range of @@ -20,11 +20,11 @@ import pandas as pd import pygmt -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk -fname = ensaio.fetch_caribbean_bathymetry() +fname = ensaio.fetch_caribbean_bathymetry(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/earth-geoid.py b/doc/gallery_src/earth-geoid.py similarity index 87% rename from doc/gallery_src/v1/earth-geoid.py rename to doc/gallery_src/earth-geoid.py index d3e1ae0..d9bf913 100644 --- a/doc/gallery_src/v1/earth-geoid.py +++ b/doc/gallery_src/earth-geoid.py @@ -5,8 +5,8 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Geoid height of the Earth at 10 arc-minute resolution ------------------------------------------------------ +Earth geoid height grid at 10 arc-minute resolution +--------------------------------------------------- The grid is grid-node registered and stored in netCDF with CF-compliant metadata. The geoid height is derived from the EIGEN-6C4 spherical harmonic @@ -19,11 +19,11 @@ import pygmt import xarray as xr -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk. 
-fname = ensaio.fetch_earth_geoid() +fname = ensaio.fetch_earth_geoid(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/earth-gravity.py b/doc/gallery_src/earth-gravity.py similarity index 89% rename from doc/gallery_src/v1/earth-gravity.py rename to doc/gallery_src/earth-gravity.py index c679dde..2050298 100644 --- a/doc/gallery_src/v1/earth-gravity.py +++ b/doc/gallery_src/earth-gravity.py @@ -5,8 +5,8 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Gravity of the Earth at 10 arc-minute resolution ------------------------------------------------- +Earth gravity grid at 10 arc-minute resolution +---------------------------------------------- The grid is grid-node registered and stored in netCDF with CF-compliant metadata. The gravity values are derived from the EIGEN-6C4 spherical harmonic @@ -22,11 +22,11 @@ import pygmt import xarray as xr -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk. 
-fname = ensaio.fetch_earth_gravity() +fname = ensaio.fetch_earth_gravity(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/earth-topography.py b/doc/gallery_src/earth-topography.py similarity index 88% rename from doc/gallery_src/v1/earth-topography.py rename to doc/gallery_src/earth-topography.py index e1a44b6..0c591d3 100644 --- a/doc/gallery_src/v1/earth-topography.py +++ b/doc/gallery_src/earth-topography.py @@ -5,8 +5,8 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # """ -Topography of the Earth at 10 arc-minute resolution ---------------------------------------------------- +Earth topography grid at 10 arc-minute resolution +------------------------------------------------- The grid is grid-node registered and stored in netCDF with CF-compliant metadata. The values are derived from a spherical harmonic model of the ETOPO1 @@ -19,11 +19,11 @@ import pygmt import xarray as xr -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk. 
-fname = ensaio.fetch_earth_topography() +fname = ensaio.fetch_earth_topography(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/southern-africa-gravity.py b/doc/gallery_src/southern-africa-gravity.py similarity index 96% rename from doc/gallery_src/v1/southern-africa-gravity.py rename to doc/gallery_src/southern-africa-gravity.py index 2cdcfb9..43fa2cb 100644 --- a/doc/gallery_src/v1/southern-africa-gravity.py +++ b/doc/gallery_src/southern-africa-gravity.py @@ -22,11 +22,11 @@ import pandas as pd import pygmt -import ensaio.v1 as ensaio +import ensaio ############################################################################### # Download and cache the data and return the path to it on disk -fname = ensaio.fetch_southern_africa_gravity() +fname = ensaio.fetch_southern_africa_gravity(version=1) print(fname) ############################################################################### diff --git a/doc/gallery_src/v1/README.txt b/doc/gallery_src/v1/README.txt deleted file mode 100644 index f3302a7..0000000 --- a/doc/gallery_src/v1/README.txt +++ /dev/null @@ -1,22 +0,0 @@ -.. _gallery_v1: - -Available datasets (v1) -======================= - -Use the functions in the :mod:`ensaio.v1` module to download and cache (store) -each dataset on your computer. -See the :ref:`documentation of individual functions ` for more information -about the respective data, the original data sources, and their licenses. - -Information about the data compilation: - -:Source: `doi.org/10.5281/zenodo.5167357 `__ -:License: `CC-BY `__ -:Provenance: `github.com/fatiando/data/releases/tag/v1.0.0 `__ - -These are the datasets that make up the version 1 series: - -.. margin:: - - Click on the images for examples of fetching, loading, and plotting each - dataset. 
diff --git a/doc/index.rst b/doc/index.rst index 3cfdd59..3dfd501 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -15,8 +15,8 @@ **Ensaio** (Portuguese for "rehearsal") is a Python package for downloading open-access sample datasets for Geoscience. -It taps into the curated collection from `fatiando/data -`__ that is designed for use in tutorials, +It taps into the curated collection of `Fatiando a Terra Datasets +`__ that is designed for use in tutorials, documentation, and teaching. .. panels:: @@ -52,7 +52,7 @@ documentation, and teaching. Take a look at what's available - .. link-button:: gallery_v1 + .. link-button:: gallery :type: ref :text: Gallery :classes: btn-outline-primary btn-block stretched-link @@ -87,8 +87,8 @@ Table of contents install.rst tutorial/using.rst - gallery/v1/index.rst tutorial/developers.rst + gallery/index.rst .. toctree:: :caption: Reference diff --git a/doc/tutorial_src/developers.py b/doc/tutorial_src/developers.py index cee7d6c..b8ea242 100644 --- a/doc/tutorial_src/developers.py +++ b/doc/tutorial_src/developers.py @@ -15,50 +15,44 @@ These are a few tips and tricks for using Ensaio in your own project. """ ############################################################################### -# Importing a specific version +# Explicitly set data versions # ++++++++++++++++++++++++++++ # -# The recommended way to import Ensaio is: - -import ensaio.v1 as ensaio +# New versions of each dataset may be included in new Ensaio releases. We'll do +# our very best to always keep the older data versions available as well to +# avoid breaking existing tutorials and documentation. +# +# We recommend always explicitly setting the data version when fetching a +# dataset: +# +import ensaio -fname = ensaio.fetch_southern_africa_gravity() +fname = ensaio.fetch_southern_africa_gravity(version=1) ############################################################################### -# .. 
note:: +# This way, your documentation/tutorial should still use the same data (and +# hopefully still produce the same result) even if new versions of Ensaio are +# installed. +# Otherwise, people going through older examples with newer versions of Ensaio +# could get different results (or worse, broken code). # -# Replace ``v1`` with the version you want. +# .. tip:: # -# Major releases of the data collection that -# break backwards compatibility (and would be likely to break your code) are -# encapsulated in their own modules. -# So using the :mod:`ensaio.v1` module will make sure your code works with -# any version of Ensaio. +# We still recommend updating to the latest data versions in new tutorials +# and documentation whenever you can. # -# Of course, please try to update your code to use newer versions of the data -# collection whenever possible. - -############################################################################### # Download from GitHub on CI # ++++++++++++++++++++++++++ # -# By default, the data source for Ensaio is an archive with a given DOI. -# You can also specify alternative data download URLs using the -# ``ENSAIO_V1_URL`` environment variable (each data version gets their own -# variable so adjust accordingly). +# By default, the data sources for Ensaio are the archives with the given DOIs +# for each dataset (usually +# `Zenodo `__). +# Alternatively, you can ask Ensaio to download from the GitHub release of each +# dataset by setting the environment variable ``ENSAIO_DATA_FROM_GITHUB=true``. # -# We recommend using the environment variable to download from the -# GitHub release of the data when running on continuous integration (CI). +# We recommend using the environment variable when running on continuous +# integration (CI). # This will minimize the load that is placed on public data servers like # Zenodo. 
# When using GitHub Actions, this may even make the downloads much faster since # the data source is likely physically closer to the CI infrastructure. -# -# See the ``URL`` module-level variables for each version to find the exact URL -# you need (like :const:`ensaio.v1.URL`). -# -# .. important:: -# -# You may need to update the URL whenever you update Ensaio to access new -# data added in a minor data release. -# diff --git a/doc/tutorial_src/using.py b/doc/tutorial_src/using.py index 898d9ab..0a72483 100644 --- a/doc/tutorial_src/using.py +++ b/doc/tutorial_src/using.py @@ -10,31 +10,31 @@ Downloading data ================ -Ensaio provides functions for downloading datasets from the `fatiando/data -`__ collection to your computer. The -functions are available through different modules for each major release of -the data collection. For example, datasets from the version 1.X series are -available through :mod:`ensaio.v1`. +Ensaio provides functions for downloading datasets from the `Fatiando a Terra +Datasets `__ collection to your computer. +These functions don't attempt to do any loading of the data into memory and +only return the path of the downloaded file on your computer. -The recommended way to use Ensaio is to import a particular version module like -so: +To take care of the actual loading of the data, we'll import +`Pandas `__ as well since the data we'll use is in +CSV format. 
""" -# Load Pandas as well so we can read in some data import pandas as pd -import ensaio.v1 as ensaio +import ensaio ############################################################################### -# To download a particular dataset, say our Southern Africa gravity data, -# call the corresponding ``fetch_`` functions: -fname = ensaio.fetch_southern_africa_gravity() +# To download a particular dataset, say version 1 of our Southern Africa +# gravity data, call the corresponding ``fetch_*`` functions: +fname = ensaio.fetch_southern_africa_gravity(version=1) print(fname) ############################################################################### # .. tip:: # -# You can browse a list of all available datasets in :ref:`api` or -# :ref:`gallery_v1`. +# The version of the data should **always** be explicitly included so that +# your code continues to work in the same way even if a newer version of the +# data is released. # # If the data are not yet available on your computer, Ensaio will automatically # download it and return the path to the downloaded file. @@ -59,22 +59,19 @@ data ############################################################################### -# .. admonition:: Using Ensaio in your project documentation? +# .. seealso:: # -# Make sure you take a look at :ref:`developers` for useful tips and -# tricks. +# You can browse a list of all available datasets in :ref:`api` or +# :ref:`gallery`. # # Where are the data? # ------------------- # # The location of the cache folder varies by operating system. Use the -# :func:`ensaio.v1.locate` function to get its location on your computer. +# :func:`ensaio.locate` function to get its location on your computer. print(ensaio.locate()) ############################################################################### -# You can also set the location manually by creating a ``ENSAIO_V1_DATA_DIR`` +# You can also set the location manually by creating a ``ENSAIO_DATA_DIR`` # environment variable with the desired path. 
Ensaio will search for this # variable and if found will use its value instead of the default cache folder. -# -# Similar variables and functions are available for each data collection -# version. diff --git a/ensaio/__init__.py b/ensaio/__init__.py index 54fed54..ffef028 100644 --- a/ensaio/__init__.py +++ b/ensaio/__init__.py @@ -5,4 +5,15 @@ # This code is part of the Fatiando a Terra project (https://www.fatiando.org) # # Import functions/classes to make the public API +from ._fetchers import ( + fetch_alps_gps, + fetch_britain_magnetic, + fetch_british_columbia_lidar, + fetch_caribbean_bathymetry, + fetch_earth_geoid, + fetch_earth_gravity, + fetch_earth_topography, + fetch_southern_africa_gravity, + locate, +) from ._version import __version__ diff --git a/ensaio/_fetchers.py b/ensaio/_fetchers.py new file mode 100644 index 0000000..9f45f01 --- /dev/null +++ b/ensaio/_fetchers.py @@ -0,0 +1,546 @@ +# Copyright (c) 2021 The Ensaio Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +# +# This code is part of the Fatiando a Terra project (https://www.fatiando.org) +# +""" +Functions that fetch each of our sample datasets. 
+""" +import os +from pathlib import Path + +import pooch + +REGISTRY = { + "alps-gps-velocity.csv.xz": { + "v1": { + "hash": "md5:195ee3d88783ce01b6190c2af89f2b14", + "doi": "doi:10.5281/zenodo.5879163", + "url": "https://github.com/fatiando-data/alps-gps-velocity/releases/download/v1", + }, + }, + "britain-magnetic.csv.xz": { + "v1": { + "hash": "md5:8dbbda02c7e74f63adc461909358f056", + "doi": "doi:10.5281/zenodo.5879260", + "url": "https://github.com/fatiando-data/britain-magnetic/releases/download/v1", + }, + }, + "british-columbia-lidar.csv.xz": { + "v1": { + "hash": "md5:354c725a95036bd8340bc14e043ece5a", + "doi": "doi:10.5281/zenodo.5881887", + "url": "https://github.com/fatiando-data/british-columbia-lidar/releases/download/v1", + }, + }, + "caribbean-bathymetry.csv.xz": { + "v1": { + "hash": "md5:a7332aa6e69c77d49d7fb54b764caa82", + "doi": "doi:10.5281/zenodo.5882211", + "url": "https://github.com/fatiando-data/caribbean-bathymetry/releases/download/v1", + }, + }, + "earth-geoid-10arcmin.nc": { + "v1": { + "hash": "md5:39b97344e704eb68fa381df2eb47da0f", + "doi": "doi:10.5281/zenodo.5882205", + "url": "https://github.com/fatiando-data/earth-geoid-10arcmin/releases/download/v1", + }, + }, + "earth-gravity-10arcmin.nc": { + "v1": { + "hash": "md5:56df20e0e67e28ebe4739a2f0357c4a6", + "doi": "doi:10.5281/zenodo.5882207", + "url": "https://github.com/fatiando-data/earth-gravity-10arcmin/releases/download/v1", + }, + }, + "earth-topography-10arcmin.nc": { + "v1": { + "hash": "md5:c43b61322e03669c4313ba3d9a58028d", + "doi": "doi:10.5281/zenodo.5882203", + "url": "https://github.com/fatiando-data/earth-topography-10arcmin/releases/download/v1", + }, + }, + "southern-africa-gravity.csv.xz": { + "v1": { + "hash": "md5:1dee324a14e647855366d6eb01a1ef35", + "doi": "doi:10.5281/zenodo.5882430", + "url": "https://github.com/fatiando-data/southern-africa-gravity/releases/download/v1", + }, + }, +} + + +def _repository(fname, version): + """ + Create the Pooch instance that 
fetches a dataset of a particular version + + Cache location defaults to ``pooch.os_cache("ensaio")`` and can be + overwritten with the ``ENSAIO_DATA_DIR`` environment variable. + + The data source defaults to the Zenodo DOI and can be switched to the + GitHub release URL by setting the environment variable + ``ENSAIO_DATA_FROM_GITHUB=true``. + + Parameters + ---------- + fname : str + Name of the data file we want to fetch. + version : int + Version number of the dataset that we want to fetch. + + Returns + ------- + repository : :class:`pooch.Pooch` + + """ + version_str = f"v{version}" + # Decide if we need to pull data from GitHub or the Zenodo DOIs + envvar = "ENSAIO_DATA_FROM_GITHUB" + if envvar in os.environ and os.environ[envvar].lower() == "true": + source = "url" + else: + source = "doi" + entry = REGISTRY[fname][version_str] + repository = pooch.create( + path=Path(pooch.os_cache("ensaio")), + # Just here so that Pooch doesn't complain about there not being a + # format marker in the string. + base_url="{version}", + version=version_str, + env="ENSAIO_DATA_DIR", + retry_if_failed=3, + registry={fname: entry["hash"]}, + urls={fname: _sanitize_url(entry[source]) + fname}, + ) + return repository + + +def _sanitize_url(url): + """ + Makes sure that the URL ends with a trailing ``/`` for Pooch. + + Parameters + ---------- + url : str + The URL for downloading the data, with or without a trailing ``/``. + + Returns + ------- + url : str + Sanitized download URL. + """ + if not url.endswith("/"): + return url + "/" + return url + + +def locate(): + """ + Return the location of the system-dependent data cache folder + + This folder is not guaranteed to exist in the file system until a dataset + has been downloaded. + + The default location is a ``ensaio/`` folder in the system-dependent + default cache folder. A different path can also be specified through the + ``ENSAIO_DATA_DIR`` environment variable. 
+ + Returns + ------- + path : :class:`pathlib.Path` + Path to the cache folder. + """ + return _repository(fname="alps-gps-velocity.csv.xz", version=1).abspath.parent + + +def _check_versions(version, allowed, name): + """ + Check if the version is in the allowed range, raise an error if not. + + Parameters + ---------- + version : int + Integer version of the data. + allowed : set or list + List or set of allowed values for the version. + name : str + Name of the dataset (used in the error message). + + """ + if version not in allowed: + raise ValueError( + f"Invalid version={version} for {name} dataset. Must be one of {allowed}." + ) + + +def fetch_alps_gps(version): + """ + Alpine 3-component GPS velocity dataset + + This is a compilation of 3D GPS velocities for the Alps. The horizontal + velocities are referenced to the Eurasian frame. Coordinates are referenced + to WGS84. All velocity components and even the position have error + estimates, which is very useful and rare to find in a lot of datasets. + + There are ~200 stations in total. The data available are: station ID, + longitude, latitude (geodetic), height (geometric), ground velocity in the + East, North, and upward directions, and the estimated uncertainties in each + of these. + + **Format:** CSV with xz (lzma) compression. + + **Load with:** :func:`pandas.read_csv` + + **Original source:** + `Sánchez et al. (2018) `__ + + **Original license:** CC-BY + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5879163 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. 
+ + """ + _check_versions(version, allowed={1}, name="Alps GPS velocity") + fname = "alps-gps-velocity.csv.xz" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_britain_magnetic(version): + """ + Digitized airborne magnetic survey of Britain + + This is a digitization of an airborne magnetic survey of Britain. Data are + sampled where flight lines crossed contours on the archive maps. Contains + only the total field magnetic anomaly, not the magnetic field intensity + measurements or corrections. + + The exact date of measurements is not available (only the year). The + horizontal datum is WGS84 but the vertical datum is not specified. + + There are 541,508 measurements in total with 6 columns available: line and + segment ID, year, longitude, latitude (geodetic), height (unknown datum), + total field magnetic anomaly. + + Contains British Geological Survey materials © UKRI 2021. + + **Format:** CSV with xz (lzma) compression. + + **Load with:** :func:`pandas.read_csv` + + **Original source:** + `British Geological Survey + `__ + + **Original license:** Open Government Licence + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5879260 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. + + """ + _check_versions(version, allowed={1}, name="Britain aeromagnetic") + fname = "britain-magnetic.csv.xz" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_british_columbia_lidar(version): + """ + Lidar point cloud data of the Trail Islands in BC, Canada + + This is a lidar point cloud (ground reflections only) sliced to the small + `Trail Islands `__ + to the North of Vancouver. The islands have some nice looking topography + and their isolated nature creates problems for some interpolation methods. 
+ + The horizontal datum is WGS84 and the elevation is referenced to "mean sea + level". + + There are ~800,000 measurements in total with 3 columns available: + longitude, latitude (geodetic), and ground elevation (orthometric). + + **Format:** CSV with xz (lzma) compression. + + **Load with:** :func:`pandas.read_csv` + + **Original source:** `LidarBC + `__ + + **Original license:** Open Government Licence - British Columbia + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5881887 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. + + """ + _check_versions(version, allowed={1}, name="British Columbia lidar") + fname = "british-columbia-lidar.csv.xz" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_caribbean_bathymetry(version): + """ + Single-beam bathymetry of the Caribbean + + This dataset is a compilation of several public domain single-beam + bathymetry surveys of the ocean in the Caribbean. The data display a wide + range of tectonic activity, uneven distribution, and even clear systematic + errors in some of the survey lines. + + The horizontal datum is WGS84 and the bathymetric depth is positive + downwards and referenced to "mean sea level". + + There are 1,938,095 measurements in total with 4 columns available: + survey ID, longitude, latitude (geodetic), and depth. + + **Format:** CSV with xz (lzma) compression. + + **Load with:** :func:`pandas.read_csv` + + **Original source:** `NOAA NCEI + `__ + + **Original license:** Public domain + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5882211 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. 
+ + """ + _check_versions(version, allowed={1}, name="Caribbean bathymetry") + fname = "caribbean-bathymetry.csv.xz" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_earth_geoid(version): + """ + Geoid height of the Earth at 10 arc-minute resolution + + The grid is global with node spacing of 10 arc-minutes (grid-node + registered) and stored in netCDF with CF-compliant metadata. + The geoid height is derived from the EIGEN-6C4 spherical harmonic model of + the Earth's gravity field with respect to the WGS84 ellipsoid. + + The horizontal datum is WGS84. + + There are 1081 x 2161 grid points in total. Coordinates are longitude and + latitude (geodetic). + + **Format:** netCDF4 with zlib compression + + **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 + `__ library) + + **Original source:** `EIGEN-6C4 model + `__ + + **Original license:** CC-BY + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5882205 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. + + """ + _check_versions(version, allowed={1}, name="Earth geoid grid") + fname = "earth-geoid-10arcmin.nc" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_earth_gravity(version): + """ + Gravity of the Earth at 10 arc-minute resolution + + The grid is global with node spacing of 10 arc-minutes (grid-node + registered) and stored in netCDF with CF-compliant metadata. + + The gravity values are derived from the EIGEN-6C4 spherical harmonic model + (calculated uniformly at 10 km above the WGS84 ellipsoid). Here "gravity" + refers to the combined gravitational and centrifugal accelerations. + + The horizontal and vertical datum is WGS84. + + There are 1081 x 2161 grid points in total. Coordinates are longitude and + latitude (geodetic) plus a non-dimensional coordinate height (geometric). 
+ + **Format:** netCDF4 with zlib compression + + **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 + `__ library) + + **Original source:** `EIGEN-6C4 model + `__ + + **Original license:** CC-BY + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5882207 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. + + """ + _check_versions(version, allowed={1}, name="Earth gravity grid") + fname = "earth-gravity-10arcmin.nc" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_earth_topography(version): + """ + Topography of the Earth at 10 arc-minute resolution + + The grid is global with node spacing of 10 arc-minutes (grid-node + registered) and stored in netCDF with CF-compliant metadata. + + The values are derived from a spherical harmonic model of the ETOPO1 + bedrock grid. Topography/bathymetry values are referenced to "sea level" + and are positive upwards. The horizontal datum is WGS84. + + There are 1081 x 2161 grid points in total. Coordinates are longitude and + latitude (geodetic). + + **Format:** netCDF4 with zlib compression + + **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 + `__ library) + + **Original source:** `ETOPO1 `__ + + **Original license:** Public domain + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5882203 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. 
+ + """ + _check_versions(version, allowed={1}, name="Earth topography grid") + fname = "earth-topography-10arcmin.nc" + return Path(_repository(fname, version).fetch(fname)) + + +def fetch_southern_africa_gravity(version): + """ + Gravity ground-based surveys of Southern Africa + + This dataset is a compilation of several public domain ground measurements + of gravity from Southern Africa. The observations are the absolute gravity + values in mGal. The horizontal datum is not specified and heights are + referenced to "sea level", which we will interpret as the geoid (which + realization is likely not relevant since the uncertainty in the height is + probably larger than geoid model differences). + + There are ~14,000 measurements in total with 4 columns available: + longitude, latitude (geodetic), height (orthometric), and absolute gravity. + + **Format:** CSV with xz (lzma) compression. + + **Load with:** :func:`pandas.read_csv` + + **Original source:** `NOAA NCEI + `__ + + **Original license:** Public domain + + **Versions:** + + * `1 + `_ + (doi:`10.5281/zenodo.5882430 `__) + + Parameters + ---------- + version : int + The data version to fetch. See the available versions above. + + Returns + ------- + fname : :class:`pathlib.Path` + Path to the downloaded file on disk. + + """ + _check_versions(version, allowed={1}, name="Southern Africa gravity") + fname = "southern-africa-gravity.csv.xz" + return Path(_repository(fname, version).fetch(fname)) diff --git a/ensaio/_utils.py b/ensaio/_utils.py deleted file mode 100644 index 58cc56b..0000000 --- a/ensaio/_utils.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2021 The Ensaio Developers. -# Distributed under the terms of the BSD 3-Clause License. -# SPDX-License-Identifier: BSD-3-Clause -# -# This code is part of the Fatiando a Terra project (https://www.fatiando.org) -# -""" -Misc utilities used throughout the package. 
-""" -import os - - -def download_url(url, env=None): - """ - Get the download URL from the source or the environment variable if set. - - Makes sure that the URL ends with a trailing ``/`` for Pooch. - - Parameters - ---------- - url : str - The canonical URL for downloading the data. - env : str - Name of an environment variable that can be used to replace the - canonical URL. - - Returns - ------- - url : str - Sanitized download URL. - """ - if env is not None and env in os.environ and os.environ[env]: - url = os.environ[env] - if not url.endswith("/"): - url = url + "/" - return url diff --git a/ensaio/tests/test_fetchers.py b/ensaio/tests/test_fetchers.py new file mode 100644 index 0000000..b97dd52 --- /dev/null +++ b/ensaio/tests/test_fetchers.py @@ -0,0 +1,80 @@ +# Copyright (c) 2021 The Ensaio Developers. +# Distributed under the terms of the BSD 3-Clause License. +# SPDX-License-Identifier: BSD-3-Clause +# +# This code is part of the Fatiando a Terra project (https://www.fatiando.org) +# +""" +Test the dataset download functions and other utilities. +""" +import inspect +import os + +import pytest + +from .. 
import _fetchers + +FETCH_FUNCTIONS = [ + function + for name, function in inspect.getmembers(_fetchers, inspect.isfunction) + if name.startswith("fetch_") +] + + +@pytest.mark.parametrize("fetch", FETCH_FUNCTIONS) +def test_fetch_datasets(fetch): + "Check that fetching works and the file exists once downloaded" + path = fetch(version=1) + assert path.exists() + + +def test_locate(): + "Check that the cache folder exists by default after a fetch call" + FETCH_FUNCTIONS[0](version=1) + path = _fetchers.locate() + assert path.exists() + assert path.parts[-1] != "v1" + + +@pytest.mark.parametrize( + "url,sanitized", + [ + ("https://fatiando.org/", "https://fatiando.org/"), + ("https://fatiando.org", "https://fatiando.org/"), + ("doi:fatiando/", "doi:fatiando/"), + ("doi:fatiando", "doi:fatiando/"), + ], +) +def test_sanitize_url(url, sanitized): + "Check if url sanitizing works" + assert _fetchers._sanitize_url(url) == sanitized + + +@pytest.mark.parametrize( + "use_github", + ["True", "False"], +) +def test_data_source_from_github(use_github): + "Check that GitHub is used as a data source when the env variable is set" + backup = None + try: + backup = os.environ.get("ENSAIO_DATA_FROM_GITHUB", None) + os.environ["ENSAIO_DATA_FROM_GITHUB"] = use_github + repo = _fetchers._repository(fname="alps-gps-velocity.csv.xz", version=1) + if use_github == "True": + marker = "https://github.com" + else: + marker = "doi:" + assert all(url.startswith(marker) for url in repo.urls.values()) + finally: + if backup is None: + os.environ.pop("ENSAIO_DATA_FROM_GITHUB") + else: + os.environ["ENSAIO_DATA_FROM_GITHUB"] = backup + + +def test_check_versions(): + "Make sure an exception is raised for invalid versions" + with pytest.raises(ValueError) as error: + _fetchers._check_versions(version=3, allowed={1, 2}, name="Bla") + assert "Bla" in str(error) diff --git a/ensaio/tests/test_utils.py b/ensaio/tests/test_utils.py deleted file mode 100644 index 8c872b9..0000000 --- 
a/ensaio/tests/test_utils.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2021 The Ensaio Developers. -# Distributed under the terms of the BSD 3-Clause License. -# SPDX-License-Identifier: BSD-3-Clause -# -# This code is part of the Fatiando a Terra project (https://www.fatiando.org) -# -""" -Test the misc utilities. -""" -import os - -import pytest - -from .._utils import download_url - - -@pytest.mark.parametrize( - "url,sanitized", - [ - ("https://fatiando.org/", "https://fatiando.org/"), - ("https://fatiando.org", "https://fatiando.org/"), - ("doi:fatiando/", "doi:fatiando/"), - ("doi:fatiando", "doi:fatiando/"), - ], -) -def test_download_url(url, sanitized): - "Check if the output is a sanitized url for different inputs." - assert download_url(url) == sanitized - - -@pytest.mark.parametrize( - "url,sanitized", - [ - ("https://fatiando.org/", "https://fatiando.org/"), - ("https://fatiando.org", "https://fatiando.org/"), - ("doi:fatiando/", "doi:fatiando/"), - ("doi:fatiando", "doi:fatiando/"), - ], -) -def test_download_url_env(url, sanitized): - "Check if the setting an environment variable works" - env = "ENSAIO_TEST_VARIABLE" - try: - os.environ[env] = url - assert download_url(url="bla", env=env) == sanitized - finally: - os.environ.pop(env) diff --git a/ensaio/tests/test_v1.py b/ensaio/tests/test_v1.py deleted file mode 100644 index 0605843..0000000 --- a/ensaio/tests/test_v1.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2021 The Ensaio Developers. -# Distributed under the terms of the BSD 3-Clause License. -# SPDX-License-Identifier: BSD-3-Clause -# -# This code is part of the Fatiando a Terra project (https://www.fatiando.org) -# -""" -Test the version 1 dataset download functions. -""" -import inspect - -import pytest - -from .. 
import v1 - -FETCH_FUNCTIONS = [ - function - for name, function in inspect.getmembers(v1, inspect.isfunction) - if name.startswith("fetch_") -] - - -@pytest.mark.parametrize("fetch", FETCH_FUNCTIONS) -def test_v1_fetch_datasets(fetch): - "Check that fetching works and the file exists once downloaded" - path = fetch() - assert path.exists() - - -def test_v1_locate(): - "Check that the cache folder exists by default after a fetch call" - FETCH_FUNCTIONS[0]() - path = v1.locate() - assert path.exists() diff --git a/ensaio/v1.py b/ensaio/v1.py deleted file mode 100644 index cbb0e9c..0000000 --- a/ensaio/v1.py +++ /dev/null @@ -1,346 +0,0 @@ -# Copyright (c) 2021 The Ensaio Developers. -# Distributed under the terms of the BSD 3-Clause License. -# SPDX-License-Identifier: BSD-3-Clause -# -# This code is part of the Fatiando a Terra project (https://www.fatiando.org) -# -""" -The datasets in the version 1 series. -""" -from pathlib import Path - -import pooch - -from ._utils import download_url - -#: The DOI of the source data archive on Zenodo -DOI = "10.5281/zenodo.5167357" -#: The download URL of the source data release on GitHub (append a data file -#: name to download it) -URL = "https://github.com/fatiando/data/releases/download/v1.0.0" -#: Environment variable used to specify the download URL -#: (defaults to the DOI) -ENVIRONMENT_VARIABLE_URL = "ENSAIO_V1_URL" -#: Environment variable used to specify the cache folder -#: (defaults to ``ensaio/v1`` in the system default cache) -ENVIRONMENT_VARIABLE_CACHE = "ENSAIO_V1_DATA_DIR" - - -def _repository(): - """ - Create the pooch.Pooch instance that fetches the datasets - - Returns - ------- - repository : :class:`pooch.Pooch` - """ - repository = pooch.create( - path=Path(pooch.os_cache("ensaio")) / "v1", - base_url=download_url(url=f"doi:{DOI}", env=ENVIRONMENT_VARIABLE_URL), - env=ENVIRONMENT_VARIABLE_CACHE, - retry_if_failed=3, - registry={ - "alps-gps-velocity.csv.xz": "md5:195ee3d88783ce01b6190c2af89f2b14", - 
"britain-magnetic.csv.xz": "md5:8dbbda02c7e74f63adc461909358f056", - "british-columbia-lidar.csv.xz": "md5:354c725a95036bd8340bc14e043ece5a", - "caribbean-bathymetry.csv.xz": "md5:a7332aa6e69c77d49d7fb54b764caa82", - "earth-geoid-10arcmin.nc": "md5:39b97344e704eb68fa381df2eb47da0f", - "earth-gravity-10arcmin.nc": "md5:56df20e0e67e28ebe4739a2f0357c4a6", - "earth-topography-10arcmin.nc": "md5:c43b61322e03669c4313ba3d9a58028d", - "southern-africa-gravity.csv.xz": "md5:1dee324a14e647855366d6eb01a1ef35", - }, - ) - return repository - - -def locate(): - """ - Return the location of the system-dependent data cache for v1 datasets - - This folder is not guaranteed to exist in the file system until a dataset - has been downloaded. - - The default location is a ``ensaio/v1/`` folder in the system-dependent - default cache folder. A different path can also be specified by the - ``ENSAIO_V1_DATA_DIR`` environment variable. - - Returns - ------- - path : :class:`pathlib.Path` - Path to the cache folder. - """ - return _repository().abspath - - -def fetch_alps_gps(): - """ - Alpine 3-component GPS velocity dataset - - This is a compilation of 3D GPS velocities for the Alps. The horizontal - velocities are reference to the Eurasian frame. All velocity components and - even the position have error estimates, which is very useful and rare to - find in a lot of datasets. - - There 186 stations in total. The data available are: station ID, longitude, - latitude (geodetic), height (geometric), ground velocity in the East, - North, and upward directions, and the estimated uncertainties in each of - these. - - **Format:** CSV with xz (lzma) compression. - - **Load with:** :func:`pandas.read_csv` - - **Datum:** WGS84 - - **Original source:** - `Sánchez et al. (2018) `__ - - **Original license:** CC-BY - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. 
- - """ - return Path(_repository().fetch("alps-gps-velocity.csv.xz")) - - -def fetch_britain_magnetic(): - """ - Digitized airborne magnetic survey of Britain - - This is a digitization of an airborne magnetic survey of Britain. Data are - sampled where flight lines crossed contours on the archive maps. Contains - only the total field magnetic anomaly, not the magnetic field intensity - measurements or corrections. - - The exact date of measurements is not available (only the year). - - There are 541,508 measurements in total with 6 columns available: line and - segment ID, year, longitude, latitude (geodetic), height (unknown datum), - total field magnetic anomaly. - - Contains British Geological Survey materials © UKRI 2021. - - **Format:** CSV with xz (lzma) compression. - - **Load with:** :func:`pandas.read_csv` - - **Datum:** WGS84 - - **Original source:** - `British Geological Survey - `__ - - **Original license:** Open Government Licence - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. - - """ - return Path(_repository().fetch("britain-magnetic.csv.xz")) - - -def fetch_british_columbia_lidar(): - """ - Lidar point cloud data of the Trail Islands in BC, Canada - - This is a lidar point cloud (ground reflections only) sliced to the small - `Trail Islands `__ - to the North of Vancouver. The islands have some nice looking topography - and their isolated nature creates problems for some interpolation methods. - - There are 829,733 measurements in total with 3 columns available: - longitude, latitude (geodetic), and ground elevation (orthometric). - - **Format:** CSV with xz (lzma) compression. - - **Load with:** :func:`pandas.read_csv` - - **Datum:** WGS84 - - **Original source:** `LidarBC - `__ - - **Original license:** Open Government Licence - British Columbia - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. 
- - """ - return Path(_repository().fetch("british-columbia-lidar.csv.xz")) - - -def fetch_caribbean_bathymetry(): - """ - Single-beam bathymetry of the Caribbean - - This dataset is a compilation of several public domain single-beam - bathymetry surveys of the ocean in the Caribbean. The data display a wide - range of tectonic activity, uneven distribution, and even clear systematic - errors in some of the survey lines. - - There are 1,938,095 measurements in total with 4 columns available: - survey ID, longitude, latitude (geodetic), and depth (positive downwards - and referenced to "sea level"). - - **Format:** CSV with xz (lzma) compression. - - **Load with:** :func:`pandas.read_csv` - - **Datum:** WGS84 - - **Original source:** `NOAA NCEI - `__ - - **Original license:** Public domain - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. - - """ - return Path(_repository().fetch("caribbean-bathymetry.csv.xz")) - - -def fetch_earth_geoid(): - """ - Geoid height of the Earth at 10 arc-minute resolution - - The grid is global with node spacing of 10 arc-minutes (grid-node - registered) and stored in netCDF with CF-compliant metadata. - - The geoid height is derived from the EIGEN-6C4 spherical harmonic model of - the Earth's gravity field. - - There are 1081 x 2161 grid points in total. Coordinates are longitude and - latitude (geodetic). - - **Format:** netCDF4 with zlib compression - - **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 - `__ library) - - **Datum:** WGS84 - - **Original source:** `EIGEN-6C4 model - `__ - - **Original license:** CC-BY - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. 
- - """ - return Path(_repository().fetch("earth-geoid-10arcmin.nc")) - - -def fetch_earth_gravity(): - """ - Gravity of the Earth at 10 arc-minute resolution - - The grid is global with node spacing of 10 arc-minutes (grid-node - registered) and stored in netCDF with CF-compliant metadata. - - The gravity values are derived from the EIGEN-6C4 spherical harmonic model - (calculated uniformly at 10 km above the WGS84 ellipsoid). Here "gravity" - refers to the combined gravitational and centrifugal accelerations. - - There are 1081 x 2161 grid points in total. Coordinates are longitude and - latitude (geodetic) plus a non-dimensional coordinate height (geometric). - - **Format:** netCDF4 with zlib compression - - **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 - `__ library) - - **Datum:** WGS84 - - **Original source:** `EIGEN-6C4 model - `__ - - **Original license:** CC-BY - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. - - """ - return Path(_repository().fetch("earth-gravity-10arcmin.nc")) - - -def fetch_earth_topography(): - """ - Topography of the Earth at 10 arc-minute resolution - - The grid is global with node spacing of 10 arc-minutes (grid-node - registered) and stored in netCDF with CF-compliant metadata. - - The values are derived from a spherical harmonic model of the ETOPO1 - bedrock grid. Topography/bathymetry values are referenced to "sea level" - and are positive upwards. - - There are 1081 x 2161 grid points in total. Coordinates are longitude and - latitude (geodetic). - - **Format:** netCDF4 with zlib compression - - **Load with:** :func:`xarray.load_dataarray` (requires the `netcdf4 - `__ library) - - **Datum:** WGS84 - - **Original source:** `ETOPO1 `__ - - **Original license:** Public domain - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. 
- - """ - return Path(_repository().fetch("earth-topography-10arcmin.nc")) - - -def fetch_southern_africa_gravity(): - """ - Gravity ground-based surveys of Southern Africa - - This dataset is a compilation of several public domain ground measurements - of gravity from Southern Africa. The observations are the absolute gravity - values in mGal. The horizontal datum is not specified and heights are - referenced to "sea level", which we will interpret as the geoid (which - realization is likely not relevant since the uncertainty in the height is - probably larger than geoid model differences). - - There are 14,359 measurements in total with 4 columns available: longitude, - latitude (geodetic), height (orthometric), and absolute gravity. - - **Format:** CSV with xz (lzma) compression. - - **Load with:** :func:`pandas.read_csv` - - **Datum:** WGS84 - - **Original source:** `NOAA NCEI - `__ - - **Original license:** Public domain - - Returns - ------- - fname : :class:`pathlib.Path` - Path to the downloaded file on disk. - - """ - return Path(_repository().fetch("southern-africa-gravity.csv.xz")) diff --git a/env/requirements-docs.txt b/env/requirements-docs.txt index 45bfba9..a4f3fac 100644 --- a/env/requirements-docs.txt +++ b/env/requirements-docs.txt @@ -6,5 +6,6 @@ numpy pandas xarray netcdf4 -pygmt +pygmt==0.5.0 +gmt==6.2.0 ipython diff --git a/environment.yml b/environment.yml index 80685ff..4277a17 100644 --- a/environment.yml +++ b/environment.yml @@ -24,7 +24,8 @@ dependencies: - pandas - xarray - netcdf4 - - pygmt + - pygmt==0.5.0 + - gmt==6.2.0 - ipython # Style - black