From 142b7ab4d7ea15d2107b60495d7cc0273dfb2fd5 Mon Sep 17 00:00:00 2001 From: Rachel Wegener <35503632+rwegener2@users.noreply.github.com> Date: Tue, 7 Nov 2023 07:17:42 -0500 Subject: [PATCH] Variables as an independent class (#451) Refactor Variables class to be user facing functionality --- .../IS2_data_access2-subsetting.ipynb | 42 +- .../IS2_data_variables.ipynb | 351 ++++++++++++- .../documentation/classes_dev_uml.svg | 497 +++++++++--------- .../documentation/classes_user_uml.svg | 33 +- .../user_guide/documentation/components.rst | 8 - .../user_guide/documentation/icepyx.rst | 1 + .../documentation/packages_user_uml.svg | 60 ++- .../user_guide/documentation/variables.rst | 25 + icepyx/__init__.py | 1 + icepyx/core/is2ref.py | 53 +- icepyx/core/query.py | 51 +- icepyx/core/read.py | 59 ++- icepyx/core/variables.py | 160 +++--- 13 files changed, 880 insertions(+), 461 deletions(-) create mode 100644 doc/source/user_guide/documentation/variables.rst diff --git a/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb b/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb index 89247de5f..3803b9fd6 100644 --- a/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb +++ b/doc/source/example_notebooks/IS2_data_access2-subsetting.ipynb @@ -51,7 +51,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "Create a query object and log in to Earthdata\n", "\n", @@ -83,7 +85,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "## Discover Subsetting Options\n", "\n", @@ -108,7 +112,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By default, spatial and temporal subsetting based on your initial inputs is applied to your order unless you specify `subset=False` to `order_granules()` or `download_granules()` (which calls `order_granules` under the hood if you have not already placed your order) functions.\n", "Additional subsetting options must be specified as keyword arguments to the order/download functions.\n", @@ -118,7 +124,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### _Why do I have to provide spatial bounds to icepyx even if I don't use them to subset my data order?_\n", "\n", @@ -132,7 +140,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "## About Data Variables in a query object\n", "\n", @@ -145,7 +155,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### Determine what variables are available for your data product\n", "There are multiple ways to get a complete list of available variables.\n", @@ -159,7 +171,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "region_a.order_vars.avail()" @@ -167,7 +181,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By passing the boolean `options=True` to the `avail` method, you can obtain lists of unique possible variable inputs (var_list inputs) and path subdirectory inputs (keyword_list and beam_list inputs) for your data product. These can be helpful for building your wanted variable list." ] @@ -175,7 +191,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "region_a.order_vars.avail(options=True)" @@ -353,9 +371,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "icepyx-dev", "language": "python", - "name": "python3" + "name": "icepyx-dev" }, "language_info": { "codemirror_mode": { @@ -367,7 +385,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/doc/source/example_notebooks/IS2_data_variables.ipynb b/doc/source/example_notebooks/IS2_data_variables.ipynb index 3ac1f99fe..78a250789 100644 --- a/doc/source/example_notebooks/IS2_data_variables.ipynb +++ b/doc/source/example_notebooks/IS2_data_variables.ipynb @@ -2,7 +2,9 @@ "cells": [ { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "# ICESat-2's Nested Variables\n", "\n", @@ -13,10 +15,10 @@ "\n", "A given ICESat-2 product may have over 200 variable + path combinations.\n", "icepyx includes a custom `Variables` module that is \"aware\" of the ATLAS sensor and how the ICESat-2 data products are stored.\n", - "The module can be accessed independently, but is optimally used as a component of a `Query` object (Case 1) or `Read` object (Case 2).\n", + "The module can be accessed independently, and can also be accessed as a component of a `Query` object or `Read` object.\n", "\n", - "This notebook illustrates in detail how the `Variables` module behaves using a `Query` data access example.\n", - "However, module usage is analogous through an icepyx ICESat-2 `Read` object.\n", + "This notebook illustrates in detail how the `Variables` module behaves. We use the module independently and also show how powerful it is directly in the icepyx workflow using a `Query` data access example.\n", + "Module usage using `Query` is analogous through an icepyx ICESat-2 `Read` object.\n", "More detailed example workflows specifically for the [query](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_access.html) and [read](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) tools within icepyx are available as separate Jupyter Notebooks.\n", "\n", "Questions? Be sure to check out the FAQs throughout this notebook, indicated as italic headings." @@ -24,11 +26,15 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### _Why do ICESat-2 products need a custom variable manager?_\n", "\n", "_It can be confusing and cumbersome to comb through the 200+ variable and path combinations contained in ICESat-2 data products._\n", + "_An hdf5 file is built like a folder with files in it. Opening an ICESat-2 file can be like opening a new folder with over 200 files in it and manually searching for only ones you want!_\n", + "\n", "_The icepyx `Variables` module makes it easier for users to quickly find and extract the specific variables they would like to work with across multiple beams, keywords, and variables and provides reader-friendly formatting to browse variables._\n", "_A future development goal for `icepyx` includes developing an interactive widget to further improve the user experience._\n", "_For data read-in, additional tools are available to target specific beam characteristics (e.g. strong versus weak beams)._" @@ -38,35 +44,245 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Some technical details about the Variables module\n", - "For those eager to push the limits or who want to know more implementation details...\n", + "Import packages, including icepyx" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import icepyx as ipx\n", + "from pprint import pprint" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "## Creating or Accessing ICESat-2 Variables" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "There are three ways to create or access an ICESat-2 Variables object in icepyx:\n", + "1. Access via the `.order_vars` property of a Query object\n", + "2. Access via the `.vars` property of a Read object\n", + "3. Create a stand-alone ICESat-2 Variables object using a local file or a product name\n", "\n", - "The only required input to the `Variables` module is `vartype`.\n", - "`vartype` has two acceptible string values, 'order' and 'file'.\n", - "If you use the module as shown in icepyx examples (namely through a `Read` or `Query` object), then this flag will be passed automatically.\n", - "It simply tells the software how to generate the list of possible variable values - either by pinging NSIDC for a list of available variables (`query`) or from the user-supplied file (`read`)." + "An example of each of these is shown below." ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ - "Import packages, including icepyx" + "### 1. Access `Variables` via the `.order_vars` property of a Query object" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "import icepyx as ipx\n", - "from pprint import pprint" + "region_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-22','2019-02-28'], \\\n", + " start_time='00:00:00', end_time='23:59:59')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Accessing Variables\n", + "region_a.order_vars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Showing the variable paths\n", + "region_a.order_vars.avail()" ] }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "tags": [], + "user_expressions": [] + }, + "source": [ + "### 2. Access via the `.vars` property of a Read object" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "path_root = '/full/path/to/your/data/'\n", + "reader = ipx.Read(path_root)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Accessing Variables\n", + "reader.vars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Showing the variable paths\n", + "# reader.vars.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "### 3. Create a stand-alone Variables object\n", + "\n", + "You can also generate an independent Variables object. This can be done using either:\n", + "1. The filepath to a file you'd like a variables list for\n", + "2. The product name (and optionally version) of a an ICESat-2 product" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Create a variables object from a filepath:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "filepath = '/full/path/to/your/data.h5'\n", + "v = ipx.Variables(path=filepath)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Create a variables object from a product. The version argument is optional." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "v = ipx.Variables(product='ATL03')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "v = ipx.Variables(product='ATL03', version='004')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# v.avail()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "Now that you know how to create or access Variables the remainder of this notebook showcases the functions availble for building and modifying variables lists. Remember, the example shown below uses a Query object, but the same methods are available if you are using a Read object or a Variables object." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, "source": [ "## Interacting with ICESat-2 Data Variables\n", "\n", @@ -88,7 +304,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "Create a query object and log in to Earthdata\n", "\n", @@ -134,7 +352,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "### ICESat-2 data variables\n", "\n", @@ -157,7 +377,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "To increase readability, you can use built in functions to show the 200+ variable + path combinations as a dictionary where the keys are variable names and the values are the paths to that variable.\n", "`region_a.order_vars.parse_var_list(region_a.order_vars.avail())` will return a dictionary of variable:paths key:value pairs." @@ -174,7 +396,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "By passing the boolean `options=True` to the `avail` method, you can obtain lists of unique possible variable inputs (var_list inputs) and path subdirectory inputs (keyword_list and beam_list inputs) for your data product. These can be helpful for building your wanted variable list." ] @@ -188,6 +412,30 @@ "region_a.order_vars.avail(options=True)" ] }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "```{admonition} Remember\n", + "You can run these same methods no matter how you created or accessed your ICESat-2 Variables. So the methods in this section could be equivalently be accessed using a Read object, or by directly accessing a file on your computer:\n", + "\n", + "```\n", + "```python\n", + "# Using a Read object\n", + "reader.vars.avail()\n", + "reader.vars.parse_var_list(reader.vars.avail())\n", + "reader.vars.avail(options=True)\n", + "\n", + "# Using a file on your computer\n", + "v = Variables(path='/my/file.h5')\n", + "v.avail()\n", + "v.parse_var_list(v.avail())\n", + "v.avail(options=True)\n", + "```\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -228,7 +476,9 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "The keywords available for this product are shown in the error message upon entering a blank keyword_list, as seen in the next cell." ] @@ -745,13 +995,62 @@ }, { "cell_type": "markdown", - "metadata": {}, + "metadata": { + "user_expressions": [] + }, "source": [ "#### With a `Read` object\n", "Calling the `load()` method on your `Read` object will automatically look for your wanted variable list and use it.\n", "Please see the [read-in example Jupyter Notebook](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) for a complete example of this usage.\n" ] }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "#### With a local filepath\n", + "\n", + "One of the benefits of using a local filepath in variables is that it allows you to easily inspect the variables that are available in your file. Once you have a variable of interest from the `avail` list, you could read that variable in with another library, such as xarray. The example below demonstrates this assuming an ATL06 ICESat-2 file." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "filepath = '/full/path/to/my/ATL06_file.h5'\n", + "v = ipx.Variables(path=filepath)\n", + "v.avail()\n", + "# Browse paths and decide you need `gt1l/land_ice_segments/`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import xarray as xr\n", + "\n", + "xr.open_dataset(filepath, group='gt1l/land_ice_segments/', engine='h5netcdf')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "user_expressions": [] + }, + "source": [ + "You'll notice in this workflow you are limited to viewing data only within a particular group. Icepyx also provides functionality for merging variables within or even across files. See the [read-in example Jupyter Notebook](https://icepyx.readthedocs.io/en/latest/example_notebooks/IS2_data_read-in.html) for more details about these features of icepyx." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -763,9 +1062,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "icepyx-dev", "language": "python", - "name": "python3" + "name": "icepyx-dev" }, "language_info": { "codemirror_mode": { @@ -777,7 +1076,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.11.4" } }, "nbformat": 4, diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index 0cd08c9e9..765e0d531 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -4,328 +4,329 @@ - - + + classes_dev_uml - + icepyx.core.auth.AuthenticationError - -AuthenticationError - - - + +AuthenticationError + + + icepyx.core.exceptions.DeprecationError - -DeprecationError - - - + +DeprecationError + + + icepyx.core.auth.EarthdataAuthMixin - -EarthdataAuthMixin - -_auth : Auth, NoneType -_s3_initial_ts : NoneType, datetime -_s3login_credentials : NoneType, dict -_session : NoneType -auth -s3login_credentials -session - -__init__(auth) -__str__() -earthdata_login(uid, email, s3token): None + +EarthdataAuthMixin + +_auth : NoneType +_s3_initial_ts : NoneType, datetime +_s3login_credentials : NoneType +_session : NoneType +auth +s3login_credentials +session + +__init__(auth) +__str__() +earthdata_login(uid, email, s3token): None icepyx.core.query.GenQuery - -GenQuery - -_spatial -_temporal -dates -end_time -spatial -spatial_extent -start_time -temporal - -__init__(spatial_extent, date_range, start_time, end_time) -__str__() + +GenQuery + +_spatial +_temporal +dates +end_time +spatial +spatial_extent +start_time +temporal + +__init__(spatial_extent, date_range, start_time, end_time) +__str__() icepyx.core.granules.Granules - -Granules - -avail : list -orderIDs : list - -__init__ -() -download(verbose, path, session, restart) -get_avail(CMRparams, reqparams, cloud) -place_order(CMRparams, reqparams, subsetparams, verbose, subset, session, geom_filepath) + +Granules + +avail : list +orderIDs : list + +__init__ +() +download(verbose, path, session, restart) +get_avail(CMRparams, reqparams, cloud) +place_order(CMRparams, reqparams, subsetparams, verbose, subset, session, geom_filepath) icepyx.core.query.Query - -Query - -CMRparams -_CMRparams -_about_product -_cust_options : dict -_cycles : list -_file_vars -_granules -_order_vars -_prod : NoneType, str -_readable_granule_name : list -_reqparams -_source : str -_subsetparams : NoneType -_tracks : list -_version -cycles -dataset -file_vars -granules -order_vars -product -product_version -reqparams -tracks - -__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files, auth) -__str__() -avail_granules(ids, cycles, tracks, cloud) -download_granules(path, verbose, subset, restart) -latest_version() -order_granules(verbose, subset, email) -product_all_info() -product_summary_info() -show_custom_options(dictview) -subsetparams() -visualize_elevation() -visualize_spatial_extent() + +Query + +CMRparams +_CMRparams +_about_product +_cust_options : dict +_cycles : list +_file_vars +_granules +_order_vars +_prod : NoneType, str +_readable_granule_name : list +_reqparams +_source : str +_subsetparams : NoneType +_tracks : list +_version +cycles +dataset +file_vars +granules +order_vars +product +product_version +reqparams +tracks + +__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files, auth) +__str__() +avail_granules(ids, cycles, tracks, cloud) +download_granules(path, verbose, subset, restart) +latest_version() +order_granules(verbose, subset, email) +product_all_info() +product_summary_info() +show_custom_options(dictview) +subsetparams() +visualize_elevation() +visualize_spatial_extent() icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.icesat2data.Icesat2Data - -Icesat2Data - - -__init__() + +Icesat2Data + + +__init__() icepyx.core.exceptions.NsidcQueryError - -NsidcQueryError - -errmsg -msgtxt : str - -__init__(errmsg, msgtxt) -__str__() + +NsidcQueryError + +errmsg +msgtxt : str + +__init__(errmsg, msgtxt) +__str__() icepyx.core.exceptions.QueryError - -QueryError - - - + +QueryError + + + icepyx.core.exceptions.NsidcQueryError->icepyx.core.exceptions.QueryError - - + + icepyx.core.APIformatting.Parameters - -Parameters - -_fmted_keys : NoneType, dict -_poss_keys : dict -_reqtype : NoneType, str -fmted_keys -partype -poss_keys - -__init__(partype, values, reqtype) -_check_valid_keys() -_get_possible_keys() -build_params() -check_req_values() -check_values() + +Parameters + +_fmted_keys : NoneType, dict +_poss_keys : dict +_reqtype : NoneType, str +fmted_keys +partype +poss_keys + +__init__(partype, values, reqtype) +_check_valid_keys() +_get_possible_keys() +build_params() +check_req_values() +check_values() icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_CMRparams + + +_CMRparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_reqparams + + +_reqparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.query.Query->icepyx.core.auth.EarthdataAuthMixin - - + + icepyx.core.query.Query->icepyx.core.query.GenQuery - - + + icepyx.core.read.Read - -Read - -_filelist : NoneType, list -_out_obj : Dataset -_product : NoneType, str -_read_vars -filelist -product -vars - -__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) -_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) -_extract_product(filepath) -_read_single_grp(file, grp_path) -load() + +Read + +_filelist : NoneType, list +_out_obj : Dataset +_product : NoneType, str +_read_vars +filelist +product +vars + +__init__(data_source, product, filename_pattern, catalog, glob_kwargs, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_read_single_grp(file, grp_path) +load() icepyx.core.spatial.Spatial - -Spatial - -_ext_type : str -_gdf_spat : GeoDataFrame -_geom_file : NoneType -_spatial_ext -_xdateln -extent -extent_as_gdf -extent_file -extent_type - -__init__(spatial_extent) -__str__() -fmt_for_CMR() -fmt_for_EGI() + +Spatial + +_ext_type : str +_gdf_spat : GeoDataFrame +_geom_file : NoneType +_spatial_ext +_xdateln +extent +extent_as_gdf +extent_file +extent_type + +__init__(spatial_extent) +__str__() +fmt_for_CMR() +fmt_for_EGI() icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery - - -_spatial + + +_spatial icepyx.core.spatial.Spatial->icepyx.core.query.GenQuery - - -_spatial + + +_spatial icepyx.core.temporal.Temporal - -Temporal - -_end : datetime -_start : datetime -end -start - -__init__(date_range, start_time, end_time) -__str__() + +Temporal + +_end : datetime +_start : datetime +end +start + +__init__(date_range, start_time, end_time) +__str__() icepyx.core.temporal.Temporal->icepyx.core.query.GenQuery - - -_temporal + + +_temporal icepyx.core.variables.Variables - -Variables - -_avail : NoneType, list -_vartype -_version : NoneType -path : NoneType -product : NoneType -wanted : NoneType, dict + +Variables + +_avail : NoneType, list +_path : NoneType +_product : NoneType, str +_version +path +product +version +wanted : NoneType, dict -__init__(vartype, avail, wanted, product, version, path, auth) +__init__(vartype, path, product, version, avail, wanted, auth) _check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) _get_combined_list(beam_list, keyword_list) _get_sum_varlist(var_list, all_vars, defaults) @@ -339,57 +340,57 @@ icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - - + + icepyx.core.variables.Variables->icepyx.core.query.Query - - -_order_vars + + +_order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - -_order_vars + + +_order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - -_file_vars + + +_file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - - -_read_vars + + +_read_vars icepyx.core.visualization.Visualize - -Visualize - -bbox : list -cycles : NoneType -date_range : NoneType -product : NoneType, str -tracks : NoneType - -__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) -generate_OA_parameters(): list -grid_bbox(binsize): list -make_request(base_url, payload) -parallel_request_OA(): da.array -query_icesat2_filelist(): tuple -request_OA_data(paras): da.array -viz_elevation(): (hv.DynamicMap, hv.Layout) + +Visualize + +bbox : list +cycles : NoneType +date_range : NoneType +product : NoneType, str +tracks : NoneType + +__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) +generate_OA_parameters(): list +grid_bbox(binsize): list +make_request(base_url, payload) +parallel_request_OA(): da.array +query_icesat2_filelist(): tuple +request_OA_data(paras): da.array +viz_elevation(): (hv.DynamicMap, hv.Layout) diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index a9c116469..59b8e8e6f 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -259,49 +259,50 @@ icepyx.core.variables.Variables - -Variables - -path : NoneType -product : NoneType -wanted : NoneType, dict - -append(defaults, var_list, beam_list, keyword_list) -avail(options, internal) -parse_var_list(varlist, tiered, tiered_vars) -remove(all, var_list, beam_list, keyword_list) + +Variables + +path +product +version +wanted : NoneType, dict + +append(defaults, var_list, beam_list, keyword_list) +avail(options, internal) +parse_var_list(varlist, tiered, tiered_vars) +remove(all, var_list, beam_list, keyword_list) icepyx.core.variables.Variables->icepyx.core.auth.EarthdataAuthMixin - + icepyx.core.variables.Variables->icepyx.core.query.Query - + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - + _file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - + _read_vars diff --git a/doc/source/user_guide/documentation/components.rst b/doc/source/user_guide/documentation/components.rst index b4b658385..dea41a970 100644 --- a/doc/source/user_guide/documentation/components.rst +++ b/doc/source/user_guide/documentation/components.rst @@ -67,14 +67,6 @@ validate\_inputs :undoc-members: :show-inheritance: -variables ---------- - -.. automodule:: icepyx.core.variables - :members: - :undoc-members: - :show-inheritance: - visualize --------- diff --git a/doc/source/user_guide/documentation/icepyx.rst b/doc/source/user_guide/documentation/icepyx.rst index 56ff7f496..a8a9a6f8e 100644 --- a/doc/source/user_guide/documentation/icepyx.rst +++ b/doc/source/user_guide/documentation/icepyx.rst @@ -23,4 +23,5 @@ Diagrams are updated automatically after a pull request (PR) is approved and bef query read quest + variables components diff --git a/doc/source/user_guide/documentation/packages_user_uml.svg b/doc/source/user_guide/documentation/packages_user_uml.svg index 44a041c77..8d8cf0dc9 100644 --- a/doc/source/user_guide/documentation/packages_user_uml.svg +++ b/doc/source/user_guide/documentation/packages_user_uml.svg @@ -4,11 +4,11 @@ - + packages_user_uml - + icepyx.core @@ -24,14 +24,14 @@ icepyx.core.auth - -icepyx.core.auth + +icepyx.core.auth icepyx.core.exceptions - -icepyx.core.exceptions + +icepyx.core.exceptions @@ -42,14 +42,14 @@ icepyx.core.icesat2data - -icepyx.core.icesat2data + +icepyx.core.icesat2data icepyx.core.is2ref - -icepyx.core.is2ref + +icepyx.core.is2ref @@ -60,8 +60,8 @@ icepyx.core.query->icepyx.core.auth - - + + @@ -96,44 +96,50 @@ icepyx.core.read - -icepyx.core.read + +icepyx.core.read icepyx.core.read->icepyx.core.exceptions - - + + icepyx.core.read->icepyx.core.variables - - + + icepyx.core.spatial - -icepyx.core.spatial + +icepyx.core.spatial icepyx.core.temporal - -icepyx.core.temporal + +icepyx.core.temporal icepyx.core.validate_inputs - -icepyx.core.validate_inputs + +icepyx.core.validate_inputs icepyx.core.variables->icepyx.core.auth - - + + + + + +icepyx.core.variables->icepyx.core.exceptions + + diff --git a/doc/source/user_guide/documentation/variables.rst b/doc/source/user_guide/documentation/variables.rst new file mode 100644 index 000000000..e147bfd64 --- /dev/null +++ b/doc/source/user_guide/documentation/variables.rst @@ -0,0 +1,25 @@ +Variables Class +================= + +.. currentmodule:: icepyx + + +Constructor +----------- + +.. autosummary:: + :toctree: ../../_icepyx/ + + Variables + + +Methods +------- + +.. autosummary:: + :toctree: ../../_icepyx/ + + Variables.avail + Variables.parse_var_list + Variables.append + Variables.remove diff --git a/icepyx/__init__.py b/icepyx/__init__.py index 3d92e2e60..40ea9e1ec 100644 --- a/icepyx/__init__.py +++ b/icepyx/__init__.py @@ -1,5 +1,6 @@ from icepyx.core.query import Query, GenQuery from icepyx.core.read import Read from icepyx.quest.quest import Quest +from icepyx.core.variables import Variables from _icepyx_version import version as __version__ diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index 5faaef110..a90c8fafa 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -1,3 +1,4 @@ +import h5py import json import numpy as np import requests @@ -110,7 +111,11 @@ def _get_custom_options(session, product, version): # reformatting formats = [Format.attrib for Format in root.iter("Format")] format_vals = [formats[i]["value"] for i in range(len(formats))] - format_vals.remove("") + try: + format_vals.remove("") + except KeyError: + # ATL23 does not have an empty value + pass cust_options.update({"fileformats": format_vals}) # reprojection only applicable on ICESat-2 L3B products. @@ -324,3 +329,49 @@ def gt2spot(gt, sc_orient): raise ValueError("Could not compute the spot number.") return np.uint8(spot) + +def latest_version(product): + """ + Determine the most recent version available for the given product. + + Examples + -------- + >>> latest_version('ATL03') + '006' + """ + _about_product = about_product(product) + return max( + [entry["version_id"] for entry in _about_product["feed"]["entry"]] + ) + +def extract_product(filepath): + """ + Read the product type from the metadata of the file. Return the product as a string. + """ + with h5py.File(filepath, 'r') as f: + try: + product = f.attrs['short_name'] + if isinstance(product, bytes): + # For most products the short name is stored in a bytes string + product = product.decode() + elif isinstance(product, np.ndarray): + # ATL14 saves the short_name as an array ['ATL14'] + product = product[0] + product = _validate_product(product) + except KeyError: + raise 'Unable to parse the product name from file metadata' + return product + +def extract_version(filepath): + """ + Read the version from the metadata of the file. Return the version as a string. + """ + with h5py.File(filepath, 'r') as f: + try: + version = f['METADATA']['DatasetIdentification'].attrs['VersionID'] + if isinstance(version, np.ndarray): + # ATL14 stores the version as an array ['00x'] + version = version[0] + except KeyError: + raise 'Unable to parse the version from file metadata' + return version diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 3459fd132..8700d5655 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -12,6 +12,7 @@ import icepyx.core.APIformatting as apifmt from icepyx.core.auth import EarthdataAuthMixin import icepyx.core.granules as granules + # QUESTION: why doesn't from granules import Granules work, since granules=icepyx.core.granules? from icepyx.core.granules import Granules import icepyx.core.is2ref as is2ref @@ -432,7 +433,7 @@ def __init__( super().__init__(spatial_extent, date_range, start_time, end_time, **kwargs) - self._version = val.prod_version(self.latest_version(), version) + self._version = val.prod_version(is2ref.latest_version(self._prod), version) # build list of available CMR parameters if reducing by cycle or RGT # or a list of explicitly named files (full or partial names) @@ -448,6 +449,7 @@ def __init__( # initialize authentication properties EarthdataAuthMixin.__init__(self) + # ---------------------------------------------------------------------- # Properties @@ -646,6 +648,27 @@ def subsetparams(self, **kwargs): if self._subsetparams == None and not kwargs: return {} else: + # If the user has supplied a subset list of variables, append the + # icepyx required variables to the Coverage dict + if "Coverage" in kwargs.keys(): + var_list = [ + "orbit_info/sc_orient", + "orbit_info/sc_orient_time", + "ancillary_data/atlas_sdp_gps_epoch", + "orbit_info/cycle_number", + "orbit_info/rgt", + "ancillary_data/data_start_utc", + "ancillary_data/data_end_utc", + "ancillary_data/granule_start_utc", + "ancillary_data/granule_end_utc", + "ancillary_data/start_delta_time", + "ancillary_data/end_delta_time", + ] + # Add any variables from var_list to Coverage that are not already included + for var in var_list: + if var not in kwargs["Coverage"].keys(): + kwargs["Coverage"][var.split("/")[-1]] = [var] + if self._subsetparams == None: self._subsetparams = apifmt.Parameters("subset") if self._spatial._geom_file is not None: @@ -688,17 +711,16 @@ def order_vars(self): # DevGoal: check for active session here if hasattr(self, "_cust_options"): self._order_vars = Variables( - self._source, - auth = self.auth, product=self.product, + version=self._version, avail=self._cust_options["variables"], + auth=self.auth, ) else: self._order_vars = Variables( - self._source, - auth=self.auth, product=self.product, version=self._version, + auth=self.auth, ) # I think this is where property setters come in, and one should be used here? Right now order_vars.avail is only filled in @@ -722,17 +744,18 @@ def file_vars(self): Examples -------- >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28']) # doctest: +SKIP - + >>> reg_a.file_vars # doctest: +SKIP """ if not hasattr(self, "_file_vars"): if self._source == "file": - self._file_vars = Variables(self._source, - auth=self.auth, - product=self.product, - ) + self._file_vars = Variables( + auth=self.auth, + product=self.product, + version=self._version, + ) return self._file_vars @@ -815,6 +838,8 @@ def product_all_info(self): def latest_version(self): """ + A reference function to is2ref.latest_version. + Determine the most recent version available for the given product. Examples @@ -823,11 +848,7 @@ def latest_version(self): >>> reg_a.latest_version() '006' """ - if not hasattr(self, "_about_product"): - self._about_product = is2ref.about_product(self._prod) - return max( - [entry["version_id"] for entry in self._about_product["feed"]["entry"]] - ) + return is2ref.latest_version(self.product) def show_custom_options(self, dictview=False): """ diff --git a/icepyx/core/read.py b/icepyx/core/read.py index 5ef1867f2..b62e5d2fe 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -320,10 +320,10 @@ class Read: # ---------------------------------------------------------------------- # Constructors - + def __init__( self, - data_source=None, + data_source=None, # DevNote: Make this a required arg when catalog is removed product=None, filename_pattern=None, catalog=None, @@ -336,10 +336,9 @@ def __init__( "The `catalog` argument has been deprecated and intake is no longer supported. " "Please use the `data_source` argument to specify your dataset instead." ) - + if data_source is None: raise ValueError("data_source is a required arguemnt") - # Raise warnings for deprecated arguments if filename_pattern: warnings.warn( @@ -380,7 +379,7 @@ def __init__( # Create a dictionary of the products as read from the metadata product_dict = {} for file_ in self._filelist: - product_dict[file_] = self._extract_product(file_) + product_dict[file_] = is2ref.extract_product(file_) # Raise warnings or errors for multiple products or products not matching the user-specified product all_products = list(set(product_dict.values())) @@ -456,12 +455,9 @@ def vars(self): """ if not hasattr(self, "_read_vars"): - self._read_vars = Variables( - "file", path=self.filelist[0], product=self.product - ) - + self._read_vars = Variables(path=self.filelist[0]) return self._read_vars - + @property def filelist(self): """ @@ -478,22 +474,6 @@ def product(self): # ---------------------------------------------------------------------- # Methods - - @staticmethod - def _extract_product(filepath): - """ - Read the product type from the metadata of the file. Return the product as a string. - """ - with h5py.File(filepath, "r") as f: - try: - product = f.attrs["short_name"].decode() - product = is2ref._validate_product(product) - except KeyError: - raise AttributeError( - f"Unable to extract the product name from file metadata." - ) - return product - @staticmethod def _check_source_for_pattern(source, filename_pattern): """ @@ -739,8 +719,33 @@ def load(self): # so to get a combined dataset, we need to keep track of spots under the hood, open each group, and then combine them into one xarray where the spots are IDed somehow (or only the strong ones are returned) # this means we need to get/track from each dataset we open some of the metadata, which we include as mandatory variables when constructing the wanted list + if not self.vars.wanted: + raise AttributeError( + 'No variables listed in self.vars.wanted. Please use the Variables class ' + 'via self.vars to search for desired variables to read and self.vars.append(...) ' + 'to add variables to the wanted variables list.' + ) + + # Append the minimum variables needed for icepyx to merge the datasets + # Skip products which do not contain required variables + if self.product not in ['ATL14', 'ATL15', 'ATL23']: + var_list=[ + "sc_orient", + "atlas_sdp_gps_epoch", + "cycle_number", + "rgt", + "data_start_utc", + "data_end_utc", + ] + + # Adjust the nec_varlist for individual products + if self.product == "ATL11": + var_list.remove("sc_orient") + + self.vars.append(defaults=False, var_list=var_list) + try: - groups_list = list_of_dict_vals(self._read_vars.wanted) + groups_list = list_of_dict_vals(self.vars.wanted) except AttributeError: pass diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py index d46561f46..94645ca94 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -1,9 +1,13 @@ import numpy as np import os import pprint +import warnings from icepyx.core.auth import EarthdataAuthMixin import icepyx.core.is2ref as is2ref +from icepyx.core.exceptions import DeprecationError +import icepyx.core.validate_inputs as val +import icepyx.core as ipxc # DEVGOAL: use h5py to simplify some of these tasks, if possible! @@ -25,11 +29,21 @@ class Variables(EarthdataAuthMixin): contained in ICESat-2 products. Parameters - ---------- + ---------- vartype : string + This argument is deprecated. The vartype will be inferred from data_source. One of ['order', 'file'] to indicate the source of the input variables. This field will be auto-populated when a variable object is created as an attribute of a query object. + path : string, default None + The path to a local Icesat-2 file. The variables list will contain the variables + present in this file. Either path or product are required input arguments. + product : string, default None + Properly formatted string specifying a valid ICESat-2 product. The variables list will + contain all available variables for this product. Either product or path are required + input arguments. + version : string, default None + Properly formatted string specifying a valid version of the ICESat-2 product. avail : dictionary, default None Dictionary (key:values) of available variable names (keys) and paths (values). wanted : dictionary, default None @@ -38,47 +52,72 @@ class Variables(EarthdataAuthMixin): A session object authenticating the user to download data using their Earthdata login information. The session object will automatically be passed from the query object if you have successfully logged in there. - product : string, default None - Properly formatted string specifying a valid ICESat-2 product - version : string, default None - Properly formatted string specifying a valid version of the ICESat-2 product - path : string, default None - For vartype file, a path to a directory of or single input data file (not yet implemented) + """ def __init__( self, - vartype, - avail=None, - wanted=None, + vartype=None, + path=None, product=None, version=None, - path=None, + avail=None, + wanted=None, auth=None, ): - - assert vartype in ["order", "file"], "Please submit a valid variables type flag" + # Deprecation error + if vartype in ['order', 'file']: + raise DeprecationError( + 'It is no longer required to specify the variable type `vartype`. Instead please ', + 'provide either the path to a local file (arg: `path`) or the product you would ', + 'like variables for (arg: `product`).' + ) + + if path and product: + raise TypeError( + 'Please provide either a filepath or a product. If a filepath is provided ', + 'variables will be read from the file. If a product is provided all available ', + 'variables for that product will be returned.' + ) + # Set the product and version from either the input args or the file + if path: + self._path = path + self._product = is2ref.extract_product(self._path) + self._version = is2ref.extract_version(self._path) + elif product: + # Check for valid product string + self._product = is2ref._validate_product(product) + # Check for valid version string + # If version is not specified by the user assume the most recent version + self._version = val.prod_version(is2ref.latest_version(self._product), version) + else: + raise TypeError('Either a filepath or a product need to be given as input arguments.') + # initialize authentication properties EarthdataAuthMixin.__init__(self, auth=auth) - self._vartype = vartype - self.product = product self._avail = avail self.wanted = wanted # DevGoal: put some more/robust checks here to assess validity of inputs - - if self._vartype == "order": - if self._avail == None: - self._version = version - elif self._vartype == "file": - # DevGoal: check that the list or string are valid dir/files - self.path = path - - # @property - # def wanted(self): - # return self._wanted + + @property + def path(self): + if self._path: + path = self._path + else: + path = None + return path + + @property + def product(self): + return self._product + + @property + def version(self): + return self._version + def avail(self, options=False, internal=False): """ @@ -97,16 +136,14 @@ def avail(self, options=False, internal=False): . 'quality_assessment/gt3r/signal_selection_source_fraction_3'] """ - # if hasattr(self, '_avail'): - # return self._avail - # else: + if not hasattr(self, "_avail") or self._avail == None: - if self._vartype == "order": + if not hasattr(self, 'path'): self._avail = is2ref._get_custom_options( - self.session, self.product, self._version + self.session, self.product, self.version )["variables"] - - elif self._vartype == "file": + else: + # If a path was given, use that file to read the variables import h5py self._avail = [] @@ -446,53 +483,14 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non and keyword_list == None ), "You must enter parameters to add to a variable subset list. If you do not want to subset by variable, ensure your is2.subsetparams dictionary does not contain the key 'Coverage'." - req_vars = {} + final_vars = {} - # if not hasattr(self, 'avail') or self.avail==None: self.get_avail() - # vgrp, paths = self.parse_var_list(self.avail) - # allpaths = [] - # [allpaths.extend(np.unique(np.array(paths[p]))) for p in range(len(paths))] vgrp, allpaths = self.avail(options=True, internal=True) - self._check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) - # add the mandatory variables to the data object - if self._vartype == "order": - nec_varlist = [ - "sc_orient", - "sc_orient_time", - "atlas_sdp_gps_epoch", - "data_start_utc", - "data_end_utc", - "granule_start_utc", - "granule_end_utc", - "start_delta_time", - "end_delta_time", - ] - elif self._vartype == "file": - nec_varlist = [ - "sc_orient", - "atlas_sdp_gps_epoch", - "cycle_number", - "rgt", - "data_start_utc", - "data_end_utc", - ] - - # Adjust the nec_varlist for individual products - if self.product == "ATL11": - nec_varlist.remove("sc_orient") - - try: - self._check_valid_lists(vgrp, allpaths, var_list=nec_varlist) - except ValueError: - # Assume gridded product since user input lists were previously validated - nec_varlist = [] - + # Instantiate self.wanted to an empty dictionary if it doesn't exist if not hasattr(self, "wanted") or self.wanted == None: - for varid in nec_varlist: - req_vars[varid] = vgrp[varid] - self.wanted = req_vars + self.wanted = {} # DEVGOAL: add a secondary var list to include uncertainty/error information for lower level data if specific data variables have been specified... @@ -501,21 +499,21 @@ def append(self, defaults=False, var_list=None, beam_list=None, keyword_list=Non # Case only variables (but not keywords or beams) are specified if beam_list == None and keyword_list == None: - req_vars.update(self._iter_vars(sum_varlist, req_vars, vgrp)) + final_vars.update(self._iter_vars(sum_varlist, final_vars, vgrp)) # Case a beam and/or keyword list is specified (with or without variables) else: - req_vars.update( - self._iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list) + final_vars.update( + self._iter_paths(sum_varlist, final_vars, vgrp, beam_list, keyword_list) ) # update the data object variables - for vkey in req_vars.keys(): + for vkey in final_vars.keys(): # add all matching keys and paths for new variables if vkey not in self.wanted.keys(): - self.wanted[vkey] = req_vars[vkey] + self.wanted[vkey] = final_vars[vkey] else: - for vpath in req_vars[vkey]: + for vpath in final_vars[vkey]: if vpath not in self.wanted[vkey]: self.wanted[vkey].append(vpath)