From ed422f6238fea2e6fa84c0267747a08b4e6c02f6 Mon Sep 17 00:00:00 2001 From: Jacob Mims <122570226+jtmims@users.noreply.github.com> Date: Wed, 18 Sep 2024 11:37:09 -0500 Subject: [PATCH] Container Documentation (#687) * Create container_config_demo.jsonc * Create container_cat.csv * Create container_cat.json * Update container_config_demo.jsonc * docs * Update ref_container.rst * Update ref_container.rst * Update ref_container.rst * Update ref_container.rst * Update ref_container.rst * Update dev_start.rst * Update ref_container.rst * Update dev_start.rst * Update ref_container.rst * Update doc/sphinx/dev_start.rst Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> * Update doc/sphinx/ref_container.rst Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> * Update doc/sphinx/ref_container.rst Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> * Update doc/sphinx/ref_container.rst Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> * Update doc/sphinx/dev_start.rst Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> --------- Co-authored-by: Jess <20195932+wrongkindofdoctor@users.noreply.github.com> --- .../example_multicase/container_cat.csv | 3 + .../example_multicase/container_cat.json | 181 ++++++++++++++++++ .../container_config_demo.jsonc | 117 +++++++++++ doc/sphinx/dev_start.rst | 4 + doc/sphinx/ref_container.rst | 89 +++++++++ doc/sphinx/ref_toc.rst | 1 + 6 files changed, 395 insertions(+) create mode 100644 diagnostics/example_multicase/container_cat.csv create mode 100644 diagnostics/example_multicase/container_cat.json create mode 100644 diagnostics/example_multicase/container_config_demo.jsonc create mode 100644 doc/sphinx/ref_container.rst diff --git a/diagnostics/example_multicase/container_cat.csv b/diagnostics/example_multicase/container_cat.csv new file mode 100644 index 000000000..d3db31f86 --- /dev/null +++ 
b/diagnostics/example_multicase/container_cat.csv @@ -0,0 +1,3 @@ +activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version +CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/proj/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none +CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/proj/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none diff --git a/diagnostics/example_multicase/container_cat.json b/diagnostics/example_multicase/container_cat.json new file mode 100644 index 000000000..6917f854e --- /dev/null +++ b/diagnostics/example_multicase/container_cat.json @@ -0,0 +1,181 @@ +{ + "esmcat_version": "0.0.1", + "attributes": [ + { + "column_name": "activity_id", + "vocabulary": "" + }, + { + "column_name": "branch_method", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_child", + "vocabulary": "" + }, + { + "column_name": "branch_time_in_parent", + "vocabulary": "" + }, + { + "column_name": "experiment", + "vocabulary": "" + }, + { + "column_name": "experiment_id", + "vocabulary": "" + }, + { + "column_name": "frequency", + "vocabulary": "" + }, 
+ { + "column_name": "grid", + "vocabulary": "" + }, + { + "column_name": "grid_label", + "vocabulary": "" + }, + { + "column_name": "institution_id", + "vocabulary": "" + }, + { + "column_name": "nominal_resolution", + "vocabulary": "" + }, + { + "column_name": "parent_activity_id", + "vocabulary": "" + }, + { + "column_name": "parent_experiment_id", + "vocabulary": "" + }, + { + "column_name": "parent_source_id", + "vocabulary": "" + }, + { + "column_name": "parent_time_units", + "vocabulary": "" + }, + { + "column_name": "parent_variant_label", + "vocabulary": "" + }, + { + "column_name": "product", + "vocabulary": "" + }, + { + "column_name": "realm", + "vocabulary": "" + }, + { + "column_name": "source_id", + "vocabulary": "" + }, + { + "column_name": "source_type", + "vocabulary": "" + }, + { + "column_name": "sub_experiment", + "vocabulary": "" + }, + { + "column_name": "sub_experiment_id", + "vocabulary": "" + }, + { + "column_name": "table_id", + "vocabulary": "" + }, + { + "column_name": "variable_id", + "vocabulary": "" + }, + { + "column_name": "variant_label", + "vocabulary": "" + }, + { + "column_name": "member_id", + "vocabulary": "" + }, + { + "column_name": "standard_name", + "vocabulary": "" + }, + { + "column_name": "long_name", + "vocabulary": "" + }, + { + "column_name": "units", + "vocabulary": "" + }, + { + "column_name": "vertical_levels", + "vocabulary": "" + }, + { + "column_name": "init_year", + "vocabulary": "" + }, + { + "column_name": "start_time", + "vocabulary": "" + }, + { + "column_name": "end_time", + "vocabulary": "" + }, + { + "column_name": "time_range", + "vocabulary": "" + }, + { + "column_name": "path", + "vocabulary": "" + }, + { + "column_name": "version", + "vocabulary": "" + } + ], + "assets": { + "column_name": "path", + "format": "netcdf", + "format_column_name": null + }, + "aggregation_control": { + "variable_column_name": "variable_id", + "groupby_attrs": [ + "activity_id", + "institution_id", + "source_id", + 
"experiment_id", + "frequency", + "member_id", + "table_id", + "grid_label", + "realm", + "variant_label" + ], + "aggregations": [ + { + "type": "union", + "attribute_name": "variable_id", + "options": {} + } + ] + }, + "id": "esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv", + "description": null, + "title": null, + "last_updated": "2023-06-01", + "catalog_file": "file:/proj/MDTF-diagnostics/diagnostics/example_multicase/container_cat.csv" +} diff --git a/diagnostics/example_multicase/container_config_demo.jsonc b/diagnostics/example_multicase/container_config_demo.jsonc new file mode 100644 index 000000000..081896643 --- /dev/null +++ b/diagnostics/example_multicase/container_config_demo.jsonc @@ -0,0 +1,117 @@ +// This is a template for configuring MDTF to run PODs that analyze multi-run/ensemble data +// +// Copy this file, rename it, and customize the settings as needed +// Pass your file to the framework using the -f/--input-file flag. +// Any other explicit command line options will override what's listed here. +// +// All text to the right of an unquoted "//" is a comment and ignored, as well +// as blank lines (JSONC quasi-standard.) +// +// Remove your test config file, or any changes you make to this template if you do not rename it, +// from your remote repository before you submit a PR for review. +// To generate CMIP synthetic data in the example dataset, run the following: +// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml +// > conda activate _MDTF_synthetic_data +// > pip install mdtf-test-data +// > cd /mdtf +// > mkdir mdtf_test_data && cd mdtf_test_data +// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5 +// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5 +// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics +// in this sample config file +{ + // Run each ensemble on the example POD. 
+ // Add other PODs that work on ensemble datasets to the pod_list as needed + "pod_list" : [ + //"example" + "example_multicase" + ], + // Each case corresponds to a different simulation/output dataset + // startdate, enddate: either YYYY-MM-DD, YYYYMMDD:HHMMSS, or YYYY-MM-DD:HHMMSS + "case_list": + { + "CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19800101", + "enddate": "19841231" + } + , + "CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231": + { + "model": "test", + "convention": "CMIP", + "startdate": "19850101", + "enddate": "19891231" + } + }, + // PATHS --------------------------------------------------------------------- + // Location of supporting data downloaded when the framework was installed. + // If a relative path is given, it's resolved relative to the MDTF-diagnostics + // code directory. Environment variables (eg, $HOME) can be referenced with a + // "$" and will be expanded to their current values when the framework runs. + // Full or relative path to model data ESM-intake catalog header file + + "DATA_CATALOG": "/proj/MDTF-diagnostics/diagnostics/example_multicase/container_cat.json", + + // Parent directory containing observational data used by individual PODs. + "OBS_DATA_ROOT": "../inputdata/obs_data", + + // Working directory. + "WORK_DIR": "/proj/wkdir", + + // Directory to write output. The results of each run of the framework will be + // put in a subdirectory of this directory. Defaults to WORK_DIR if blank. + "OUTPUT_DIR": "/proj/wkdir", + + // Location of the Anaconda/miniconda or micromamba installation to use for managing + // dependencies (path returned by running `conda info --base` or `micromamba info`.) + "conda_root": "/opt/conda", + + // Directory containing the framework-specific conda environments. This should + // be equal to the "--env_dir" flag passed to conda_env_setup.sh. 
If left + blank, the framework will look for its environments in conda_root/envs + "conda_env_root": "/opt/conda/envs", + + // Location of the micromamba executable. Required if using micromamba + "micromamba_exe": "/bin/micromamba", + + // SETTINGS ------------------------------------------------------------------ + // Any command-line option recognized by the mdtf script (type `mdtf --help`) + // can be set here, in the form "flag name": "desired setting". + + // Settings affecting what output is generated: + // Set to true to run the preprocessor; default true: + "run_pp": true, + // Set to true to perform data translation; default false: + "translate_data": true, + // Set to true to have PODs save postscript figures in addition to bitmaps. + "save_ps": false, + + // Set to true for files > 4 GB + "large_file": false, + + // If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs + // run to completion + "save_pp_data": true, + + // Set to true to save HTML and bitmap plots in a .tar file. + "make_variab_tar": false, + + // Generate html output for multiple figures per case + "make_multicase_figure_html": false, + + // Set to true to overwrite results in OUTPUT_DIR; otherwise results saved + // under a unique name. + "overwrite": false, + + // List with custom preprocessing script(s) to run on data + // Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository + "user_pp_scripts" : [], + + // Settings used in debugging: + + // Log verbosity level. + "verbose": 1 +} diff --git a/doc/sphinx/dev_start.rst b/doc/sphinx/dev_start.rst index cb2be8c03..f6464a26b 100644 --- a/doc/sphinx/dev_start.rst +++ b/doc/sphinx/dev_start.rst @@ -26,6 +26,10 @@ Developers may download the code from GitHub as described in :ref:`ref-download` clone the repo in order to keep up with changes in the main branch, and to simplify submitting pull requests with your POD's code. 
Instructions for how to do this are given in :doc:`dev_git_intro`. +Users may also install and run the MDTF-diagnostics Docker container that includes pre-built base, python3_base, and synthetic_data Conda environments (NCL is not compatible with Docker). +Further details can be found in :doc:`ref_container`. The container is a new addition. +Beta testers are very much welcome! + Installing dependencies with Conda ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/sphinx/ref_container.rst b/doc/sphinx/ref_container.rst new file mode 100644 index 000000000..f7efbb428 --- /dev/null +++ b/doc/sphinx/ref_container.rst @@ -0,0 +1,89 @@ +.. role:: code-rst(code) + :language: reStructuredText +.. _ref-container: +Container Reference +=============================== +This section provides basic directions for downloading, +installing, and running the example_multicase POD in the +Model Diagnostics Task Force (MDTF) container. + +Getting the Container +------------------------------- +The container assumes that the MDTF-diagnostics GitHub repo is located on your local machine. +If you have not already, please clone the repo to your local machine with: + + .. code-block:: bash + + git clone https://github.com/NOAA-GFDL/MDTF-diagnostics.git + +The container can then be pulled from the GitHub +container registry with the command: + + .. code-block:: bash + + docker pull ghcr.io/noaa-gfdl/mdtf-diagnostics:container + +or with the equivalent command in your container software. +If you do not have container software, Docker can be downloaded from `here <https://docs.docker.com/get-docker/>`_. + +Launching the Container +------------------------------- +The container itself can be launched with Docker using: + + .. code-block:: bash + + docker run -it -v {DIAG_DIR}:/proj/MDTF-diagnostics/diagnostics/ -v {WKDIR}:/proj/wkdir mdtf + +wherein: + * :code-rst:`{DIAG_DIR}` is the path to the diagnostics directory on your local machine. + This volume is not required, but heavily recommended. 
+ * :code-rst:`{WKDIR}` is where you would like to store the output on your local machine. + This allows the output HTML to be reachable without having to open a port to the container. + +These happen to be the only required volumes. Further volumes, such as data storage locations, may need to be mounted. + +Generating Synthetic Data +------------------------------- +Now that we are in the container, we can create some data to run the POD with. +The MDTF has a synthetic data generator for just this case. First, move into the MDTF-diagnostics dir: + + .. code-block:: bash + + cd /proj/MDTF-diagnostics/ + +We generate our synthetic data by running: + + .. code-block:: bash + + micromamba activate _MDTF_synthetic_data + pip install mdtf-test-data + mkdir mdtf_test_data && cd mdtf_test_data + mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5 + mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5 + +Now would be a good time to generate a catalog for the synthetic data, but, for the sake +of testing, we provide a catalog for the files needed to run the example POD. + +Running the POD +------------------------------- +The POD can now be run using: + + .. code-block:: bash + + micromamba activate _MDTF_base + mdtf_framework.py -f /proj/MDTF-diagnostics/diagnostics/example_multicase/container_config_demo.jsonc + +The results can be found in :code-rst:`/proj/wkdir/`. + +Building the Container +-------------------------------- +If you would like, you can build the container using the Dockerfile found in the GitHub repo. +If using podman (as required internally at the GFDL), +please build with the command: + + .. code-block:: bash + + podman build . --format docker -t mdtf + +:code-rst:`--format docker` is essential to have your copy commands work and +have the expected permissions in your container. 
diff --git a/doc/sphinx/ref_toc.rst b/doc/sphinx/ref_toc.rst index 13188ccb3..eea25a836 100644 --- a/doc/sphinx/ref_toc.rst +++ b/doc/sphinx/ref_toc.rst @@ -11,3 +11,4 @@ Framework reference ref_envvars ref_output ref_submodules + ref_container