From 6b2dd9e320efd642d6b4106a0eca6d478156b0e3 Mon Sep 17 00:00:00 2001 From: Gabriele Bozzola Date: Mon, 1 Apr 2024 10:06:22 -0700 Subject: [PATCH] ClimaDiagnostics 0.0.1 It is a good early draft --- .github/dependabot.yml | 7 + .github/workflows/CI.yml | 41 ++ .github/workflows/CompatHelper.yml | 17 + .github/workflows/Documentation.yml | 28 ++ .github/workflows/TagBot.yml | 14 + Manifest.toml | 76 ++- Project.toml | 11 +- README.md | 18 + docs/src/index.md | 4 +- docs/src/internals.md | 40 ++ src/AbstractTypes.jl | 15 + src/Callbacks.jl | 195 +++++++- src/ClimaDiagnostics.jl | 10 +- src/DiagnosticVariables.jl | 163 +++++++ src/Diagnostics.jl | 711 ---------------------------- src/ScheduledDiagnostics.jl | 603 +++++++++++++++++++++++ src/Writers.jl | 4 + src/clima_diagnostics.jl | 279 +++++++++++ src/diagnostics_utils.jl | 128 ----- src/dict_writer.jl | 28 ++ src/hdf5_writer.jl | 31 +- src/netcdf_writer.jl | 31 +- src/utils.jl | 42 ++ test/TestTools.jl | 51 ++ test/callback.jl | 95 ++++ test/diagnostic_variable.jl | 25 + test/diagnostics.jl | 202 ++++++++ test/doctest.jl | 6 + test/runtests.jl | 7 + test/writers.jl | 15 + 30 files changed, 1997 insertions(+), 900 deletions(-) create mode 100644 .github/dependabot.yml create mode 100644 .github/workflows/CI.yml create mode 100644 .github/workflows/CompatHelper.yml create mode 100644 .github/workflows/Documentation.yml create mode 100644 .github/workflows/TagBot.yml create mode 100644 docs/src/internals.md create mode 100644 src/AbstractTypes.jl create mode 100644 src/DiagnosticVariables.jl delete mode 100644 src/Diagnostics.jl create mode 100644 src/ScheduledDiagnostics.jl create mode 100644 src/clima_diagnostics.jl delete mode 100644 src/diagnostics_utils.jl create mode 100644 src/dict_writer.jl create mode 100644 src/utils.jl create mode 100644 test/TestTools.jl create mode 100644 test/callback.jl create mode 100644 test/diagnostic_variable.jl create mode 100644 test/diagnostics.jl create mode 100644 test/doctest.jl create mode 100644 test/writers.jl diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..700707ce --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,7 @@ +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml new file mode 100644 index 00000000..c8c82a48 --- /dev/null +++ b/.github/workflows/CI.yml @@ -0,0 +1,41 @@ +name: CI +on: + pull_request: + push: + branches: + - main + tags: '*' + +# Needed to allow julia-actions/cache to delete old caches that it has created +permissions: + actions: write + contents: read + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + climadiagnostics: + runs-on: ubuntu-latest + strategy: + matrix: + version: + - '1.9' + - '1.10' + timeout-minutes: 30 + steps: + - name: Checkout + uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@latest + with: + version: ${{ matrix.version }} + - uses: julia-actions/cache@v1 + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-runtest@latest + - uses: julia-actions/julia-processcoverage@latest + - uses: codecov/codecov-action@v4 + with: + files: lcov.info + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 00000000..42680abf --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,17 @@ +name: CompatHelper + +on: + schedule: + - cron: '00 00 * * *' + +jobs: + CompatHelper: + runs-on: ubuntu-latest + steps: + - uses: julia-actions/setup-julia@latest + - name: Pkg.add("CompatHelper") + run: julia -e 'using Pkg; Pkg.add("CompatHelper")' + - name: CompatHelper.main() + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: julia -e 'using CompatHelper; CompatHelper.main()' diff --git a/.github/workflows/Documentation.yml b/.github/workflows/Documentation.yml new file mode 100644 index 00000000..89ad8a17 --- /dev/null +++ b/.github/workflows/Documentation.yml @@ -0,0 +1,28 @@ +name: Documentation + +on: + push: + branches: + - main + pull_request: + tags: '*' + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@latest + with: + version: 1 + - name: Install dependencies + run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' + - name: Build and deploy + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} # For authentication with SSH deploy key + run: julia --project=docs/ docs/make.jl diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 00000000..fd81cc1a --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,14 @@ +name: TagBot +on: + issue_comment: + types: + - created +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/Manifest.toml b/Manifest.toml index eb7e4853..daeccbd7 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,8 +1,13 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.0" +julia_version = "1.10.2" manifest_format = "2.0" -project_hash = "cdeef9b141080e066a2723370b0023ce6ad799b1" +project_hash = "5483fa0b563f8745422df8ee3f196ad3888c38e9" + +[[deps.ADTypes]] +git-tree-sha1 = "016833eb52ba2d6bea9fcb50ca295980e728ee24" +uuid = "47edcb42-4c32-4615-8424-f2b9edc5f35b" +version = "0.2.7" [[deps.AbstractFFTs]] deps = ["LinearAlgebra"] @@ -218,6 +223,11 @@ git-tree-sha1 = "d7d7b58e149f19c322840a50d1bc20e8c23addb4" uuid = "1fbeeb36-5f17-413c-809b-666fb144f157" version = "0.3.5" +[[deps.CommonSolve]] +git-tree-sha1 = "0eee5eb66b1cf62cd6ad1b460238e60e4b09400c" +uuid = "38540f10-b2f7-11e9-35d8-d573e4eb0ff2" +version = "0.2.4" + [[deps.CommonSubexpressions]] deps = ["MacroTools", "Test"] git-tree-sha1 = "7b8a93dba8af7e3b42fecabf646260105ac373f7" @@ -237,7 +247,7 @@ weakdeps = ["Dates", "LinearAlgebra"] [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.0.5+1" +version = "1.1.0+0" [[deps.CompositionsBase]] git-tree-sha1 = "802bb88cd69dfd1509f6670416bd4434015693ad" @@ -334,6 +344,11 @@ git-tree-sha1 = "71c79e77221ab3a29918aaf6db4f217b89138608" uuid = "b305315f-e792-5b7a-8f41-49f472929428" version = "1.0.1" +[[deps.EnumX]] +git-tree-sha1 = "bdb1942cd4c45e3c678fd11569d5cccd80976237" +uuid = "4e289a0a-7415-4d19-859d-a7e5c4648b56" +version = "1.0.4" + [[deps.ExprTools]] git-tree-sha1 = "27415f162e6028e81c72b82ef756bf321213b6ec" uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" @@ -386,6 +401,17 @@ weakdeps = ["StaticArrays"] [deps.ForwardDiff.extensions] ForwardDiffStaticArraysExt = "StaticArrays" +[[deps.FunctionWrappers]] +git-tree-sha1 = "d62485945ce5ae9c0c48f124a84998d755bae00e" +uuid = "069b7b12-0de2-55c6-9aab-29f3d0a68a2e" +version = "1.1.3" + +[[deps.FunctionWrappersWrappers]] +deps = ["FunctionWrappers"] +git-tree-sha1 = "b104d487b34566608f8b4e1c39fb0b10aa279ff8" +uuid = "77dc65aa-8811-40c2-897b-53d922fa7daf" +version = "0.1.3" + [[deps.Future]] deps = ["Random"] uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" @@ -764,7 +790,7 @@ weakdeps = ["Adapt"] [[deps.OpenBLAS_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" -version = "0.3.23+2" +version = "0.3.23+4" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] @@ -928,6 +954,42 @@ version = "0.5.12" uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" version = "0.7.0" +[[deps.SciMLBase]] +deps = ["ADTypes", "ArrayInterface", "CommonSolve", "ConstructionBase", "Distributed", "DocStringExtensions", "EnumX", "FunctionWrappersWrappers", "IteratorInterfaceExtensions", "LinearAlgebra", "Logging", "Markdown", "PrecompileTools", "Preferences", "Printf", "RecipesBase", "RecursiveArrayTools", "Reexport", "RuntimeGeneratedFunctions", "SciMLOperators", "SciMLStructures", "StaticArraysCore", "Statistics", "SymbolicIndexingInterface", "Tables"] +git-tree-sha1 = "d15c65e25615272e1b1c5edb1d307484c7942824" +uuid = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +version = "2.31.0" + + [deps.SciMLBase.extensions] + SciMLBaseChainRulesCoreExt = "ChainRulesCore" + SciMLBaseMakieExt = "Makie" + SciMLBasePartialFunctionsExt = "PartialFunctions" + SciMLBasePyCallExt = "PyCall" + SciMLBasePythonCallExt = "PythonCall" + SciMLBaseRCallExt = "RCall" + SciMLBaseZygoteExt = "Zygote" + + [deps.SciMLBase.weakdeps] + ChainRules = "082447d4-558c-5d27-93f4-14fc19e9eca2" + ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" + Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" + PartialFunctions = "570af359-4316-4cb7-8c74-252c00c2016b" + PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0" + PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d" + RCall = "6f49c342-dc21-5d91-9882-a32aef131414" + Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[[deps.SciMLOperators]] +deps = ["ArrayInterface", "DocStringExtensions", "LinearAlgebra", "MacroTools", "Setfield", "SparseArrays", "StaticArraysCore"] +git-tree-sha1 = "10499f619ef6e890f3f4a38914481cc868689cd5" +uuid = "c0aeaf25-5076-4817-a8d5-81caf7dfa961" +version = "0.3.8" + +[[deps.SciMLStructures]] +git-tree-sha1 = "5833c10ce83d690c124beedfe5f621b50b02ba4d" +uuid = "53ae85a6-f571-4167-b2af-e1d143709226" +version = "1.1.0" + [[deps.Scratch]] deps = ["Dates"] git-tree-sha1 = "3bac05bc7e74a75fd9cba4295cde4045d9fe2386" @@ -943,6 +1005,12 @@ version = "1.4.1" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +[[deps.Setfield]] +deps = ["ConstructionBase", "Future", "MacroTools", "StaticArraysCore"] +git-tree-sha1 = "e2cc6d8c88613c05e1defb55170bf5ff211fbeac" +uuid = "efcf1570-3423-57d1-acb7-fd33fddbac46" +version = "1.1.1" + [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" diff --git a/Project.toml b/Project.toml index e44eecc9..a204c30b 100644 --- a/Project.toml +++ b/Project.toml @@ -1,25 +1,34 @@ name = "ClimaDiagnostics" uuid = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" authors = ["Gabriele Bozzola "] -version = "0.1.0" +version = "0.0.1" [deps] ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" [compat] Aqua = "0.8" ClimaComms = "0.5" ClimaCore = "0.13" +ClimaTimeSteppers = "0.7" +Documenter = "1" JuliaFormatter = "1" NCDatasets = "0.13, 0.14" SafeTestsets = "0.1" +SciMLBase = "1, 2" Test = "1" julia = "1.9" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Aqua", "ClimaTimeSteppers", "Documenter", "JuliaFormatter", "SafeTestsets", "Test"] diff --git a/README.md b/README.md index be6f6e80..ca9e015b 100644 --- a/README.md +++ b/README.md @@ -2,3 +2,21 @@
ClimaDiagnostics.jl + +`ClimaDiagnostics.jl` provides a simple framework to add diagnostics to `CliMA` +simulations. + +`ClimaDiagnostics.jl`defined two important concepts: +- `DiagnosticVariable`: A recipe to compute a diagnostic from the integrator + alongside with names, units, comments. +- `ScheduledDiagnostic`: When to compute and output the `DiagnosticVariable` and + what type of accumulation to perform. + +To add the diagnostics to a simulation from a list of `ScheduledDiagnostic`s, +one first needs to initialize them with `diagnostic_handler = +DiagnosticHandler(diangostics, Y, p, t; dt)`, and finally add the +`DiagnosticsCallback(diagnostic_handler)` callback to the integrator. + +> :warning: README under construction. While we work on the README, you can find +> all the relevant information in the +> [documentation](https://clima.github.io/ClimaDiagnostics.jl/dev/). diff --git a/docs/src/index.md b/docs/src/index.md index 05410d62..3aeff406 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,4 +1,6 @@ # `ClimaDiagnostics.jl` -`ClimaDiagnostics.jl` +`ClimaDiagnostics.jl` + + diff --git a/docs/src/internals.md b/docs/src/internals.md new file mode 100644 index 00000000..fc235845 --- /dev/null +++ b/docs/src/internals.md @@ -0,0 +1,40 @@ + +## Accumulation + +Several diagnostics require performing reductions, such as taking the maximum or +the average. Since it is not feasible to store all the lists of all the +intermediate values, we aggregate the results in specific storage areas (e.g., +we take `max(max(max(max(t1, t2), t3), t4), t5)` instead of `max(t1, t2, t3, t4, +t5)` In this, it is convenient to preallocate the space where we want to +accumulate the intermediate. + +Accumulation is accomplished by the `accumulate!` function. All this function +does is applying the binary `reduction_time_func` to the previous accumulated +value and the newly computed one and store the output to the accumulator. + + + + +After an accumulated variable is output, the accumulator is reset to its natural +state. This is achieved with the `reset_accumulator!` function. + + + +However, we have to fill the space with something +that does not affect the reduction. This, by definition, is the identity of the +operation. The identity of the operation `+` is `0` because `x + 0 = x` for +every `x`. + +We have to know the identity for every operation we want to support. Of course, +users are welcome to define their own by adding new methods to +identity_of_reduction. + +For instance, to define the identity of the reduction `-`, one would write +```julia +function ClimaDiagnostics.Diagnostics.identity_of_reduction(::typeof(-)) + return 0 +end +``` +(Or add this to the `reduction_identities.jl` file.) + + diff --git a/src/AbstractTypes.jl b/src/AbstractTypes.jl new file mode 100644 index 00000000..094a7e7f --- /dev/null +++ b/src/AbstractTypes.jl @@ -0,0 +1,15 @@ +# Abstract types needed across submodules + +""" + AbstractWriter + +An object that knows how to save some output. + +`AbstractWriter`s have to provide one function, `write_field!` + +The function has to have singature +`write_field!(writer::Writer, field, diagnostic::ScheduledDiagnostic, u, p, t)` + +It is up to the writer to implment this +""" +abstract type AbstractWriter end diff --git a/src/Callbacks.jl b/src/Callbacks.jl index cd4270bc..fea99ab5 100644 --- a/src/Callbacks.jl +++ b/src/Callbacks.jl @@ -1,6 +1,9 @@ -# Callback.jl module Callbacks +import ..seconds_to_str_short, ..seconds_to_str_long + +import SciMLBase + """ Callback @@ -12,38 +15,202 @@ struct Callback{FUNC <: Function, SCHEDULE} """Function to be called. It has to take one argument, the integrator.""" callback_func::FUNC - """Boolean function (or, more often, a callable struct) that determines whether - `callback_func` should be called or not. It has to take one argument, the integrator. - Most typically, only `integrator.t` or `integrator.step` are used.""" + """Boolean function (or, more often, a callable struct, e.g., an `AbstractSchedule`) that + determines whether `callback_func` should be called or not. It has to take one argument, + the integrator. Most typically, only `integrator.t` or `integrator.step` are used.""" schedule_func::SCHEDULE end +""" + orchestrate_diagnostics(integrator, callbacks) + +Loop over all the `callbacks`, for each, check it the condition to trigger the call is met +(by calling `callback.schedule_func(integrator)`). If yes, call +`callback.callback_func(integrator)`. + +`callbacks` has to be a container of `Callback`s. +""" +function orchestrate_callbacks(integrator, callbacks) + for callback in callbacks + if callback.schedule_func(integrator) + callback.callback_func(integrator) + end + end + return nothing +end + +""" + CallbackOrchestrator(callbacks) + +Return a `SciMLBase.Callback` that executes the diagnostic callbacks specified by +`diagnostics_callbacks` according to their schedules. + +`callbacks` has to be a container of `Callback`s. +""" +function CallbackOrchestrator(callbacks) + sciml_callback(integrator) = orchestrate_callbacks(integrator, callbacks) + + # SciMLBase.DiscreteCallback checks if the given condition is true at the end of each + # step. So, we set a condition that is always true, the callback is called at the end of + # every step. This callback runs `orchestrate_callbacks`, which manages which + # diagnostics functions to call + condition = (_, _, _) -> true + + return SciMLBase.DiscreteCallback(condition, sciml_callback) +end + ############# # Schedules # ############# +""" + AbstractSchedule + +`AbstractSchedule`s are structs that behave like functions and are used for the purpose of +defining a schedule to be used in `Callback`. They also may contain additional information. +""" +abstract type AbstractSchedule end + +""" + short_name(schedule) + +Short of name of the given `schedule`. Typically used in names of files/datasets. +""" +function short_name end + +""" + long_name(schedule) + +Long of name of the given `schedule`. Typically used in attributes. +""" +function long_name end + +function Base.show(io::IO, schedule::AbstractSchedule) + # This function is used in names of files/datasets + print(io, short_name(schedule)) +end """ DivisorSchedule -True when the iteration number is evenly divisible by a given number (this is roughly -equivalent to: "run this call back every N steps"). +True when the iteration number is evenly divisible by a given number. + +This is roughly equivalent to: "run this call back every N steps", with the difference that +no initial offset is possible. """ -struct DivisorSchedule +struct DivisorSchedule <: AbstractSchedule """Return true when the step number is divided evenly by this number (ie, step % divisor == 0) """ divisor::Int +end - """String that can be used to identify this schedule when saving files/datasets.""" - filename_str::String +""" + DivisorSchedule(integrator) + +Returns true if `integrator.step` is evenly divided by the divisor. +""" +function (schedule::DivisorSchedule)(integrator) + return rem(integrator.step, schedule.divisor) == 0 +end + +""" + short_name(schedule::DivisorSchedule) - function DivisorSchedule(divisor::Int) - filename_str = string(divisor) - new(divisor, filename_str) +Short name of the given `schedule`. Typically used in names of files/datasets. + +By default, the name of this schedule is `it`, with `` the value. +""" +function short_name(schedule::DivisorSchedule) + return "$(schedule.divisor)it" +end + +""" + long_name(schedule::DivisorSchedule) + +Long name of the given `schedule`. Typically used in attributes. + +By default, the name of this schedule is "every iterations" (even this +is not technically correct...). +""" +function long_name(schedule::DivisorSchedule) + return "every $(schedule.divisor) iterations" +end + +""" + EveryStepSchedule() + +Return a schedule that executes at the end of every step. +""" +function EveryStepSchedule() + return DivisorSchedule(1) +end + +""" + EveryDtSchedule + +True every time the current time is larger than the previous time this schedule was true + Dt. + +Note, this function performs no checks on whether the step is aligned with `dt` or not. +""" +struct EveryDtSchedule{T} <: AbstractSchedule + """The integrator time the last time this function returned true.""" + t_last::Ref{T} + + """The interval of time needed to elapse for the next time that this function will + return true.""" + dt::T + + """ + EveryDtSchedule(dt; t_start = zero(dt)) + + True every time the current time is larger than the previous time this schedule was true + dt. + """ + function EveryDtSchedule(dt; t_start = zero(dt)) + new{typeof(dt)}(Ref(t_start), dt) end end -function (schedule::DivisorSchedule)(integrator) - return rem(integrator.step, schedule.divisor) == 0 +""" + EveryDtSchedule(integrator) + +Returns true if `integrator.step` is evenly divided by the divisor. +""" +function (schedule::EveryDtSchedule)(integrator) + next_t = schedule.t_last[] + schedule.dt + # Dealing with floating point precision... + if integrator.t > next_t || integrator.t ≈ next_t + schedule.t_last[] = integrator.t + return true + else + return false + end +end + +""" + short_name(schedule::EveryDtSchedule) + +Short of name of the given `schedule`. Typically used in names of files/datasets. + +By default, the name of this schedule is the value converted into DDd_HHh_MMm_SSs. + +Note: + +This assumes that units are seconds. +""" +function short_name(schedule::EveryDtSchedule) + return seconds_to_str_short(schedule.dt) +end + +""" + long_name(schedule::EveryDtSchedule) + +Short of name of the given `schedule`. Typically used in attributes. + +Note: + +This assumes that units are seconds. +""" +function long_name(schedule::EveryDtSchedule) + return seconds_to_str_long(schedule.dt) end end diff --git a/src/ClimaDiagnostics.jl b/src/ClimaDiagnostics.jl index eae57730..db24feca 100644 --- a/src/ClimaDiagnostics.jl +++ b/src/ClimaDiagnostics.jl @@ -1,7 +1,15 @@ module ClimaDiagnostics +include("AbstractTypes.jl") + +include("utils.jl") include("Callbacks.jl") +include("DiagnosticVariables.jl") +import .DiagnosticVariables: DiagnosticVariable, average_pre_output_hook! +include("ScheduledDiagnostics.jl") +import .ScheduledDiagnostics: ScheduledDiagnostic include("Writers.jl") -include("Diagnostics.jl") + +include("clima_diagnostics.jl") end diff --git a/src/DiagnosticVariables.jl b/src/DiagnosticVariables.jl new file mode 100644 index 00000000..7d74aa8d --- /dev/null +++ b/src/DiagnosticVariables.jl @@ -0,0 +1,163 @@ +module DiagnosticVariables + +import ..Callbacks: AbstractSchedule, long_name + +""" + DiagnosticVariable + +A recipe to compute a diagnostic variable from the state, along with some useful metadata. + +The primary use for `DiagnosticVariable`s is to be embedded in a `ScheduledDiagnostic` to +compute diagnostics while the simulation is running. + +The metadata is used exclusively by the `output_writer` in the `ScheduledDiagnostic`. It is +responsibility of the `output_writer` to follow the conventions about the meaning of the +metadata and their use. + +In `ClimaAtmos`, we roughly follow the naming conventions listed in this file: +https://airtable.com/appYNLuWqAgzLbhSq/shrKcLEdssxb8Yvcp/tblL7dJkC3vl5zQLb + +Keyword arguments +================= + +- `compute!`: Function that compute the diagnostic variable from the state. It has to take + two arguments: the `integrator`, and a pre-allocated area of memory where to + write the result of the computation. It the no pre-allocated area is + available, a new one will be allocated. To avoid extra allocations, this + function should perform the calculation in-place (i.e., using `.=`). + +- `short_name`: Name used to identify the variable in the output files and in the file + names. Short but descriptive. `ClimaAtmos` follows the CMIP conventions and + the diagnostics are identified by the short name. + +- `long_name`: Name used to describe the variable in the output files. + +- `standard_name`: Standard name, as in + http://cfconventions.org/Data/cf-standard-names/71/build/cf-standard-name-table.html + +- `units`: Physical units of the variable. + +- `comments`: More verbose explanation of what the variable is, or comments related to how + it is defined or computed. +""" +struct DiagnosticVariable{T <: Function} + compute!::T + short_name::String + long_name::String + standard_name::String + units::String + comments::String +end + + +function DiagnosticVariable(; + compute!, + short_name::String = "", + long_name::String = "", + standard_name::String = "", + units::String = "", + comments::String = "", +) + DiagnosticVariable{typeof(compute!)}( + compute!, + short_name, + long_name, + standard_name, + units, + comments, + ) +end + +""" + short_name(dv::DiagnosticVariable) + +Return the short name associated to the given `DiagnosticVariable`. +""" +function short_name(dv::DiagnosticVariable) + return dv.short_name +end + +""" + average_pre_output_hook! + +Function to use as `pre_output_hook!` for a `ScheduledDiagnostic` to compute an arithmetic average. +""" +function average_pre_output_hook!(accum, counter) + @. accum = accum / counter + return nothing +end + +""" + descriptive_short_name(variable::DiagnosticVariable, + output_schedule_func, + reduction_time_func, + pre_output_hook!) + +Return a compact, unique-ish, identifier generated from the given information. This function +is useful for filenames and error messages. +""" +function descriptive_short_name( + variable::DiagnosticVariable, + output_schedule_func, + reduction_time_func, + pre_output_hook!; +) + var = "$(variable.short_name)" + isa_reduction = !isnothing(reduction_time_func) + + if isa_reduction + red = "$(reduction_time_func)" + + # Let's check if we are computing the average. Note that this might slip under the + # radar if the user passes their own pre_output_hook!. + if reduction_time_func == (+) && + nameof(pre_output_hook!) == :average_pre_output_hook! + red = "average" + end + suffix = "$red" + else + suffix = "inst" + end + return "$(var)_$(output_schedule_func)_$(suffix)" +end + +""" + descriptive_long_name(variable::DiagnosticVariable, + output_every, + reduction_time_func, + pre_output_hook!) + +Return a verbose description of the given output variable. + +This function is useful for attributes in output files. +""" +function descriptive_long_name( + variable::DiagnosticVariable, + output_schedule_func, + reduction_time_func, + pre_output_hook!; +) + var = "$(variable.long_name)" + isa_reduction = !isnothing(reduction_time_func) + + if isa_reduction + red = "$(reduction_time_func)" + + # Let's check if we are computing the average. Note that this might slip under the + # radar if the user passes their own pre_output_hook!. + if reduction_time_func == (+) && + pre_output_hook! == average_pre_output_hook! + red = "average" + end + + if output_schedule_func isa AbstractSchedule + suffix = "$(red) within $(long_name(output_schedule_func))" + else + suffix = red + end + else + suffix = "Instantaneous" + end + return "$(var), $(suffix)" +end +end diff --git a/src/Diagnostics.jl b/src/Diagnostics.jl deleted file mode 100644 index 0b464e2b..00000000 --- a/src/Diagnostics.jl +++ /dev/null @@ -1,711 +0,0 @@ -# Diagnostics.jl -# -# This file contains: -# -# - The definition of what a DiagnosticVariable is. Conceptually, a DiagnosticVariable is a -# variable we know how to compute from the state. We attach more information to it for -# documentation and to reference to it with its short name. DiagnosticVariables can exist -# irrespective of the existence of an actual simulation that is being run. Science -# packages are encourage to define their set of pre-made `DiagnosticVariables`, for -# example, ClimaAtmos comes with several diagnostics already defined (in the -# `ALL_DIAGNOSTICS` dictionary). -# -# - A dictionary `ALL_DIAGNOSTICS` with all the diagnostics we know how to compute, keyed -# over their short name. If you want to add more diagnostics, look at the included files. -# You can add your own file if you want to define several new diagnostics that are -# conceptually related. The dictionary `ALL_DIAGNOSTICS` should be considered an -# implementation detail. -# -# - The definition of what a ScheduledDiagnostics is. Conceptually, a ScheduledDiagnostics is a -# DiagnosticVariable we want to compute in a given simulation. For example, it could be -# the temperature averaged over a day. We can have multiple ScheduledDiagnostics for the -# same DiagnosticVariable (e.g., daily and monthly average temperatures). -# -# We provide two types of ScheduledDiagnostics: ScheduledDiagnosticIterations and -# ScheduledDiagnosticTime, with the difference being only in what domain the recurrence -# time is defined (are we doing something at every N timesteps or every T seconds?). It is -# much cleaner and simpler to work with ScheduledDiagnosticIterations because iterations -# are well defined and consistent. On the other hand, working in the time domain requires -# dealing with what happens when the timestep is not lined up with the output period. -# Possible solutions to this problem include: uneven output, interpolation, or restricting -# the user from picking specific combinations of timestep/output period. In the current -# implementation, we choose the third option. So, ScheduledDiagnosticTime is provided -# because it is the physically interesting quantity. If we know what is the timestep, we -# can convert between the two and check if the diagnostics are well-posed in terms of the -# relationship between the periods and the timesteps. In some sense, you can think of -# ScheduledDiagnosticIterations as an internal representation and ScheduledDiagnosticTime -# as the external interface. -# -# - A function to convert a list of ScheduledDiagnosticIterations into a list of -# AtmosCallbacks. This function takes three arguments: the list of diagnostics and two -# dictionaries that map each scheduled diagnostic to an area of memory where to save the -# result and where to keep track of how many times the function was called (so that we -# can compute stuff like averages). - -module Diagnostics - -import ClimaCore: Spaces -import ..Callbacks: Callback -import ..Writers: write_field! - -""" - DiagnosticVariable - -A recipe to compute a diagnostic variable from the state, along with some useful metadata. - -The primary use for `DiagnosticVariable`s is to be embedded in a `ScheduledDiagnostic` to -compute diagnostics while the simulation is running. - -The metadata is used exclusively by the `output_writer` in the `ScheduledDiagnostic`. It is -responsibility of the `output_writer` to follow the conventions about the meaning of the -metadata and their use. - -In `ClimaAtmos`, we roughly follow the naming conventions listed in this file: -https://airtable.com/appYNLuWqAgzLbhSq/shrKcLEdssxb8Yvcp/tblL7dJkC3vl5zQLb - -Keyword arguments -================= - -- `short_name`: Name used to identify the variable in the output files and in the file - names. Short but descriptive. `ClimaAtmos` follows the CMIP conventions and - the diagnostics are identified by the short name. - -- `long_name`: Name used to describe the variable in the output files. - -- `standard_name`: Standard name, as in - http://cfconventions.org/Data/cf-standard-names/71/build/cf-standard-name-table.html - -- `units`: Physical units of the variable. - -- `comments`: More verbose explanation of what the variable is, or comments related to how - it is defined or computed. - -- `compute!`: Function that compute the diagnostic variable from the state. - It has to take two arguments: the `integrator`, and a - pre-allocated area of memory where to write the result of the - computation. It the no pre-allocated area is available, a new - one will be allocated. To avoid extra allocations, this - function should perform the calculation in-place (i.e., using - `.=`). -""" -Base.@kwdef struct DiagnosticVariable{T <: Function} - short_name::String - long_name::String - standard_name::String - units::String - comments::String - compute!::T -end - -# Helper functions -function average_pre_output_hook!(accum, counter) - @. accum = accum / counter - return nothing -end - -include("diagnostics_utils.jl") - -# ScheduledDiagnostics - -# NOTE: The definitions of ScheduledDiagnosticTime and ScheduledDiagnosticIterations are -# nearly identical except for the fact that one is assumed to use units of seconds the other -# units of integration steps. However, we allow for this little repetition of code to avoid -# adding an extra layer of abstraction just to deal with these two objects (some people say -# that "duplication is better than over-abstraction"). Most users will only work with -# ScheduledDiagnosticTime. (It would be nice to have defaults fields in abstract types, as -# proposed in 2013 in https://github.com/JuliaLang/julia/issues/4935) Having two distinct -# types allow us to implement different checks and behaviors (e.g., we allow -# ScheduledDiagnosticTime to have placeholders values for {compute, output}_every so that we -# can plug the timestep in it). - -# Design note: pre_output_hook! -# -# One of our key requirements is to be able to compute arithmetic averages. Unfortunately, -# computing an arithmetic average requires keeping track of how many elements we are summing -# up. pre_output_hook! was introduced so that we can think of an average as a sum coupled -# with division, and perform the division (by the number of elements) before output. -# pre_output_hook! could be used for other operations, but we decided to keep it simple and -# target directly the most important use case for us. -# -# This choice restricts what reductions can be performed. For example, it is not possible to -# have a geometric average. If more complex reduction are needed, this mechanism has to be -# changed. - -struct ScheduledDiagnosticIterations{T1, T2, OW, F1, F2, PO, T} - variable::DiagnosticVariable{T} - output_every::T1 - output_writer::OW - reduction_time_func::F1 - reduction_space_func::F2 - compute_every::T2 - pre_output_hook!::PO - output_short_name::String - output_long_name::String - - """ - ScheduledDiagnosticIterations(; variable::DiagnosticVariable, - output_schedule_func, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_schedule_func = isa_reduction ? 1 : output_every, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name(self), - output_short_name = descriptive_long_name(self)) - - - A `DiagnosticVariable` that has to be computed and output during a simulation with a cadence - defined by the number of iterations, with an optional reduction applied to it (e.g., compute - the maximum temperature over the course of every 10 timesteps). This object is turned into - two callbacks (one for computing and the other for output) and executed by the integrator. - - Keyword arguments - ================= - - - `variable`: The diagnostic variable that has to be computed and output. - - - `output_every`: Save the results to disk every `output_every` iterations. If `output_every` - is non-positive, only output at the first time step. - - - `output_writer`: Function that controls out to save the computed diagnostic variable to - disk. `output_writer` has to take three arguments: the value that has to - be output, the `ScheduledDiagnostic`, and the integrator. Internally, the - integrator contains extra information (such as the current timestep). It - is responsibility of the `output_writer` to properly use the provided - information for meaningful output. - - - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of - scratch space `acc` where to accumulate partial results. Then, at - every `compute_every`, `reduction_time_func` is computed between - the previously stored value in `acc` and the new value. This - implements a running reduction. For example, if - `reduction_time_func = max`, the space `acc` will hold the running - maxima of the diagnostic. To implement operations like the - arithmetic average, the `reduction_time_func` has to be chosen as - `sum`, and a `pre_output_hook!` that renormalize `acc` by the - number of samples has to be provided. For custom reductions, it is - necessary to also specify the identity of operation by defining a - new method to `identity_of_reduction`. - - - `reduction_space_func`: NOT IMPLEMENTED YET - - - `compute_every`: Run the computations every `compute_every` iterations. This is not - particularly useful for point-wise diagnostics, where we enforce that - `compute_every` = `output_every`. For time reductions, `compute_every` is - set to 1 (compute at every timestep) by default. `compute_every` has to - evenly divide `output_every`. - - - `pre_output_hook!`: Function that has to be run before saving to disk for reductions - (mostly used to implement averages). The function `pre_output_hook!` - is called with two arguments: the value accumulated during the - reduction, and the number of times the diagnostic was computed from - the last time it was output. `pre_output_hook!` should mutate the - accumulator in place. The return value of `pre_output_hook!` is - discarded. An example of `pre_output_hook!` to compute the arithmetic - average is `pre_output_hook!(acc, N) = @. acc = acc / N`. - - - `output_short_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP - conventions for this. - - - `output_long_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - - """ - function ScheduledDiagnosticIterations(; - variable::DiagnosticVariable{T}, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isnothing(reduction_time_func) ? output_every : 1, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = false, - ), - output_long_name = descriptive_long_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = false, - ), - ) where {T} - - # We provide an inner constructor to enforce some constraints - - (output_every <= 0 || output_every % compute_every == 0) || error( - "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", - ) - - isa_reduction = !isnothing(reduction_time_func) - - # If it is not a reduction, we compute only when we output - if !isa_reduction && compute_every != output_every - @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" - compute_every = output_every - end - - # pre_output_hook! has to be a function, but it is much more intuitive to specify - # `nothing` when we want nothing to happen. Here, we convert the nothing keyword - # into a function that does nothing - if isnothing(pre_output_hook!) - pre_output_hook! = (accum, count) -> nothing - end - - T1 = typeof(output_every) - T2 = typeof(compute_every) - OW = typeof(output_writer) - F1 = typeof(reduction_time_func) - F2 = typeof(reduction_space_func) - PO = typeof(pre_output_hook!) - - new{T1, T2, OW, F1, F2, PO, T}( - variable, - output_every, - output_writer, - reduction_time_func, - reduction_space_func, - compute_every, - pre_output_hook!, - output_short_name, - output_long_name, - ) - end -end - - -struct ScheduledDiagnosticTime{T1, T2, OW, F1, F2, PO} - variable::DiagnosticVariable - output_every::T1 - output_writer::OW - reduction_time_func::F1 - reduction_space_func::F2 - compute_every::T2 - pre_output_hook!::PO - output_short_name::String - output_long_name::String - - """ - ScheduledDiagnosticTime(; variable::DiagnosticVariable, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isa_reduction ? :timestep : output_every, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name(self), - output_long_name = descriptive_long_name(self), - ) - - - A `DiagnosticVariable` that has to be computed and output during a simulation with a - cadence defined by how many seconds in simulation time, with an optional reduction - applied to it (e.g., compute the maximum temperature over the course of every day). This - object is turned into a `ScheduledDiagnosticIterations`, which is turned into two - callbacks (one for computing and the other for output) and executed by the integrator. - - Keyword arguments - ================= - - - `variable`: The diagnostic variable that has to be computed and output. - - - `output_every`: Save the results to disk every `output_every` seconds. If `output_every` - is non-positive, only output at the first time step. - - - `output_writer`: Function that controls out to save the computed diagnostic variable to - disk. `output_writer` has to take three arguments: the value that has to - be output, the `ScheduledDiagnostic`, and the integrator. Internally, the - integrator contains extra information (such as the current timestep). It - is responsibility of the `output_writer` to properly use the provided - information for meaningful output. - - - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of - scratch space `acc` where to accumulate partial results. Then, at - every `compute_every`, `reduction_time_func` is computed between - the previously stored value in `acc` and the new value. This - implements a running reduction. For example, if - `reduction_time_func = max`, the space `acc` will hold the running - maxima of the diagnostic. To implement operations like the - arithmetic average, the `reduction_time_func` has to be chosen as - `sum`, and a `pre_output_hook!` that renormalize `acc` by the - number of samples has to be provided. For custom reductions, it is - necessary to also specify the identity of operation by defining a - new method to `identity_of_reduction`. - - - `reduction_space_func`: NOT IMPLEMENTED YET - - - `compute_every`: Run the computations every `compute_every` seconds. This is not - particularly useful for point-wise diagnostics, where we enforce that - `compute_every` = `output_every`. For time reductions, - `compute_every` is set to `:timestep` (compute at every timestep) by - default. `compute_every` has to evenly divide `output_every`. - `compute_every` can take the special symbol `:timestep` which is a - placeholder for the timestep of the simulation to which this - `ScheduledDiagnostic` is attached. - - - `pre_output_hook!`: Function that has to be run before saving to disk for reductions - (mostly used to implement averages). The function `pre_output_hook!` - is called with two arguments: the value accumulated during the - reduction, and the number of times the diagnostic was computed from - the last time it was output. `pre_output_hook!` should mutate the - accumulator in place. The return value of `pre_output_hook!` is - discarded. An example of `pre_output_hook!` to compute the arithmetic - average is `pre_output_hook!(acc, N) = @. acc = acc / N`. - - - `output_short_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP - conventions for this. - - - `output_long_name`: A descriptive name for this particular diagnostic. If none is - provided, one will be generated mixing the short name of the - variable, the reduction, and the period of the reduction. - """ - function ScheduledDiagnosticTime(; - variable::DiagnosticVariable, - output_every, - output_writer, - reduction_time_func = nothing, - reduction_space_func = nothing, - compute_every = isnothing(reduction_time_func) ? output_every : - :timestep, - pre_output_hook! = nothing, - output_short_name = descriptive_short_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, - ), - output_long_name = descriptive_long_name( - variable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, - ), - ) - - # We provide an inner constructor to enforce some constraints - - # compute_every could be a Symbol (:timestep). We process this that when we process - # the list of diagnostics - if !isa(compute_every, Symbol) - (output_every <= 0 || output_every % compute_every == 0) || error( - "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", - ) - end - - isa_reduction = !isnothing(reduction_time_func) - - # If it is not a reduction, we compute only when we output - if !isa_reduction && compute_every != output_every - @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" - compute_every = output_every - end - - # pre_output_hook! has to be a function, but it is much more intuitive to specify - # `nothing` when we want nothing to happen. Here, we convert the nothing keyword - # into a function that does nothing - if isnothing(pre_output_hook!) - pre_output_hook! = (accum, count) -> nothing - end - - T1 = typeof(output_every) - T2 = typeof(compute_every) - OW = typeof(output_writer) - F1 = typeof(reduction_time_func) - F2 = typeof(reduction_space_func) - PO = typeof(pre_output_hook!) - - new{T1, T2, OW, F1, F2, PO}( - variable, - output_every, - output_writer, - reduction_time_func, - reduction_space_func, - compute_every, - pre_output_hook!, - output_short_name, - output_long_name, - ) - end -end - -""" - ScheduledDiagnosticIterations(sd_time::ScheduledDiagnosticTime, Δt) - - -Create a `ScheduledDiagnosticIterations` given a `ScheduledDiagnosticTime` and a timestep -`Δt`. In this, ensure that `compute_every` and `output_every` are meaningful for the given -timestep. - -""" -function ScheduledDiagnosticIterations( - sd_time::ScheduledDiagnosticTime, - Δt::T, -) where {T} - - # If we have the timestep, we can convert time in seconds into iterations - - # if compute_every is :timestep, then we want to compute after every iterations - compute_every = - sd_time.compute_every == :timestep ? 1 : sd_time.compute_every / Δt - output_every = sd_time.output_every / Δt - - # When Δt is a Float32, loss of precision might lead to spurious results (e.g., 1. / - # 0.1f0 = 9.99999985098839). So, we round to the number of significant digits that we - # expect from the float type. - # - # FIXME: eps(typeof(Δt)) is not the best value to pick the number of significant digits - # because it makes sense only for values of order unity. - sigdigits = eps(typeof(Δt)) |> log10 |> abs |> round |> Int - - output_every = round(output_every; sigdigits) - compute_every = round(compute_every; sigdigits) - - isinteger(output_every) || error( - "output_every ($(sd_time.output_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", - ) - isinteger(compute_every) || error( - "compute_every ($(sd_time.compute_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", - ) - - ScheduledDiagnosticIterations(; - sd_time.variable, - output_every = convert(Int, output_every), - sd_time.output_writer, - sd_time.reduction_time_func, - sd_time.reduction_space_func, - compute_every = convert(Int, compute_every), - sd_time.pre_output_hook!, - sd_time.output_short_name, - sd_time.output_long_name, - ) -end - -""" - ScheduledDiagnosticTime(sd_time::ScheduledDiagnosticIterations, Δt) - - -Create a `ScheduledDiagnosticTime` given a `ScheduledDiagnosticIterations` and a timestep -`Δt`. - -""" -function ScheduledDiagnosticTime( - sd_time::ScheduledDiagnosticIterations, - Δt::T, -) where {T} - - # If we have the timestep, we can convert time in iterations to seconds - - # if compute_every is :timestep, then we want to compute after every iterations - compute_every = - sd_time.compute_every == 1 ? :timestep : sd_time.compute_every * Δt - output_every = sd_time.output_every * Δt - - ScheduledDiagnosticTime(; - sd_time.variable, - output_every, - sd_time.output_writer, - sd_time.reduction_time_func, - sd_time.reduction_space_func, - compute_every, - sd_time.pre_output_hook!, - sd_time.output_short_name, - sd_time.output_long_name, - ) -end - -# We provide also a companion constructor for ScheduledDiagnosticIterations which returns -# itself (without copy) when called with a timestep. -# -# This is so that we can assume that -# ScheduledDiagnosticIterations(ScheduledDiagnostic{Time, Iterations}, Δt) -# always returns a valid ScheduledDiagnosticIterations -ScheduledDiagnosticIterations( - sd::ScheduledDiagnosticIterations, - _Δt::T, -) where {T} = sd - - -# We define all the known identities in reduction_identities.jl -include("reduction_identities.jl") - -# Helper functions for the callbacks: -# - reset_accumulator! -# - accumulate! - -# When the reduction is nothing, do nothing -reset_accumulator!(_, reduction_time_func::Nothing) = nothing - -# If we have a reduction, we have to reset the accumulator to its neutral state. (If we -# don't have a reduction, we don't have to do anything) -# -# ClimaAtmos defines methods for identity_of_reduction for standard reduction_time_func in -# reduction_identities.jl -function reset_accumulator!(diag_accumulator, reduction_time_func) - # identity_of_reduction works by dispatching over operation - identity = identity_of_reduction(reduction_time_func) - float_type = Spaces.undertype(axes((diag_accumulator))) - identity_ft = convert(float_type, identity) - fill!(parent(diag_accumulator), identity_ft) - return nothing -end - -# When the reduction is nothing, we do not need to accumulate anything -accumulate!(_, _, reduction_time_func::Nothing) = nothing - -# When we have a reduction, apply it between the accumulated value one -function accumulate!(diag_accumulator, diag_storage, reduction_time_func) - diag_accumulator .= reduction_time_func.(diag_accumulator, diag_storage) - return nothing -end - -function compute_callback!( - integrator, - accumulators, - storage, - diag, - counters, - compute!, -) - compute!(storage, integrator.u, integrator.p, integrator.t) - - # accumulator[diag] is not defined for non-reductions - diag_accumulator = get(accumulators, diag, nothing) - - accumulate!(diag_accumulator, storage, diag.reduction_time_func) - counters[diag] += 1 - return nothing -end - -function output_callback!( - integrator, - accumulators, - storage, - diag, - counters, - output_dir, -) - # Move accumulated value to storage so that we can output it (for - # reductions). This provides a unified interface to pre_output_hook! and - # output, at the cost of an additional copy. If this copy turns out to be - # too expensive, we can move the if statement below. - isnothing(diag.reduction_time_func) || (storage .= accumulators[diag]) - - # Any operations we have to perform before writing to output? - # Here is where we would divide by N to obtain an arithmetic average - diag.pre_output_hook!(storage, counters[diag]) - - # Write to disk - write_field!( - diag.output_writer, - storage, - diag, - integrator.u, - integrator.p, - integrator.t, - output_dir, - ) - - # accumulator[diag] is not defined for non-reductions - diag_accumulator = get(accumulators, diag, nothing) - - reset_accumulator!(diag_accumulator, diag.reduction_time_func) - counters[diag] = 0 - return nothing -end - -function orchestrate_diagnostics(integrator, diagnostics_functions) - for d in diagnostics_functions - n = d.schedule - if n > 0 && integrator.step % n == 0 - d.callback_func(integrator) - end - end - return nothing -end - - -""" - get_callbacks_from_diagnostics(diagnostics, storage, counters) - -Translate a list of diagnostics into a list of callbacks. - -Positional arguments -===================== - -- `diagnostics`: List of `ScheduledDiagnosticIterations` that have to be converted to - callbacks. We want to have `ScheduledDiagnosticIterations` here so that we - can define callbacks that occur at the end of every N integration steps. - -- `storage`: Dictionary that maps a given `ScheduledDiagnosticIterations` to a potentially - pre-allocated area of memory where to save the newly computed results. - -- `accumulator`: Dictionary that maps a given `ScheduledDiagnosticIterations` to a potentially - pre-allocated area of memory where to accumulate results. - -- `counters`: Dictionary that maps a given `ScheduledDiagnosticIterations` to the counter - that tracks how many times the given diagnostics was computed from the last - time it was output to disk. - -""" -function get_callbacks_from_diagnostics( - diagnostics, - storage, - accumulators, - counters, - output_dir, -) - # We have two types of callbacks: to compute and accumulate diagnostics, and to dump - # them to disk. Note that our callbacks do not contain any branching - - # storage is used to pre-allocate memory and to accumulate partial results for those - # diagnostics that perform reductions. - - callback_arrays = map(diagnostics) do diag - compute_callback = - integrator -> begin - compute_callback!( - integrator, - accumulators, - storage[diag], - diag, - counters, - diag.variable.compute!, - ) - end - output_callback = - integrator -> begin - output_callback!( - integrator, - accumulators, - storage[diag], - diag, - counters, - output_dir, - ) - end - [ - Callback(compute_callback, diag.compute_every), - Callback(output_callback, diag.output_every), - ] - end - - # TODO: Add NetCDF sync - - # We need to flatten to tuples - return vcat(callback_arrays...) -end - -end diff --git a/src/ScheduledDiagnostics.jl b/src/ScheduledDiagnostics.jl new file mode 100644 index 00000000..2875ce16 --- /dev/null +++ b/src/ScheduledDiagnostics.jl @@ -0,0 +1,603 @@ +module ScheduledDiagnostics + +import ..AbstractWriter +import ..Callbacks: EveryStepSchedule +import ..DiagnosticVariables: + DiagnosticVariable, descriptive_short_name, descriptive_long_name + +""" + ScheduledDiagnostic + +Conceptually, a ScheduledDiagnostics is a DiagnosticVariable we want to compute in a given +simulation. For example, it could be the temperature averaged over a day. We can have +multiple ScheduledDiagnostics for the same DiagnosticVariable (e.g., daily and monthly +average temperatures). +""" +struct ScheduledDiagnostic{ + T1, + T2, + OW <: AbstractWriter, + F1, + PO, + DV <: DiagnosticVariable, +} + """The `DiagnosticVariable` that has to be computed and output""" + variable::DV + + """A boolean function that determines when this diagnostic should be output. It has to + take one argument, the integrator. Most typically, only `integrator.t` or + `integrator.step` are used. Could be a Callback.AbstractSchedule.""" + output_schedule_func::T1 + + """Struct that controls out to save the computed diagnostic variable to disk. + `output_writer` has to implement a method `write_field!` that takes three arguments: the + value that has to be output, the `ScheduledDiagnostic`, and the integrator. Internally, + the integrator contains extra information (such as the current timestep). It is + responsibility of the `output_writer` to properly use the provided information for + meaningful output.""" + output_writer::OW + + """If not `nothing`, this `ScheduledDiagnostic` receives an area of scratch space `acc` + where to accumulate partial results. Then, ar directed by the `compute_schedule_func`, + `reduction_time_func` is computed between the previously stored value in `acc` and the + new value. This implements a running reduction. For example, if `reduction_time_func = + max`, the space `acc` will hold the running maxima of the diagnostic. To implement + operations like the arithmetic average, the `reduction_time_func` has to be chosen as + `sum`, and a `pre_output_hook!` that renormalizes `acc` by the number of samples has to + be provided. For custom reductions, it is necessary to also specify the identity of + operation by defining a new method to `identity_of_reduction`.""" + reduction_time_func::F1 + + """A boolean function that determines when this diagnostic should be computed. It has to + take one argument, the integrator. Most typically, only `integrator.t` or + `integrator.step` are used. Could be a Callback.AbstractSchedule.""" + compute_schedule_func::T2 + + # Design note: pre_output_hook! + # + # One of our key requirements is to be able to compute arithmetic averages. + # Unfortunately, computing an arithmetic average requires keeping track of how many + # elements we are summing up. pre_output_hook! was introduced so that we can think of an + # average as a sum coupled with division, and perform the division (by the number of + # elements) before output. pre_output_hook! could be used for other operations, but we + # decided to keep it simple and target directly the most important use case for us. + # + # This choice restricts what reductions can be performed. For example, it is not + # possible to have a geometric average. If more complex reduction are needed, this + # mechanism has to be changed. + + """Function that has to be run before saving to disk for reductions (mostly used to + implement averages). The function `pre_output_hook!` is called with two arguments: the value + accumulated during the reduction, and the number of times the diagnostic was computed from + the last time it was output. `pre_output_hook!` should mutate the accumulator in place. The + return value of `pre_output_hook!` is discarded. An example of `pre_output_hook!` to compute + the arithmetic average is `pre_output_hook!(acc, N) = @. acc = acc / N`.""" + pre_output_hook!::PO + + """Short name used to output this ScheduledDiagnostic for file names or datasets.""" + output_short_name::String + + """Descriptive name used to output this ScheduledDiagnostic in metadata or attributes.""" + output_long_name::String +end + +""" + ScheduledDiagnosticIterations(; variable::DiagnosticVariable, + output_schedule_func, + output_writer, + reduction_time_func = nothing, + reduction_space_func = nothing, + compute_schedule_func = isa_reduction ? 1 : output_every, + pre_output_hook! = nothing, + output_short_name = descriptive_short_name(self), + output_short_name = descriptive_long_name(self)) + + + A `DiagnosticVariable` that has to be computed and output during a simulation with a cadence + defined by the number of iterations, with an optional reduction applied to it (e.g., compute + the maximum temperature over the course of every 10 timesteps). This object is turned into + two callbacks (one for computing and the other for output) and executed by the integrator. + + Keyword arguments + ================= + + - `variable`: The `DiagnosticVariable` that has to be computed and output. + + - `output_every`: Save the results to disk every `output_every` iterations. If `output_every` + is non-positive, only output at the first time step. + + - `output_writer`: Function that controls out to save the computed diagnostic variable to + disk. `output_writer` has to take three arguments: the value that has to + be output, the `ScheduledDiagnostic`, and the integrator. Internally, the + integrator contains extra information (such as the current timestep). It + is responsibility of the `output_writer` to properly use the provided + information for meaningful output. + + - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of + scratch space `acc` where to accumulate partial results. Then, at + every `compute_every`, `reduction_time_func` is computed between + the previously stored value in `acc` and the new value. This + implements a running reduction. For example, if + `reduction_time_func = max`, the space `acc` will hold the running + maxima of the diagnostic. To implement operations like the + arithmetic average, the `reduction_time_func` has to be chosen as + `sum`, and a `pre_output_hook!` that renormalize `acc` by the + number of samples has to be provided. For custom reductions, it is + necessary to also specify the identity of operation by defining a + new method to `identity_of_reduction`. + + - `reduction_space_func`: NOT IMPLEMENTED YET + + - `compute_every`: Run the computations every `compute_every` iterations. This is not + particularly useful for point-wise diagnostics, where we enforce that + `compute_every` = `output_every`. For time reductions, `compute_every` is + set to 1 (compute at every timestep) by default. `compute_every` has to + evenly divide `output_every`. + + - `pre_output_hook!`: Function that has to be run before saving to disk for reductions + (mostly used to implement averages). The function `pre_output_hook!` + is called with two arguments: the value accumulated during the + reduction, and the number of times the diagnostic was computed from + the last time it was output. `pre_output_hook!` should mutate the + accumulator in place. The return value of `pre_output_hook!` is + discarded. An example of `pre_output_hook!` to compute the arithmetic + average is `pre_output_hook!(acc, N) = @. acc = acc / N`. + +- `output_short_name`: A descriptive name for this particular diagnostic. If none is + provided, one will be generated mixing the short name of the + variable, the reduction, and the period of the reduction. + Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP + conventions for this. + +- `output_long_name`: A descriptive name for this particular diagnostic. If none is + provided, one will be generated mixing the short name of the + variable, the reduction, and the period of the reduction. + + """ +function ScheduledDiagnostic(; + variable::DiagnosticVariable, + output_writer, + reduction_time_func = nothing, + compute_schedule_func = EveryStepSchedule(), + output_schedule_func = isnothing(reduction_time_func) ? + compute_schedule_func : EveryStepSchedule(), + pre_output_hook! = (accum, count) -> nothing, + output_short_name = descriptive_short_name( + variable, + output_schedule_func, + reduction_time_func, + pre_output_hook!, + ), + output_long_name = descriptive_long_name( + variable, + output_schedule_func, + reduction_time_func, + pre_output_hook!, + ), +) + # pre_output_hook! has to be a function, but it is much more intuitive to specify + # `nothing` when we want nothing to happen. Here, we convert the nothing keyword + # into a function that does nothing + if isnothing(pre_output_hook!) + pre_output_hook! = (accum, count) -> nothing + end + + T = typeof(variable) + T1 = typeof(output_schedule_func) + T2 = typeof(compute_schedule_func) + OW = typeof(output_writer) + F1 = typeof(reduction_time_func) + PO = typeof(pre_output_hook!) + + ScheduledDiagnostic{T1, T2, OW, F1, PO, T}( + variable, + output_schedule_func, + output_writer, + reduction_time_func, + compute_schedule_func, + pre_output_hook!, + output_short_name, + output_long_name, + ) +end + +""" + output_short_name(sd::ScheduledDiagnostic) + +Return the short name to use for output of the `sd` `ScheduledDiagnostic`. +""" +function output_short_name(sd::ScheduledDiagnostic) + return sd.output_short_name +end + +# """ +# ScheduledDiagnosticIterations(; variable::DiagnosticVariable, +# output_schedule_func, +# output_writer, +# reduction_time_func = nothing, +# reduction_space_func = nothing, +# compute_schedule_func = isa_reduction ? 1 : output_every, +# pre_output_hook! = nothing, +# output_short_name = descriptive_short_name(self), +# output_short_name = descriptive_long_name(self)) + + +# A `DiagnosticVariable` that has to be computed and output during a simulation with a cadence +# defined by the number of iterations, with an optional reduction applied to it (e.g., compute +# the maximum temperature over the course of every 10 timesteps). This object is turned into +# two callbacks (one for computing and the other for output) and executed by the integrator. + +# Keyword arguments +# ================= + +# - `variable`: The diagnostic variable that has to be computed and output. + +# - `output_every`: Save the results to disk every `output_every` iterations. If `output_every` +# is non-positive, only output at the first time step. + +# - `output_writer`: Function that controls out to save the computed diagnostic variable to +# disk. `output_writer` has to take three arguments: the value that has to +# be output, the `ScheduledDiagnostic`, and the integrator. Internally, the +# integrator contains extra information (such as the current timestep). It +# is responsibility of the `output_writer` to properly use the provided +# information for meaningful output. + +# - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of +# scratch space `acc` where to accumulate partial results. Then, at +# every `compute_every`, `reduction_time_func` is computed between +# the previously stored value in `acc` and the new value. This +# implements a running reduction. For example, if +# `reduction_time_func = max`, the space `acc` will hold the running +# maxima of the diagnostic. To implement operations like the +# arithmetic average, the `reduction_time_func` has to be chosen as +# `sum`, and a `pre_output_hook!` that renormalize `acc` by the +# number of samples has to be provided. For custom reductions, it is +# necessary to also specify the identity of operation by defining a +# new method to `identity_of_reduction`. + +# - `reduction_space_func`: NOT IMPLEMENTED YET + +# - `compute_every`: Run the computations every `compute_every` iterations. This is not +# particularly useful for point-wise diagnostics, where we enforce that +# `compute_every` = `output_every`. For time reductions, `compute_every` is +# set to 1 (compute at every timestep) by default. `compute_every` has to +# evenly divide `output_every`. + +# - `pre_output_hook!`: Function that has to be run before saving to disk for reductions +# (mostly used to implement averages). The function `pre_output_hook!` +# is called with two arguments: the value accumulated during the +# reduction, and the number of times the diagnostic was computed from +# the last time it was output. `pre_output_hook!` should mutate the +# accumulator in place. The return value of `pre_output_hook!` is +# discarded. An example of `pre_output_hook!` to compute the arithmetic +# average is `pre_output_hook!(acc, N) = @. acc = acc / N`. + +# `output_short_name`: A descriptive name for this particular diagnostic. If none is +# provided, one will be generated mixing the short name of the +# variable, the reduction, and the period of the reduction. +# Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP +# conventions for this. + +# `output_long_name`: A descriptive name for this particular diagnostic. If none is +# provided, one will be generated mixing the short name of the +# variable, the reduction, and the period of the reduction. + +# """ +# function ScheduledDiagnosticIterations(; +# variable::DiagnosticVariable{T}, +# output_every, +# output_writer, +# reduction_time_func = nothing, +# reduction_space_func = nothing, +# compute_every = isnothing(reduction_time_func) ? output_every : 1, +# pre_output_hook! = nothing, +# output_short_name = descriptive_short_name( +# variable, +# output_every, +# reduction_time_func, +# pre_output_hook!; +# ), +# output_long_name = descriptive_long_name( +# variable, +# output_every, +# reduction_time_func, +# pre_output_hook!; +# ), +# ) where {T} + +# # We provide an inner constructor to enforce some constraints + +# (output_every <= 0 || output_every % compute_every == 0) || error( +# "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", +# ) + +# isa_reduction = !isnothing(reduction_time_func) + +# # If it is not a reduction, we compute only when we output +# if !isa_reduction && compute_every != output_every +# @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" +# compute_every = output_every +# end + +# # pre_output_hook! has to be a function, but it is much more intuitive to specify +# # `nothing` when we want nothing to happen. Here, we convert the nothing keyword +# # into a function that does nothing +# if isnothing(pre_output_hook!) +# pre_output_hook! = (accum, count) -> nothing +# end + +# T1 = typeof(output_every) +# T2 = typeof(compute_every) +# OW = typeof(output_writer) +# F1 = typeof(reduction_time_func) +# F2 = typeof(reduction_space_func) +# PO = typeof(pre_output_hook!) + +# new{T1, T2, OW, F1, F2, PO, T}( +# variable, +# output_every, +# output_writer, +# reduction_time_func, +# reduction_space_func, +# compute_every, +# pre_output_hook!, +# output_short_name, +# output_long_name, +# ) +# end + + + +# struct ScheduledDiagnosticTime{T1, T2, OW, F1, F2, PO} +# variable::DiagnosticVariable +# output_every::T1 +# output_writer::OW +# reduction_time_func::F1 +# reduction_space_func::F2 +# compute_every::T2 +# pre_output_hook!::PO +# output_short_name::String +# output_long_name::String + +# """ +# ScheduledDiagnosticTime(; variable::DiagnosticVariable, +# output_every, +# output_writer, +# reduction_time_func = nothing, +# reduction_space_func = nothing, +# compute_every = isa_reduction ? :timestep : output_every, +# pre_output_hook! = nothing, +# output_short_name = descriptive_short_name(self), +# output_long_name = descriptive_long_name(self), +# ) + + +# A `DiagnosticVariable` that has to be computed and output during a simulation with a +# cadence defined by how many seconds in simulation time, with an optional reduction +# applied to it (e.g., compute the maximum temperature over the course of every day). This +# object is turned into a `ScheduledDiagnosticIterations`, which is turned into two +# callbacks (one for computing and the other for output) and executed by the integrator. + +# Keyword arguments +# ================= + +# - `variable`: The diagnostic variable that has to be computed and output. + +# - `output_every`: Save the results to disk every `output_every` seconds. If `output_every` +# is non-positive, only output at the first time step. + +# - `output_writer`: Function that controls out to save the computed diagnostic variable to +# disk. `output_writer` has to take three arguments: the value that has to +# be output, the `ScheduledDiagnostic`, and the integrator. Internally, the +# integrator contains extra information (such as the current timestep). It +# is responsibility of the `output_writer` to properly use the provided +# information for meaningful output. + +# - `reduction_time_func`: If not `nothing`, this `ScheduledDiagnostic` receives an area of +# scratch space `acc` where to accumulate partial results. Then, at +# every `compute_every`, `reduction_time_func` is computed between +# the previously stored value in `acc` and the new value. This +# implements a running reduction. For example, if +# `reduction_time_func = max`, the space `acc` will hold the running +# maxima of the diagnostic. To implement operations like the +# arithmetic average, the `reduction_time_func` has to be chosen as +# `sum`, and a `pre_output_hook!` that renormalize `acc` by the +# number of samples has to be provided. For custom reductions, it is +# necessary to also specify the identity of operation by defining a +# new method to `identity_of_reduction`. + +# - `reduction_space_func`: NOT IMPLEMENTED YET + +# - `compute_every`: Run the computations every `compute_every` seconds. This is not +# particularly useful for point-wise diagnostics, where we enforce that +# `compute_every` = `output_every`. For time reductions, +# `compute_every` is set to `:timestep` (compute at every timestep) by +# default. `compute_every` has to evenly divide `output_every`. +# `compute_every` can take the special symbol `:timestep` which is a +# placeholder for the timestep of the simulation to which this +# `ScheduledDiagnostic` is attached. + +# - `pre_output_hook!`: Function that has to be run before saving to disk for reductions +# (mostly used to implement averages). The function `pre_output_hook!` +# is called with two arguments: the value accumulated during the +# reduction, and the number of times the diagnostic was computed from +# the last time it was output. `pre_output_hook!` should mutate the +# accumulator in place. The return value of `pre_output_hook!` is +# discarded. An example of `pre_output_hook!` to compute the arithmetic +# average is `pre_output_hook!(acc, N) = @. acc = acc / N`. + +# - `output_short_name`: A descriptive name for this particular diagnostic. If none is +# provided, one will be generated mixing the short name of the +# variable, the reduction, and the period of the reduction. +# Normally, it has to be unique. In `ClimaAtmos`, we follow the CMIP +# conventions for this. + +# - `output_long_name`: A descriptive name for this particular diagnostic. If none is +# provided, one will be generated mixing the short name of the +# variable, the reduction, and the period of the reduction. +# """ +# function ScheduledDiagnosticTime(; +# variable::DiagnosticVariable, +# output_every, +# output_writer, +# reduction_time_func = nothing, +# reduction_space_func = nothing, +# compute_every = isnothing(reduction_time_func) ? output_every : +# :timestep, +# pre_output_hook! = nothing, +# output_short_name = descriptive_short_name( +# variable, +# output_every, +# reduction_time_func, +# pre_output_hook!; +# ), +# output_long_name = descriptive_long_name( +# variable, +# output_every, +# reduction_time_func, +# pre_output_hook!; +# ), +# ) + +# # We provide an inner constructor to enforce some constraints + +# # compute_every could be a Symbol (:timestep). We process this that when we process +# # the list of diagnostics +# if !isa(compute_every, Symbol) +# (output_every <= 0 || output_every % compute_every == 0) || error( +# "output_every ($output_every) should be multiple of compute_every ($compute_every) for diagnostic $(output_short_name)", +# ) +# end + +# isa_reduction = !isnothing(reduction_time_func) + +# # If it is not a reduction, we compute only when we output +# if !isa_reduction && compute_every != output_every +# @warn "output_every ($output_every) != compute_every ($compute_every) for $(output_short_name), changing compute_every to match" +# compute_every = output_every +# end + +# # pre_output_hook! has to be a function, but it is much more intuitive to specify +# # `nothing` when we want nothing to happen. Here, we convert the nothing keyword +# # into a function that does nothing +# if isnothing(pre_output_hook!) +# pre_output_hook! = (accum, count) -> nothing +# end + +# T1 = typeof(output_every) +# T2 = typeof(compute_every) +# OW = typeof(output_writer) +# F1 = typeof(reduction_time_func) +# F2 = typeof(reduction_space_func) +# PO = typeof(pre_output_hook!) + +# new{T1, T2, OW, F1, F2, PO}( +# variable, +# output_every, +# output_writer, +# reduction_time_func, +# reduction_space_func, +# compute_every, +# pre_output_hook!, +# output_short_name, +# output_long_name, +# ) +# end +# end + +# """ +# ScheduledDiagnosticIterations(sd_time::ScheduledDiagnosticTime, Δt) + + +# Create a `ScheduledDiagnosticIterations` given a `ScheduledDiagnosticTime` and a timestep +# `Δt`. In this, ensure that `compute_every` and `output_every` are meaningful for the given +# timestep. + +# """ +# function ScheduledDiagnosticIterations( +# sd_time::ScheduledDiagnosticTime, +# Δt::T, +# ) where {T} + +# # If we have the timestep, we can convert time in seconds into iterations + +# # if compute_every is :timestep, then we want to compute after every iterations +# compute_every = +# sd_time.compute_every == :timestep ? 1 : sd_time.compute_every / Δt +# output_every = sd_time.output_every / Δt + +# # When Δt is a Float32, loss of precision might lead to spurious results (e.g., 1. / +# # 0.1f0 = 9.99999985098839). So, we round to the number of significant digits that we +# # expect from the float type. +# # +# # FIXME: eps(typeof(Δt)) is not the best value to pick the number of significant digits +# # because it makes sense only for values of order unity. +# sigdigits = eps(typeof(Δt)) |> log10 |> abs |> round |> Int + +# output_every = round(output_every; sigdigits) +# compute_every = round(compute_every; sigdigits) + +# isinteger(output_every) || error( +# "output_every ($(sd_time.output_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", +# ) +# isinteger(compute_every) || error( +# "compute_every ($(sd_time.compute_every)) should be multiple of the timestep ($Δt) for diagnostic $(sd_time.output_short_name)", +# ) + +# ScheduledDiagnosticIterations(; +# sd_time.variable, +# output_every = convert(Int, output_every), +# sd_time.output_writer, +# sd_time.reduction_time_func, +# sd_time.reduction_space_func, +# compute_every = convert(Int, compute_every), +# sd_time.pre_output_hook!, +# sd_time.output_short_name, +# sd_time.output_long_name, +# ) +# end + +# """ +# ScheduledDiagnosticTime(sd_time::ScheduledDiagnosticIterations, Δt) + + +# Create a `ScheduledDiagnosticTime` given a `ScheduledDiagnosticIterations` and a timestep +# `Δt`. + +# """ +# function ScheduledDiagnosticTime( +# sd_time::ScheduledDiagnosticIterations, +# Δt::T, +# ) where {T} + +# # If we have the timestep, we can convert time in iterations to seconds + +# # if compute_every is :timestep, then we want to compute after every iterations +# compute_every = +# sd_time.compute_every == 1 ? :timestep : sd_time.compute_every * Δt +# output_every = sd_time.output_every * Δt + +# ScheduledDiagnosticTime(; +# sd_time.variable, +# output_every, +# sd_time.output_writer, +# sd_time.reduction_time_func, +# sd_time.reduction_space_func, +# compute_every, +# sd_time.pre_output_hook!, +# sd_time.output_short_name, +# sd_time.output_long_name, +# ) +# end + +# # We provide also a companion constructor for ScheduledDiagnosticIterations which returns +# # itself (without copy) when called with a timestep. +# # +# # This is so that we can assume that +# # ScheduledDiagnosticIterations(ScheduledDiagnostic{Time, Iterations}, Δt) +# # always returns a valid ScheduledDiagnosticIterations +# ScheduledDiagnosticIterations( +# sd::ScheduledDiagnosticIterations, +# _Δt::T, +# ) where {T} = sd +end diff --git a/src/Writers.jl b/src/Writers.jl index f3d98938..a07d11ca 100644 --- a/src/Writers.jl +++ b/src/Writers.jl @@ -4,6 +4,10 @@ module Writers +import ..AbstractWriter, ..ScheduledDiagnostic +import ..ScheduledDiagnostics: output_short_name + +include("dict_writer.jl") include("hdf5_writer.jl") include("netcdf_writer.jl") diff --git a/src/clima_diagnostics.jl b/src/clima_diagnostics.jl new file mode 100644 index 00000000..3d2c7665 --- /dev/null +++ b/src/clima_diagnostics.jl @@ -0,0 +1,279 @@ + +# This file contains: +# +# - The definition of what a DiagnosticVariable is. Conceptually, a DiagnosticVariable is a +# variable we know how to compute from the state. We attach more information to it for +# documentation and to reference to it with its short name. DiagnosticVariables can exist +# irrespective of the existence of an actual simulation that is being run. Science +# packages are encourage to define their set of pre-made `DiagnosticVariables`, for +# example, ClimaAtmos comes with several diagnostics already defined (in the +# `ALL_DIAGNOSTICS` dictionary). +# +# - A dictionary `ALL_DIAGNOSTICS` with all the diagnostics we know how to compute, keyed +# over their short name. If you want to add more diagnostics, look at the included files. +# You can add your own file if you want to define several new diagnostics that are +# conceptually related. The dictionary `ALL_DIAGNOSTICS` should be considered an +# implementation detail. +# +# - The definition of what a ScheduledDiagnostics is. Conceptually, a ScheduledDiagnostics is a +# DiagnosticVariable we want to compute in a given simulation. For example, it could be +# the temperature averaged over a day. We can have multiple ScheduledDiagnostics for the +# same DiagnosticVariable (e.g., daily and monthly average temperatures). +# +# We provide two types of ScheduledDiagnostics: ScheduledDiagnosticIterations and +# ScheduledDiagnosticTime, with the difference being only in what domain the recurrence +# time is defined (are we doing something at every N timesteps or every T seconds?). It is +# much cleaner and simpler to work with ScheduledDiagnosticIterations because iterations +# are well defined and consistent. On the other hand, working in the time domain requires +# dealing with what happens when the timestep is not lined up with the output period. +# Possible solutions to this problem include: uneven output, interpolation, or restricting +# the user from picking specific combinations of timestep/output period. In the current +# implementation, we choose the third option. So, ScheduledDiagnosticTime is provided +# because it is the physically interesting quantity. If we know what is the timestep, we +# can convert between the two and check if the diagnostics are well-posed in terms of the +# relationship between the periods and the timesteps. In some sense, you can think of +# ScheduledDiagnosticIterations as an internal representation and ScheduledDiagnosticTime +# as the external interface. +# +# - A function to convert a list of ScheduledDiagnosticIterations into a list of +# AtmosCallbacks. This function takes three arguments: the list of diagnostics and two +# dictionaries that map each scheduled diagnostic to an area of memory where to save the +# result and where to keep track of how many times the function was called (so that we +# can compute stuff like averages). + +import .Callbacks: + Callback, CallbackOrchestrator, DivisorSchedule, EveryDtSchedule +import .Writers: write_field!, AbstractWriter + +# We define all the known identities in reduction_identities.jl +include("reduction_identities.jl") + +""" + reset_accumulator!(accumulated_value, reduction_time_func) + +Reset the `accumulated_value` to the identity of `reduction_time_func`. + +It requires that a method for `identity_of_reduction` is defined for `reduction_time_func`. +These functions are defined in `reduction_identities.jl`. + +Users can define their own by providing a method to `Diagnostics.identity_of_reduction` +""" +function reset_accumulator!(accumulated_value, reduction_time_func) + # identity_of_reduction works by dispatching over operation. + # The function is defined in reduction_identities.jl + identity = identity_of_reduction(reduction_time_func) + fill!(parent(accumulated_value), identity) + return nothing +end + +# When the reduction is nothing, do nothing +reset_accumulator!(_, reduction_time_func::Nothing) = nothing + +""" + accumulate!(accumulated_value, latest_computed_value, reduction_time_func) + +Apply the binary `reduction_time_func` to `accumulated_value` and `latest_computed_value` +and store the output to `accumulated_value`. + +For example, if `reduction_time_func = max`, this computes the max between the previous max +and the newly computed value and stores it to `accumulated_value` (so that it can be used in +the next iteration). +""" +function accumulate!( + accumulated_value, + latest_computed_value, + reduction_time_func, +) + accumulated_value .= + reduction_time_func.(accumulated_value, latest_computed_value) + return nothing +end + +# When the reduction is nothing, we do not need to accumulate anything +accumulate!(_, _, reduction_time_func::Nothing) = nothing + + +function compute_callback!( + integrator, + accumulators, + storage, + diag, + counters, + compute!, +) + compute!(storage, integrator.u, integrator.p, integrator.t) + + # accumulator[diag] is not defined for non-reductions + diag_accumulator = get(accumulators, diag, nothing) + + accumulate!(diag_accumulator, storage, diag.reduction_time_func) + counters[diag] += 1 + return nothing +end + +function output_callback!(integrator, accumulators, storage, diag, counters) + # Move accumulated value to storage so that we can output it (for reductions). This + # provides a unified interface to pre_output_hook! and output, at the cost of an + # additional copy. If this copy turns out to be too expensive, we can move the if + # statement below. + isnothing(diag.reduction_time_func) || (storage .= accumulators[diag]) + + # Any operations we have to perform before writing to output? Here is where we would + # divide by N to obtain an arithmetic average + diag.pre_output_hook!(storage, counters[diag]) + + # Write to disk + write_field!( + diag.output_writer, + storage, + diag, + integrator.u, + integrator.p, + integrator.t, + ) + + # accumulator[diag] is not defined for non-reductions, in which case we return `nothing` + # The dispatch in reset_accumulator! knows how to handle this + diag_accumulator = get(accumulators, diag, nothing) + + # If we have a reduction, we have to reset the accumulator to its neutral state. (If we + # don't have a reduction, we don't have to do anything, but this is handled by dispatch.) + reset_accumulator!(diag_accumulator, diag.reduction_time_func) + + # Reset counter too + counters[diag] = 0 + return nothing +end + +struct DiagnosticsHandler{SD, STORAGE <: Dict, ACC <: Dict, COUNT <: Dict} + """An iterable with the `ScheduledDiagnostic`s that are scheduled.""" + scheduled_diagnostics::SD + + """Dictionary that maps a given `ScheduledDiagnostic` to a potentially pre-allocated + area of memory where to save the newly computed results.""" + storage::STORAGE + + """Dictionary that maps a given `ScheduledDiagnostic` to a potentially pre-allocated + area of memory where to accumulate results.""" + accumulators::ACC + + """Dictionary that maps a given `ScheduledDiagnosticIterations` to the counter that + tracks how many times the given diagnostics was computed from the last time it was + output to disk.""" + counters::COUNT +end + +""" + +The `DiagnosticsHandler` initializes the diagnostics by calling the `compute!` +function. + +Note: initializing a `DiagnosticsHandler` can be expensive! + +Keyword arguments +=================== + +`dt`, if passed, is used for error checking, to ensure that the diagnostics defined as given +a given period are integer multiples of the timestep. +""" +function DiagnosticsHandler(scheduled_diagnostics, Y, p, t; dt = nothing) + + # For diagnostics that perform reductions, the storage is used for the values computed + # at each call. Reductions also save the accumulated value in accumulators. + storage = Dict() + accumulators = Dict() + counters = Dict() + + for diag in scheduled_diagnostics + if isnothing(dt) + @warn "dt was not passed to DiagnosticsHandler. No checks will be performed on the frequency of the diagnostics" + else + if diag.compute_schedule_func isa EveryDtSchedule + compute_dt = diag.compute_schedule_func.dt + every_num_iteration = compute_dt / dt + every_num_iteration ≈ round(every_num_iteration) || error( + "Compute dt ($compute_dt) for $(diag.output_short_name) is not an even multiple of the timestep ($dt)", + ) + end + if diag.output_schedule_func isa EveryDtSchedule + output_dt = diag.output_schedule_func.dt + every_num_iteration = output_dt / dt + every_num_iteration ≈ round(every_num_iteration) || error( + "Output dt ($output_dt) for $(diag.output_short_name) is not an even multiple of the timestep ($dt)", + ) + end + end + + variable = diag.variable + isa_time_reduction = !isnothing(diag.reduction_time_func) + + # The first time we call compute! we use its return value. All the subsequent times + # (in the callbacks), we will write the result in place + # TODO: Use lazy broadcasted expressions here + storage[diag] = variable.compute!(nothing, Y, p, t) + counters[diag] = 1 + + # If it is not a reduction, call the output writer as well + if !isa_time_reduction + write_field!(diag.output_writer, storage[diag], diag, Y, p, t) + else + # Add to the accumulator + + # We use similar + .= instead of copy because CUDA 5.2 does not supported nested + # wrappers with view(reshape(view)) objects. See discussion in + # https://github.com/CliMA/ClimaAtmos.jl/pull/2579 and + # https://github.com/JuliaGPU/Adapt.jl/issues/21 + accumulators[diag] = similar(storage[diag]) + accumulators[diag] .= storage[diag] + end + end + + return DiagnosticsHandler( + scheduled_diagnostics, + storage, + accumulators, + counters, + ) +end + + +""" + DiagnosticsCallback(diagnostics_handler::DiagnosticsHandler) + +Translate a `DiagnosticsHandler` into a SciML callback ready to be used. +""" +function DiagnosticsCallback(diagnostics_handler::DiagnosticsHandler) + # TODO: We have two types of callbacks: to compute and accumulate diagnostics, and to + # dump them to disk. At the moment, they all end up in the same place, but we might want + # to keep them separate + + callback_arrays = map(diagnostics_handler.scheduled_diagnostics) do diag + compute_callback = + integrator -> begin + compute_callback!( + integrator, + diagnostics_handler.accumulators, + diagnostics_handler.storage[diag], + diag, + diagnostics_handler.counters, + diag.variable.compute!, + ) + end + output_callback = + integrator -> begin + output_callback!( + integrator, + diagnostics_handler.accumulators, + diagnostics_handler.storage[diag], + diag, + diagnostics_handler.counters, + ) + end + [ + Callback(compute_callback, diag.compute_schedule_func), + Callback(output_callback, diag.output_schedule_func), + ] + end + + return CallbackOrchestrator(vcat(callback_arrays...)) +end diff --git a/src/diagnostics_utils.jl b/src/diagnostics_utils.jl deleted file mode 100644 index dd3805a0..00000000 --- a/src/diagnostics_utils.jl +++ /dev/null @@ -1,128 +0,0 @@ -# diagnostic_utils.jl -# -# This file contains: -# - descriptive_short_name: to condense ScheduledDiagnostic information into few characters. -# - descriptive_long_name: to produce full names that are clearly human-understandable - -""" - descriptive_short_name(variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true) - - -Return a compact, unique-ish, identifier generated from the given information. - -`output_every` is interpreted as in seconds if `units_are_seconds` is `true`. Otherwise, it -is interpreted as in units of number of iterations. - -This function is useful for filenames and error messages. -""" -function descriptive_short_name( - variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, -) - var = "$(variable.short_name)" - isa_reduction = !isnothing(reduction_time_func) - - if isa_reduction - red = "$(reduction_time_func)" - - # Let's check if we are computing the average. Note that this might slip under the - # radar if the user passes their own pre_output_hook!. - if reduction_time_func == (+) && - nameof(pre_output_hook!) == :average_pre_output_hook! - red = "average" - end - - if units_are_seconds - - # Convert period from seconds to days, hours, minutes, seconds - period = "" - - days, rem_seconds = divrem(output_every, 24 * 60 * 60) - hours, rem_seconds = divrem(rem_seconds, 60 * 60) - minutes, seconds = divrem(rem_seconds, 60) - - # At this point, days, hours, minutes, seconds have to be integers. - # Let us force them to be such so that we can have a consistent string output. - - days, hours, minutes, seconds = - map(Int, (days, hours, minutes, seconds)) - - days > 0 && (period *= "$(days)d_") - hours > 0 && (period *= "$(hours)h_") - minutes > 0 && (period *= "$(minutes)m_") - seconds > 0 && (period *= "$(seconds)s_") - - suffix = period * red - else - suffix = "$(output_every)it_$(red)" - end - else - suffix = "inst" - end - return "$(var)_$(suffix)" -end - -""" - descriptive_long_name(variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true) - -Return a verbose description of the given output variable. - -`output_every` is interpreted as in seconds if `units_are_seconds` is `true`. Otherwise, it -is interpreted as in units of number of iterations. - -This function is useful for attributes in output files. -""" -function descriptive_long_name( - variable::DiagnosticVariable, - output_every, - reduction_time_func, - pre_output_hook!; - units_are_seconds = true, -) - var = "$(variable.long_name)" - isa_reduction = !isnothing(reduction_time_func) - - if isa_reduction - red = "$(reduction_time_func)" - - # Let's check if we are computing the average. Note that this might slip under the - # radar if the user passes their own pre_output_hook!. - if reduction_time_func == (+) && - pre_output_hook! == average_pre_output_hook! - red = "average" - end - - if units_are_seconds - # Convert period from seconds to days, hours, minutes, seconds - period = "" - - days, rem_seconds = divrem(output_every, 24 * 60 * 60) - hours, rem_seconds = divrem(rem_seconds, 60 * 60) - minutes, seconds = divrem(rem_seconds, 60) - - days > 0 && (period *= "$(days) Day(s)") - hours > 0 && (period *= "$(hours) Hour(s)") - minutes > 0 && (period *= "$(minutes) Minute(s)") - seconds > 0 && (period *= "$(seconds) Second(s)") - - period_str = period - else - period_str = "$(output_every) Iterations" - end - suffix = "$(red) within $(period_str)" - else - suffix = "Instantaneous" - end - return "$(var), $(suffix)" -end diff --git a/src/dict_writer.jl b/src/dict_writer.jl new file mode 100644 index 00000000..90082f2a --- /dev/null +++ b/src/dict_writer.jl @@ -0,0 +1,28 @@ +"""The `DictWriter` is a writer that does not write to disk, but to memory (in a +dictionary). + +This is particularly useful for testing and debugging. + +This is not type stable (the underlying dictionary does not know in advance what types might +be used) +""" +struct DictWriter{T} <: AbstractWriter + dict::T +end + +function DictWriter() + return DictWriter(Dict()) +end + +function write_field!(writer::DictWriter, field, diagnostic, u, p, t) + key_name = + diagnostic isa ScheduledDiagnostic ? output_short_name(diagnostic) : + diagnostic + diagnostic_dict = get!(writer.dict, key_name, Dict()) + diagnostic_dict[t] = copy(field) + return nothing +end + +function Base.getindex(writer::DictWriter, key) + return Base.getindex(writer.dict, key) +end diff --git a/src/hdf5_writer.jl b/src/hdf5_writer.jl index 879ac323..607e0dcf 100644 --- a/src/hdf5_writer.jl +++ b/src/hdf5_writer.jl @@ -1,18 +1,12 @@ import ClimaComms - -############## -# HDF5Writer # -############## +import ClimaCore.InputOutput """ HDF5Writer() - Save a `ScheduledDiagnostic` to a HDF5 file inside the `output_dir` of the simulation. - -TODO: This is a very barebone HDF5Writer. Do not consider this implementation as the "final -word". +TODO: This is a very barebone HDF5Writer! We need to implement the following features/options: - Toggle for write new files/append @@ -27,8 +21,9 @@ We need to implement the following features/options: - ...more features/options """ -struct HDF5Writer end - +struct HDF5Writer <: AbstractWriter + output_dir::String +end """ close(writer::HDF5Writer) @@ -37,20 +32,14 @@ Close all the files open in `writer`. (Currently no-op.) """ Base.close(writer::HDF5Writer) = nothing -function write_field!( - writer::HDF5Writer, - field, - diagnostic, - u, - p, - t, - output_dir, -) +function write_field!(writer::HDF5Writer, field, diagnostic, u, p, t) var = diagnostic.variable time = t - output_path = - joinpath(output_dir, "$(diagnostic.output_short_name)_$(time).h5") + output_path = joinpath( + writer.output_dir, + "$(diagnostic.output_short_name)_$(time).h5", + ) comms_ctx = ClimaComms.context(u.c) hdfwriter = InputOutput.HDF5Writer(output_path, comms_ctx) diff --git a/src/netcdf_writer.jl b/src/netcdf_writer.jl index 9052cdab..d8e9eb24 100644 --- a/src/netcdf_writer.jl +++ b/src/netcdf_writer.jl @@ -20,9 +20,6 @@ import ClimaCore.Remapping: Remapper, interpolate, interpolate! import NCDatasets -################### -# NetCDFWriter # -################## """ add_dimension!(nc::NCDatasets.NCDataset, name::String, @@ -464,7 +461,10 @@ and `domain` (e.g., `ClimaCore.Geometry.LatLongPoint`s). """ function hcoords_from_horizontal_space(space, domain, hpts) end -struct NetCDFWriter{T, TS, DI} +struct NetCDFWriter{T, TS, DI} <: AbstractWriter + + # The base folder where to save the files. + output_dir::String # TODO: At the moment, each variable gets its remapper. This is a little bit of a waste # because we probably only need a handful of remappers since the same remapper can be @@ -509,8 +509,7 @@ Base.close(writer::NetCDFWriter) = map(NCDatasets.close, values(writer.open_files)) """ - NetCDFWriter() - + NetCDFWriter(output_dir) Save a `ScheduledDiagnostic` to a NetCDF file inside the `output_dir` of the simulation by performing a pointwise (non-conservative) remapping first. @@ -519,6 +518,7 @@ Keyword arguments ================== - `cspace`: Center space of fields. +- `output_dir`: The base folder where the files should be saved. - `num_points`: How many points to use along the different dimensions to interpolate the fields. This is a tuple of integers, typically having meaning Long-Lat-Z, or X-Y-Z (the details depend on the configuration being simulated). @@ -529,8 +529,9 @@ Keyword arguments is maximum compression). """ -function NetCDFWriter(; +function NetCDFWriter( cspace, + output_dir; num_points = (180, 90, 50), disable_vertical_interpolation = false, compression_level = 0, @@ -584,6 +585,7 @@ function NetCDFWriter(; typeof(interpolated_physical_z), typeof(preallocated_arrays), }( + output_dir, Dict{String, Remapper}(), num_points, compression_level, @@ -681,7 +683,6 @@ function save_diagnostic_to_disk!( u, p, t, - output_dir, ) # Only the root process has to write ClimaComms.iamroot(ClimaComms.context(field)) || return nothing @@ -691,7 +692,7 @@ function save_diagnostic_to_disk!( space = axes(field) FT = Spaces.undertype(space) - output_path = outpath_name(output_dir, diagnostic) + output_path = outpath_name(writer.output_dir, diagnostic) if !haskey(writer.open_files, output_path) # Append or write a new file @@ -751,16 +752,8 @@ function save_diagnostic_to_disk!( return nothing end -function write_field!( - writer::NetCDFWriter, - field, - diagnostic, - u, - p, - t, - output_dir, -) +function write_field!(writer::NetCDFWriter, field, diagnostic, u, p, t) interpolate_field!(writer, field, diagnostic, u, p, t) - save_diagnostic_to_disk!(writer, field, diagnostic, u, p, t, output_dir) + save_diagnostic_to_disk!(writer, field, diagnostic, u, p, t) return nothing end diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 00000000..4603158f --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,42 @@ +function seconds_to_str_short(time::Real) + name = "" + days, rem_seconds = divrem(time, 24 * 60 * 60) + hours, rem_seconds = divrem(rem_seconds, 60 * 60) + minutes, seconds = divrem(rem_seconds, 60) + + # At this point, days, hours, minutes, seconds have to be integers. + # Let us force them to be such so that we can have a consistent string output. + days, hours, minutes = map(Int, (days, hours, minutes)) + + if round(seconds) == seconds + seconds = convert(Int, seconds) + end + + days > 0 && (name *= "$(days)d_") + hours > 0 && (name *= "$(hours)h_") + minutes > 0 && (name *= "$(minutes)m_") + seconds > 0 && (name *= "$(seconds)s_") + return rstrip(name, '_') +end + +function seconds_to_str_long(time::Real) + name = "" + days, rem_seconds = divrem(time, 24 * 60 * 60) + hours, rem_seconds = divrem(rem_seconds, 60 * 60) + minutes, seconds = divrem(rem_seconds, 60) + + # At this point, days, hours, minutes, seconds have to be integers. + # Let us force them to be such so that we can have a consistent string output. + days, hours, minutes = map(Int, (days, hours, minutes)) + + if round(seconds) == seconds + seconds = convert(Int, seconds) + end + + days > 0 && (name *= "$(days) Day(s) ") + hours > 0 && (name *= "$(hours) Hour(s) ") + minutes > 0 && (name *= "$(minutes) Minute(s) ") + seconds > 0 && (name *= "$(seconds) Second(s) ") + + return rstrip(name, ' ') +end diff --git a/test/TestTools.jl b/test/TestTools.jl new file mode 100644 index 00000000..7af15c17 --- /dev/null +++ b/test/TestTools.jl @@ -0,0 +1,51 @@ +import SciMLBase + +import ClimaCore +import ClimaComms +import ClimaTimeSteppers + +function ColumnCenterFiniteDifferenceSpace( + zelem = 10, + context = ClimaComms.SingletonCommsContext(); + FT = Float64, +) + zlim = (FT(0.0), FT(1.0)) + domain = ClimaCore.Domains.IntervalDomain( + ClimaCore.Geometry.ZPoint(zlim[1]), + ClimaCore.Geometry.ZPoint(zlim[2]); + boundary_names = (:bottom, :top), + ) + mesh = ClimaCore.Meshes.IntervalMesh(domain, nelems = zelem) + topology = ClimaCore.Topologies.IntervalTopology(context, mesh) + return ClimaCore.Spaces.CenterFiniteDifferenceSpace(topology) +end + +""" + create_problem_algo() + +An ODE problem for an exponential decay. +""" +function create_problem(; t0 = 0.0, tf = 1.0, dt = 1e-3) + # Let's solve an exponential decay + space = ColumnCenterFiniteDifferenceSpace() + + Y = ClimaCore.Fields.FieldVector(; my_var = ones(space)) + p = (; tau = -0.1) + + function exp_tendency!(dY, Y, p, t) + @. dY.my_var = p.tau * Y.my_var + end + + prob = SciMLBase.ODEProblem( + ClimaTimeSteppers.ClimaODEFunction(T_exp! = exp_tendency!), + Y, + (t0, tf), + p, + ) + algo = ClimaTimeSteppers.ExplicitAlgorithm(ClimaTimeSteppers.RK4()) + + args = prob, algo + kwargs = Dict(:dt => dt) + + return args, kwargs +end diff --git a/test/callback.jl b/test/callback.jl new file mode 100644 index 00000000..2487c79b --- /dev/null +++ b/test/callback.jl @@ -0,0 +1,95 @@ +using Test +import SciMLBase + +import ClimaDiagnostics.Callbacks: + Callback, CallbackOrchestrator, DivisorSchedule, EveryDtSchedule + +include("TestTools.jl") + +@testset "Callback" begin + # Test a callback that is called at every iteration by always returning true + + called = Ref(0) + function callback_func(integrator) + called[] += 1 + end + + scheduled_func = (_integrator) -> true + callback_everystep = Callback(callback_func, scheduled_func) + + t0 = 0.0 + tf = 1.0 + dt = 1e-3 + + args, kwargs = create_problem(; t0, tf, dt) + + expected_called = convert(Int, (tf - t0) / dt) + + cb = CallbackOrchestrator([callback_everystep]) + SciMLBase.solve(args...; kwargs..., callback = cb) + + @test called[] == expected_called +end + +@testset "Schedules" begin + # Test a callback that is called at every other iteration with DivisorSchedule + + called = Ref(0) + function callback_func0(integrator) + called[] += 1 + end + + divisor = 2 + scheduled_func = DivisorSchedule(divisor) + @test "$scheduled_func" == "2it" + + callback_everystep = Callback(callback_func0, scheduled_func) + + t0 = 0.0 + tf = 1.0 + dt = 1e-3 + + args, kwargs = create_problem(; t0, tf, dt) + + expected_called = convert(Int, (tf - t0) / (divisor * dt)) + + cb = CallbackOrchestrator([callback_everystep]) + SciMLBase.solve(args...; kwargs..., callback = cb) + + @test called[] == expected_called + + # EveryDtSchedule + + called = Ref(0) + function callback_func(integrator) + called[] += 1 + end + + called2 = Ref(0) + function callback_func2(integrator) + called2[] += 1 + end + + dt_callback = 0.2 + scheduled_func = EveryDtSchedule(dt_callback) + @test "$scheduled_func" == "0.2s" + + dt_callback2 = 0.3 + t_start2 = 0.1 + scheduled_func2 = EveryDtSchedule(dt_callback2; t_start = t_start2) + + callback_dt = Callback(callback_func, scheduled_func) + callback_dt2 = Callback(callback_func2, scheduled_func2) + + args, kwargs = create_problem(; t0, tf, dt) + + expected_called = convert(Int, (tf - t0) / dt_callback) + expected_called2 = convert(Int, floor((tf - t0 - t_start2) / dt_callback2)) + + cb = CallbackOrchestrator([callback_dt, callback_dt2]) + + SciMLBase.solve(args...; kwargs..., callback = cb) + + @test called[] == expected_called + @test called2[] == expected_called2 +end diff --git a/test/diagnostic_variable.jl b/test/diagnostic_variable.jl new file mode 100644 index 00000000..050b5dd7 --- /dev/null +++ b/test/diagnostic_variable.jl @@ -0,0 +1,25 @@ +using Test +import ClimaDiagnostics.DiagnosticVariables + +@testset "DiagnosticVariable" begin + # First, create a diagnostic variable + + # All the arguments + var = DiagnosticVariables.DiagnosticVariable(; + short_name = "my", + long_name = "My test", + standard_name = "my_test", + units = "m", + comments = "It works!", + compute! = (out, u, p, t) -> 1, + ) + + @test DiagnosticVariables.short_name(var) == "my" + + # The minimum number of arguments required + var = + DiagnosticVariables.DiagnosticVariable(; compute! = (out, u, p, t) -> 1) + + @test DiagnosticVariables.short_name(var) == "" + +end diff --git a/test/diagnostics.jl b/test/diagnostics.jl new file mode 100644 index 00000000..3880fd99 --- /dev/null +++ b/test/diagnostics.jl @@ -0,0 +1,202 @@ +using Test +import ClimaDiagnostics +import ClimaDiagnostics.Callbacks +import ClimaDiagnostics.ScheduledDiagnostics +import ClimaDiagnostics.Writers + +import SciMLBase + +include("TestTools.jl") + +@testset "Utils" begin + @test ClimaDiagnostics.identity_of_reduction(max) == -Inf + @test ClimaDiagnostics.identity_of_reduction(min) == +Inf + @test ClimaDiagnostics.identity_of_reduction(+) == 0 + @test ClimaDiagnostics.identity_of_reduction(*) == 1 + + for FT in (Float32, Float64) + space = ColumnCenterFiniteDifferenceSpace(; FT) + field = FT(10) .* ones(space) + array = FT.(collect(1:10)) + # No time reduction + @test isnothing(ClimaDiagnostics.reset_accumulator!(field, nothing)) + @test isnothing(ClimaDiagnostics.reset_accumulator!(array, nothing)) + # + + ClimaDiagnostics.reset_accumulator!(field, +) + @test extrema(field) == (FT(0), FT(0)) + + ClimaDiagnostics.reset_accumulator!(array, +) + @test extrema(array) == (FT(0), FT(0)) + # * + ClimaDiagnostics.reset_accumulator!(field, *) + @test extrema(field) == (FT(1), FT(1)) + + ClimaDiagnostics.reset_accumulator!(array, *) + @test extrema(array) == (FT(1), FT(1)) + + # Now everything is reset to 1, let's test the accumulator + accumulated_value_field = ones(space) + accumulated_value_array = fill!(similar(array), 1) + + @test isnothing( + ClimaDiagnostics.accumulate!( + accumulated_value_field, + field, + nothing, + ), + ) + @test isnothing( + ClimaDiagnostics.accumulate!( + accumulated_value_array, + array, + nothing, + ), + ) + + ClimaDiagnostics.accumulate!(accumulated_value_field, field, +) + @test extrema(accumulated_value_field) == (FT(2), FT(2)) + + ClimaDiagnostics.accumulate!(accumulated_value_array, array, +) + @test extrema(accumulated_value_array) == (FT(2), FT(2)) + end +end + +@testset "Diagnostics" begin + t0 = 0.0 + tf = 1.0 + dt = 0.1 + + space = ColumnCenterFiniteDifferenceSpace() + Y = ClimaCore.Fields.FieldVector(; my_var = ones(space)) + p = (; tau = -0.1) + + function exp_tendency!(dY, Y, p, t) + @. dY.my_var = p.tau * Y.my_var + end + + prob = SciMLBase.ODEProblem( + ClimaTimeSteppers.ClimaODEFunction(T_exp! = exp_tendency!), + Y, + (t0, tf), + p, + ) + algo = ClimaTimeSteppers.ExplicitAlgorithm(ClimaTimeSteppers.RK4()) + + function computet!(out, u, p, t) + if isnothing(out) + return [t] + else + out .= t + return nothing + end + end + + # A simple diagnostic that just saves the time at every timestep + simple_var = ClimaDiagnostics.DiagnosticVariable(; + compute! = computet!, + short_name = "YO", + long_name = "YO YO", + ) + + dict_writer = Writers.DictWriter() + + # Let's start with a simple diagnostic that is computed and output + # at every time step (the default) + diagnostic_every_step = ClimaDiagnostics.ScheduledDiagnostic( + variable = simple_var, + output_writer = dict_writer, + ) + short_name = ScheduledDiagnostics.output_short_name(diagnostic_every_step) + + diagnostic_handler = ClimaDiagnostics.DiagnosticsHandler( + [diagnostic_every_step], + Y, + p, + t0; + dt, + ) + + diag_cb = ClimaDiagnostics.DiagnosticsCallback(diagnostic_handler) + + prob = SciMLBase.ODEProblem( + ClimaTimeSteppers.ClimaODEFunction(T_exp! = exp_tendency!), + Y, + (t0, tf), + p, + ) + algo = ClimaTimeSteppers.ExplicitAlgorithm(ClimaTimeSteppers.RK4()) + + SciMLBase.solve(prob, algo, dt = dt, callback = diag_cb) + + @test length(keys(dict_writer.dict[short_name])) == + convert(Int, 1 + (tf - t0) / dt) + @test dict_writer[short_name][t0] == [t0] + @test dict_writer[short_name][tf] == [tf] + + # Now test accumulation and average + dict_writer = Writers.DictWriter() + + diagnostic_accumulate_every_step = ClimaDiagnostics.ScheduledDiagnostic( + variable = simple_var, + output_writer = dict_writer, + reduction_time_func = +, + output_schedule_func = Callbacks.DivisorSchedule(5), + ) + short_name = + ScheduledDiagnostics.output_short_name(diagnostic_accumulate_every_step) + + diagnostic_handler = ClimaDiagnostics.DiagnosticsHandler( + [diagnostic_accumulate_every_step], + Y, + p, + t0; + dt, + ) + + diag_cb = ClimaDiagnostics.DiagnosticsCallback(diagnostic_handler) + + prob = SciMLBase.ODEProblem( + ClimaTimeSteppers.ClimaODEFunction(T_exp! = exp_tendency!), + Y, + (t0, tf), + p, + ) + algo = ClimaTimeSteppers.ExplicitAlgorithm(ClimaTimeSteppers.RK4()) + + SciMLBase.solve(prob, algo, dt = dt, callback = diag_cb) + + @test length(keys(dict_writer.dict[short_name])) == + convert(Int, (tf - t0) / 5dt) + + @test dict_writer[short_name][0.5][] ≈ sum(t0:dt:(tf / 2)) + @test dict_writer[short_name][1.0][] ≈ sum((tf / 2 + dt):dt:tf) + + # Incompatible timestep + diagnostic_incompatible_timestep = ClimaDiagnostics.ScheduledDiagnostic( + variable = simple_var, + output_writer = dict_writer, + reduction_time_func = +, + output_schedule_func = Callbacks.EveryDtSchedule(1.5dt), + ) + @test_throws ErrorException ClimaDiagnostics.DiagnosticsHandler( + [diagnostic_accumulate_every_step], + Y, + p, + t0; + dt, + ) + diagnostic_incompatible_timestep = ClimaDiagnostics.ScheduledDiagnostic( + variable = simple_var, + output_writer = dict_writer, + reduction_time_func = +, + compute_schedule_func = Callbacks.EveryDtSchedule(1.5dt), + ) + @test_throws ErrorException ClimaDiagnostics.DiagnosticsHandler( + [diagnostic_accumulate_every_step], + Y, + p, + t0; + dt, + ) + +end diff --git a/test/doctest.jl b/test/doctest.jl new file mode 100644 index 00000000..e27981f0 --- /dev/null +++ b/test/doctest.jl @@ -0,0 +1,6 @@ +using Documenter +import ClimaDiagnostics + +@testset "Test docstrings" begin + doctest(ClimaDiagnostics; manual = false) +end diff --git a/test/runtests.jl b/test/runtests.jl index 9cfe5dd7..b843d460 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,13 @@ using Test #! format: off @safetestset "Aqua" begin @time include("aqua.jl") end @safetestset "Format" begin @time include("format.jl") end +@safetestset "Doctest" begin @time include("doctest.jl") end + +@safetestset "Writers" begin @time include("writers.jl") end + +@safetestset "Callbacks and Schdules" begin @time include("callback.jl") end +@safetestset "DiagnosticVariable" begin @time include("diagnostic_variable.jl") end +@safetestset "SchduledDiagnostics" begin @time include("schduled_diagnostics.jl") end #! format: on return nothing diff --git a/test/writers.jl b/test/writers.jl new file mode 100644 index 00000000..d1499a05 --- /dev/null +++ b/test/writers.jl @@ -0,0 +1,15 @@ +using Test + +import ClimaDiagnostics.Writers + +@testset "DictWriter" begin + writer = Writers.DictWriter() + + # Test with some strings and floats instead of actual Fields and ScheduledDiagnostics + Writers.write_field!(writer, 10.0, "mytest", nothing, nothing, 0.0) + @test writer.dict["mytest"][0.0] == 10.0 + Writers.write_field!(writer, 20.0, "mytest", nothing, nothing, 2.0) + @test writer.dict["mytest"][2.0] == 20.0 + Writers.write_field!(writer, 50.0, "mytest2", nothing, nothing, 8.0) + @test writer.dict["mytest2"][8.0] == 50.0 +end