Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
jfrost-mo committed Aug 1, 2024
1 parent 7d2ab11 commit dbdb05e
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 100 deletions.
64 changes: 17 additions & 47 deletions cset-workflow/flow.cylc
Original file line number Diff line number Diff line change
Expand Up @@ -4,46 +4,30 @@ title = CSET
description = Workflow for running CSET.
URL = https://metoffice.github.io/CSET

[scheduler]
UTC mode = True

[scheduling]
runahead limit = P{{CSET_RUNAHEAD_LIMIT}}
initial cycle point = {{CSET_INITIAL_CYCLE_POINT}}
final cycle point = {{CSET_FINAL_CYCLE_POINT}}
initial cycle point = 1000-01-01

[[graph]]
# Only runs on the first cycle.
R1/^ = """
build_conda => install_website_skeleton
build_conda => install_local_cset
install_website_skeleton & install_local_cset => FETCH_DATA
"""

# Only runs on the final cycle.
R1/$ = """
process_finish => COLLATE:succeed-all =>
finish_website => send_email => housekeeping_full
build_conda => install_website_skeleton
install_local_cset & install_website_skeleton => setup_complete
"""

# Runs every cycle to process the data in parallel.
{{CSET_CYCLE_PERIOD}} = """
install_website_skeleton[^] & install_local_cset[^] =>
FETCH_DATA:succeed-all => PARALLEL:succeed-all =>
process_finish => housekeeping_raw

# Intercycle dependence with this task ensures the collate step waits for
# the required data.
process_finish[-{{CSET_CYCLE_PERIOD}}] => process_finish
# Runs for every forecast initiation time to process the data in parallel.
# The Jinja2 loop emits one run-once (R1) graph section per entry in the
# CSET_CASE_DATES template variable, using the entry as the cycle point.
# Each case waits on setup_complete from the initial cycle point ([^]), then
# fetches data (all FETCH_DATA members must succeed) before processing. The
# :finish-all trigger on PROCESS fires once every member has finished,
# regardless of success, so process_finish and housekeeping_raw still run if
# some recipes fail.
{% for date in CSET_CASE_DATES %}
R1/{{date}} = """
setup_complete[^] => FETCH_DATA:succeed-all => PROCESS:finish-all => process_finish => housekeeping_raw
"""
{% endfor %}

{% if CSET_INCREMENTAL_OUTPUT %}
# Runs every so often to update output plots during runtime.
{{CSET_INCREMENTAL_OUTPUT_PERIOD}} = """
COLLATE[-{{CSET_INCREMENTAL_OUTPUT_PERIOD}}]:finish-all &
process_finish => COLLATE
# Only runs on the final cycle.
R1/$ = """
PROCESS:finish-all => finish_website => send_email
PROCESS:finish-all => housekeeping_full
"""
{% endif %}

[runtime]
[[root]]
Expand Down Expand Up @@ -75,15 +59,8 @@ URL = https://metoffice.github.io/CSET
WEB_DIR = {{WEB_DIR}}
COLORBAR_FILE = {{COLORBAR_FILE}}

[[PARALLEL]]
script = rose task-run -v --app-key=run_cset_recipe
[[[environment]]]
CSET_BAKE_MODE = parallel

[[COLLATE]]
[[PROCESS]]
script = rose task-run -v --app-key=run_cset_recipe
[[[environment]]]
CSET_BAKE_MODE = collate

[[FETCH_DATA]]

Expand All @@ -96,20 +73,13 @@ URL = https://metoffice.github.io/CSET
ROSE_APP_OPT_CONF_KEYS = {{METPLUS_OPT_CONFIG_KEYS}}
{% endif %}

# Dummy tasks needed for workflow scheduling.
[[process_finish]]
script = true
platform = localhost

[[dummy_collate]]
inherit = COLLATE
[[DUMMY_TASK]]
script = true
platform = localhost
execution time limit = PT1M

[[dummy_parallel]]
inherit = PARALLEL
script = true
platform = localhost
[[setup_complete]]
inherit = DUMMY_TASK

[[build_conda]]
# Create the conda environment if it does not yet exist.
Expand Down
8 changes: 1 addition & 7 deletions cset-workflow/includes/plot_spatial_surface_model_field.cylc
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@
{% for model_field in SURFACE_MODEL_FIELDS %}
[runtime]
[[generic_spatial_plot_time_series_{{model_field}}]]
inherit = PARALLEL
[[[environment]]]
CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml"
CSET_ADDOPTS = "--VARNAME={{model_field}}"

[[generic_spatial_plot_time_series_collation_{{model_field}}]]
inherit = COLLATE
inherit = PROCESS
[[[environment]]]
CSET_RECIPE_NAME = "generic_surface_spatial_plot_sequence.yaml"
CSET_ADDOPTS = "--VARNAME={{model_field}}"
Expand Down
59 changes: 13 additions & 46 deletions cset-workflow/meta/rose-meta.conf
Original file line number Diff line number Diff line change
Expand Up @@ -204,58 +204,25 @@ compulsory=true
# Data and Cycling
################################################################################

[template variables=CSET_INITIAL_CYCLE_POINT]
[template variables=CSET_CASE_DATES]
ns=Data and Cycling
description=Datetime of initial cycle point.
help=This should be an ISO 8601 datetime string indicating when you want the
cycling component of CSET to begin. E.g. 2000-01-01T00:00Z. Ensure that it
is consistent with your data's starting validity time. If not then a warning
that cubes can not be loaded is raised.
type=quoted
description=List of datetimes of cases.
help=This should be a Python list of ISO 8601 datetime strings, one per case,
giving its forecast initiation time (AKA data time), e.g. ["2000-01-01T00:00Z"].
Ensure that each is consistent with your data's first validity time. If not
then a warning that cubes cannot be loaded is raised.
type=python_list
compulsory=true
sort-key=cycle1

[template variables=CSET_FINAL_CYCLE_POINT]
ns=Data and Cycling
description=Datetime of final cycle point.
help=This should be an ISO 8601 datetime string indicating when you want the
cycling component of CSET to finish. E.g. 2000-01-01T00:00Z
type=quoted
compulsory=true
sort-key=cycle2

[template variables=CSET_CYCLE_PERIOD]
ns=Data and Cycling
description=Period between each output.
help=ISO 8601 duration for the length of the data in each cycle. For 3-hourly
data this would be PT3H. For hourly data choose PT1H.
type=quoted
compulsory=true

[template variables=CSET_INCREMENTAL_OUTPUT]
[template variables=CSET_CASE_PATHS]
ns=Data and Cycling
description=Write partial output before cycle end.
help=Whether to write partial output plots before the end of the workflow.
Enabling this will slow the workflow, but allows for inspecting output before
the workflow is complete.

Only recommended for long running (multi-hour+) workflows.
type=python_boolean
trigger=template variables=CSET_INCREMENTAL_OUTPUT_PERIOD: True;
description=Lists of data paths of cases.
help=Python list of data locations for the case data as strings. The order should
match that of CSET_CASE_DATES.
# Rose metadata type names contain no spaces: use python_list, the same type
# as CSET_CASE_DATES, so the value is validated as a Python list.
type=python_list
compulsory=true
sort-key=incr_out1

[template variables=CSET_INCREMENTAL_OUTPUT_PERIOD]
ns=Data and Cycling
description=Period of updating output plots.
help=ISO 8601 duration for the length of time between updating the output plots.
Smaller values will frequently update the output plots, but use more compute.

To avoid races, the total length of the cycles must be an integer multiple
of the CSET_CYCLE_PERIOD.
type=quoted
compulsory=true
sort-key=incr_out2
sort-key=cycle1

# Input data settings.
[template variables=FETCH_FCST_OPT_CONF]
Expand Down

0 comments on commit dbdb05e

Please sign in to comment.