diff --git a/payu/branch.py b/payu/branch.py index 09f7d845..dd0c51b6 100644 --- a/payu/branch.py +++ b/payu/branch.py @@ -13,7 +13,7 @@ from typing import Optional import shutil -from ruamel.yaml import YAML, CommentedMap +from ruamel.yaml import YAML, CommentedMap, constructor import git from payu.fsops import read_config, DEFAULT_CONFIG_FNAME, list_archive_dirs @@ -67,8 +67,18 @@ def add_restart_to_config(restart_path: Path, config_path: Path) -> None: restart in archive""" # Default ruamel yaml preserves comments and multiline strings - yaml = YAML() - config = yaml.load(config_path) + try: + yaml = YAML() + config = yaml.load(config_path) + except constructor.DuplicateKeyError as e: + # PyYaml which is used to read config allows duplicate keys + # These will get removed when the configuration file is modified + warnings.warn( + "Removing any subsequent duplicate keys from config.yaml" + ) + yaml = YAML() + yaml.allow_duplicate_keys = True + config = yaml.load(config_path) # Add in restart path config['restart'] = str(restart_path) diff --git a/payu/cli.py b/payu/cli.py index 0a6348a3..87afc4c9 100644 --- a/payu/cli.py +++ b/payu/cli.py @@ -15,6 +15,7 @@ import shlex import subprocess import sys +import warnings import payu import payu.envmod as envmod @@ -23,10 +24,17 @@ from payu.schedulers import index as scheduler_index import payu.subcommands - # Default configuration DEFAULT_CONFIG = 'config.yaml' +# Force warnings.warn() to omit the source code line in the message +formatwarning_orig = warnings.formatwarning +warnings.formatwarning = ( + lambda message, category, filename, lineno, line=None: ( + formatwarning_orig(message, category, filename, lineno, line='') + ) +) + def parse(): """Parse the command line inputs and execute the subcommand.""" diff --git a/payu/fsops.py b/payu/fsops.py index ac91d82c..6560eade 100644 --- a/payu/fsops.py +++ b/payu/fsops.py @@ -17,6 +17,7 @@ import shlex import subprocess from typing import Union, List +import warnings # Extensions import yaml @@ -62,6 +63,27 @@ def movetree(src, dst, symlinks=False): shutil.rmtree(src) +class DuplicateKeyWarnLoader(yaml.SafeLoader): + def construct_mapping(self, node, deep=False): + """Add warning for duplicate keys in yaml file, as currently + PyYAML overwrites duplicate keys even though in YAML, keys + are meant to be unique + """ + mapping = {} + for key_node, value_node in node.value: + key = self.construct_object(key_node, deep=deep) + value = self.construct_object(value_node, deep=deep) + if key in mapping: + warnings.warn( + "Duplicate key found in config.yaml: " + f"key '{key}' with value '{value}'. " + f"This overwrites the original value: '{mapping[key]}'" + ) + mapping[key] = value + + return super().construct_mapping(node, deep) + + def read_config(config_fname=None): """Parse input configuration file and return a config dict.""" @@ -70,7 +92,7 @@ def read_config(config_fname=None): try: with open(config_fname, 'r') as config_file: - config = yaml.safe_load(config_file) + config = yaml.load(config_file, Loader=DuplicateKeyWarnLoader) # NOTE: A YAML file with no content returns `None` if config is None: diff --git a/test/test_payu.py b/test/test_payu.py index cf180875..7ab218cf 100644 --- a/test/test_payu.py +++ b/test/test_payu.py @@ -1,6 +1,7 @@ from io import StringIO import os from pathlib import Path +import payu.branch import pytest import shutil import stat @@ -432,3 +433,34 @@ def test_run_userscript_command(tmp_path): ]) def test_needs_shell(command, expected): assert payu.fsops.needs_subprocess_shell(command) == expected + + +def test_read_config_yaml_duplicate_key(): + """The PyYAML library is used for reading config.yaml, but use ruamel yaml + is used in when modifying config.yaml as part of payu checkout + (ruamel is used to preserve comments and multi-line strings). + This led to bug #441, where pyyaml allowed duplicate keys but + ruamel.library raises an error + """ + # Create a yaml file with a duplicate key + config_content = """ +pbs_flags: value1 +pbs_flags: value2 +""" + config_path = tmpdir / "config.yaml" + with config_path.open("w") as file: + file.write(config_content) + + # Test read config passes without an error but a warning is raised + warn_msg = "Duplicate key found in config.yaml: key 'pbs_flags' with " + warn_msg += "value 'value2'. This overwrites the original value: 'value1'" + with pytest.warns(UserWarning, match=warn_msg): + payu.fsops.read_config(config_path) + + restart_path = tmpdir / "restarts" + + # Test add restart to config.yaml does not fail with an error, but + # still raises another warning that duplicates keys will be deleted + warn_msg = "Removing any subsequent duplicate keys from config.yaml" + with pytest.warns(UserWarning, match=warn_msg): + payu.branch.add_restart_to_config(restart_path, config_path)