Add validation of config options for all drivers

This still supports unsetting the options when providing an empty string. Additionally, it raises ValueError when we are not able to set the option when starting simulation. We expose queue_bool_options, queue_memory_options, queue_positive_int_options, queue_positive_number_options, queue_string_options and queue_memory_options from ert.config. NB: when DEBUG_OUTPUT is deprecated it needs to be removed from queue_options.
equinor · Nov 2, 2023 · 33da07f · 33da07f
1 parent 4f2fdcc
commit 33da07f
Show file tree

Hide file tree

Showing 7 changed files with 196 additions and 121 deletions.
diff --git a/src/ert/config/__init__.py b/src/ert/config/__init__.py
@@ -17,7 +17,14 @@
 from .observations import EnkfObs
 from .parameter_config import ParameterConfig
 from .parsing import ConfigValidationError, ConfigWarning
-from .queue_config import QueueConfig
+from .queue_config import (
+    QueueConfig,
+    queue_bool_options,
+    queue_memory_options,
+    queue_positive_int_options,
+    queue_positive_number_options,
+    queue_string_options,
+)
 from .queue_system import QueueSystem
 from .response_config import ResponseConfig
 from .summary_config import SummaryConfig
@@ -62,4 +69,9 @@
     "WorkflowJob",
     "field_transform",
     "get_mode_variables",
+    "queue_bool_options",
+    "queue_memory_options",
+    "queue_positive_int_options",
+    "queue_positive_number_options",
+    "queue_string_options",
 ]
diff --git a/src/ert/config/queue_config.py b/src/ert/config/queue_config.py
@@ -5,11 +5,11 @@
 import shutil
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple, no_type_check
+from typing import Any, Dict, List, Mapping, Tuple, no_type_check
 
 from ert import _clib
 
-from .parsing import ConfigDict, ConfigValidationError, ErrorInfo
+from .parsing import ConfigDict, ConfigValidationError
 from .queue_system import QueueSystem
 
 GENERIC_QUEUE_OPTIONS: List[str] = ["MAX_RUNNING"]
@@ -31,32 +31,6 @@ class QueueConfig:
         default_factory=dict
     )
 
-    def __post_init__(self) -> None:
-        errors = []
-        for _, value in [
-            setting
-            for settings in self.queue_options.values()
-            for setting in settings
-            if setting[0] == "MAX_RUNNING" and setting[1]
-        ]:
-            err_msg = "QUEUE_OPTION MAX_RUNNING is"
-            try:
-                int_val = int(value)
-                if int_val < 0:
-                    errors.append(
-                        ErrorInfo(f"{err_msg} negative: {str(value)!r}").set_context(
-                            value
-                        )
-                    )
-            except ValueError:
-                errors.append(
-                    ErrorInfo(f"{err_msg} not an integer: {str(value)!r}").set_context(
-                        value
-                    )
-                )
-        if errors:
-            raise ConfigValidationError.from_collected(errors)
-
     @no_type_check
     @classmethod
     def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
@@ -95,11 +69,11 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
                     " usually provided by the site-configuration file, beware that"
                     " you are effectively replacing the default value provided."
                 )
-        if (
-            selected_queue_system == QueueSystem.TORQUE
-            and queue_options[QueueSystem.TORQUE]
-        ):
-            _validate_torque_options(queue_options[QueueSystem.TORQUE])
+
+        if queue_options[selected_queue_system]:
+            _validate_queue_driver_settings(
+                queue_options[selected_queue_system], selected_queue_system.name
+            )
 
         if (
             selected_queue_system != QueueSystem.LOCAL
@@ -139,17 +113,137 @@ def generate_dict(option_list: List[Tuple[str, str]]) -> Dict[str, List[str]]:
             )
 
 
-def _validate_torque_options(torque_options: List[Tuple[str, str]]) -> None:
-    for option_strings in torque_options:
+queue_memory_options: Mapping[str, List[str]] = {
+    "LSF": [],
+    "SLURM": [],
+    "TORQUE": ["MEMORY_PER_JOB"],
+    "LOCAL": [],
+}
+
+queue_string_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "LSF_RESOURCE",
+        "LSF_SERVER",
+        "LSF_QUEUE",
+        "LSF_LOGIN_SHELL",
+        "LSF_RSH_CMD",
+        "BSUB_CMD",
+        "BJOBS_CMD",
+        "BKILL_CMD",
+        "BHIST_CMD",
+        "EXCLUDE_HOST",
+        "PROJECT_CODE",
+    ],
+    "SLURM": [
+        "SBATCH",
+        "SCANCEL",
+        "SCONTROL",
+        "SQUEUE",
+        "PARTITION",
+        "INCLUDE_HOST",
+        "EXCLUDE_HOST",
+    ],
+    "TORQUE": [
+        "QSUB_CMD",
+        "QSTAT_CMD",
+        "QDEL_CMD",
+        "QSTAT_OPTIONS",
+        "QUEUE",
+        "CLUSTER_LABEL",
+        "JOB_PREFIX",
+        "DEBUG_OUTPUT",
+    ],
+    "LOCAL": [],
+}
+
+queue_positive_int_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "BJOBS_TIMEOUT",
+        "MAX_RUNNING",
+    ],
+    "SLURM": [
+        "MEMORY",
+        "MEMORY_PER_CPU",
+        "MAX_RUNNING",
+    ],
+    "TORQUE": [
+        "NUM_NODES",
+        "NUM_CPUS_PER_NODE",
+        "MAX_RUNNING",
+    ],
+    "LOCAL": ["MAX_RUNNING"],
+}
+
+queue_positive_number_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "SUBMIT_SLEEP",
+    ],
+    "SLURM": [
+        "SQUEUE_TIMEOUT",
+        "MAX_RUNTIME",
+    ],
+    "TORQUE": ["SUBMIT_SLEEP", "QUEUE_QUERY_TIMEOUT"],
+    "LOCAL": [],
+}
+
+queue_bool_options: Mapping[str, List[str]] = {
+    "LSF": ["DEBUG_OUTPUT"],
+    "SLURM": [],
+    "TORQUE": ["KEEP_QSUB_OUTPUT"],
+    "LOCAL": [],
+}
+
+
+def _validate_queue_driver_settings(
+    queue_system_options: List[Tuple[str, str]], queue_type: str
+) -> None:
+    for option_strings in queue_system_options:
         option_name = option_strings[0]
         option_value = option_strings[1]
+
+        if option_value == "":  # This is equivalent to the option not being set
+            continue
         if (
-            option_value != ""  # This is equivalent to the option not being set
-            and option_name == "MEMORY_PER_JOB"
+            option_name in queue_memory_options[queue_type]
             and re.match("[0-9]+[mg]b", option_value) is None
         ):
-            raise ConfigValidationError(
-                f"The value '{option_value}' is not valid for the Torque option "
-                "MEMORY_PER_JOB, it must be of "
-                "the format '<integer>mb' or '<integer>gb'."
+            raise ConfigValidationError.with_context(
+                f"'{option_value}' for {option_name} is not a valid format "
+                "It should be of the format '<integer>mb' or '<integer>gb'.",
+                option_value,
+            )
+        if option_name in queue_string_options[queue_type] and not isinstance(
+            option_value, str
+        ):
+            raise ConfigValidationError.with_context(
+                f"'{option_value}' for {option_name} is not a valid string type.",
+                option_value,
+            )
+        if option_name in queue_positive_number_options[queue_type] and (
+            re.match(r"^\d+(\.\d+)?$", option_value) is None
+        ):
+            raise ConfigValidationError.with_context(
+                f"'{option_value}' for {option_name} is not a valid integer or float.",
+                option_value,
+            )
+        if option_name in queue_positive_int_options[queue_type] and (
+            re.match(r"^\d+$", option_value) is None
+        ):
+            raise ConfigValidationError.with_context(
+                f"'{option_value}' for {option_name} is not a valid positive integer.",
+                option_value,
+            )
+        if option_name in queue_bool_options[queue_type] and not option_value in [
+            "TRUE",
+            "FALSE",
+            "0",
+            "1",
+            "T",
+            "F",
+            "True",
+            "False",
+        ]:
+            raise ConfigValidationError.with_context(
+                f"The '{option_value}' for {option_name} should be either TRUE or FALSE.",
+                option_value,
             )
diff --git a/src/ert/job_queue/driver.py b/src/ert/job_queue/driver.py
@@ -11,7 +11,7 @@ class Driver(BaseCClass):  # type: ignore
     TYPE_NAME = "driver"
     _alloc = ResPrototype("void* queue_driver_alloc( queue_driver_enum )", bind=False)
     _free = ResPrototype("void queue_driver_free( driver )")
-    _set_option = ResPrototype("void queue_driver_set_option( driver , char* , char*)")
+    _set_option = ResPrototype("bool queue_driver_set_option( driver , char* , char*)")
     _get_option = ResPrototype("char* queue_driver_get_option(driver, char*)")
 
     def __init__(
@@ -52,7 +52,8 @@ def create_driver(cls, queue_config: QueueConfig) -> "Driver":
         driver = Driver(queue_config.queue_system)
         if queue_config.queue_system in queue_config.queue_options:
             for setting in queue_config.queue_options[queue_config.queue_system]:
-                driver.set_option(*setting)
+                if not driver.set_option(*setting):
+                    raise ValueError(f"Queue option not set {setting}")
         return driver
 
     @property

diff --git a/tests/unit_tests/config/config_dict_generator.py b/tests/unit_tests/config/config_dict_generator.py
@@ -14,7 +14,14 @@
 from pydantic import PositiveInt
 
 from ert import _clib
-from ert.config import QueueSystem
+from ert.config import (
+    QueueSystem,
+    queue_bool_options,
+    queue_memory_options,
+    queue_positive_int_options,
+    queue_positive_number_options,
+    queue_string_options,
+)
 from ert.config.field import TRANSFORM_FUNCTIONS
 from ert.config.parsing import ConfigKeys
 
@@ -95,59 +102,17 @@ def valid_queue_options(queue_system: str):
     return valids
 
 
-def valid_queue_values(option_name):
-    if option_name in [
-        "QSUB_CMD",
-        "QSTAT_CMD",
-        "QSTAT_OPTIONS",
-        "QDEL_CMD",
-        "QUEUE",
-        "CLUSTER_LABEL",
-        "JOB_PREFIX",
-        "DEBUG_OUTPUT",
-        "LSF_RESOURCE",
-        "LSF_SERVER",
-        "LSF_QUEUE",
-        "LSF_LOGIN_SHELL",
-        "LSF_RSH_CMD",
-        "BSUB_CMD",
-        "BJOBS_CMD",
-        "BKILL_CMD",
-        "BHIST_CMD",
-        "BJOBS_TIMEOUT",
-        "EXCLUDE_HOST",
-        "PARTITION",
-        "PROJECT_CODE",
-        "SBATCH",
-        "SCANCEL",
-        "SCONTROL",
-        "SQUEUE",
-        "MEMORY",
-        "MEMORY_PER_CPU",
-        "EXCLUDE_HOST",
-        "INCLUDE_HOST",
-    ]:
+def valid_queue_values(option_name, queue_system):
+    if option_name in queue_string_options[queue_system]:
         return words
-    if option_name in [
-        "SUBMIT_SLEEP",
-        "SQUEUE_TIMEOUT",
-    ]:
-        return st.builds(str, small_floats)
-    if option_name in ["MEMORY_PER_JOB"]:
-        return st.builds(str, memory_with_unit())
-    if option_name in [
-        "NUM_CPUS_PER_NODE",
-        "NUM_NODES",
-        "MAX_RUNNING",
-        "MAX_RUNTIME",
-        "QUEUE_QUERY_TIMEOUT",
-    ]:
-        return st.builds(str, positives)
-    if option_name in [
-        "KEEP_QSUB_OUTPUT",
-        "DEBUG_OUTPUT",
-    ]:
-        return st.builds(str, booleans)
+    elif option_name in queue_positive_number_options[queue_system]:
+        return small_floats.map(str)
+    elif option_name in queue_positive_int_options[queue_system]:
+        return positives.map(str)
+    elif option_name in queue_bool_options[queue_system]:
+        return booleans.map(str)
+    elif option_name in queue_memory_options[queue_system]:
+        return memory_with_unit()
     else:
         raise ValueError(
             "config_dict_generator does not know how to "
@@ -161,7 +126,7 @@ def queue_options(draw, systems):
     name = draw(st.sampled_from(valid_queue_options(queue_system)))
     do_set = draw(booleans)
     if do_set:
-        return [queue_system, name, draw(valid_queue_values(name))]
+        return [queue_system, name, draw(valid_queue_values(name, queue_system))]
     else:
         # Missing VALUE means unset
         return [queue_system, name]

diff --git a/tests/unit_tests/config/test_ert_config.py b/tests/unit_tests/config/test_ert_config.py
@@ -707,8 +707,8 @@ def test_default_ens_path(tmpdir):
     "max_running_value, expected_error",
     [
         (100, None),  # positive integer
-        (-1, "QUEUE_OPTION MAX_RUNNING is negative"),  # negative integer
-        ("not_an_integer", "QUEUE_OPTION MAX_RUNNING is not an integer"),  # non-integer
+        (-1, "is not a valid positive integer"),  # negative integer
+        ("not_an_integer", "is not a valid positive integer"),  # non-integer
     ],
 )
 def test_queue_config_max_running_invalid_values(max_running_value, expected_error):
@@ -736,23 +736,6 @@ def test_queue_config_max_running_invalid_values(max_running_value, expected_err
         ErtConfig.from_file(test_config_file_name)
 
 
-@settings(max_examples=10)
-@given(config_generators())
-def test_that_queue_config_dict_negative_value_invalid(
-    tmp_path_factory, config_generator
-):
-    with config_generator(tmp_path_factory) as config_values:
-        config_dict = config_values.to_config_dict("test.ert", os.getcwd())
-        config_dict[ConfigKeys.QUEUE_OPTION].append(
-            ["LSF", "MAX_RUNNING", "-6"],
-        )
-        with pytest.raises(
-            expected_exception=ConfigValidationError,
-            match="QUEUE_OPTION MAX_RUNNING is negative",
-        ):
-            ErtConfig.from_dict(config_dict)
-
-
 @pytest.mark.usefixtures("use_tmpdir")
 def test_that_non_existant_job_directory_gives_config_validation_error():
     test_config_file_base = "test"