From 865f2332d33cf50050d434cd1b1d542a8ae75c58 Mon Sep 17 00:00:00 2001
From: xjules <jparu@equinor.com>
Date: Tue, 24 Oct 2023 13:43:33 +0200
Subject: [PATCH] Add validation of config options for all drivers

This still supports unsetting the options when providing an empty string.
Additionally, it raises ValueError when we are not able to set the option
when starting simulation.
---
 src/ert/config/queue_config.py               | 161 ++++++++++++++-----
 src/ert/job_queue/driver.py                  |   5 +-
 tests/unit_tests/config/test_ert_config.py   |   8 +-
 tests/unit_tests/config/test_queue_config.py |  27 +++-
 4 files changed, 152 insertions(+), 49 deletions(-)

diff --git a/src/ert/config/queue_config.py b/src/ert/config/queue_config.py
index 6a7bcaa794d..9ae26b1683c 100644
--- a/src/ert/config/queue_config.py
+++ b/src/ert/config/queue_config.py
@@ -5,11 +5,11 @@
 import shutil
 from collections import defaultdict
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple, no_type_check
+from typing import Any, Dict, List, Mapping, Tuple, no_type_check
 
 from ert import _clib
 
-from .parsing import ConfigDict, ConfigValidationError, ErrorInfo
+from .parsing import ConfigDict, ConfigValidationError
 from .queue_system import QueueSystem
 
 GENERIC_QUEUE_OPTIONS: List[str] = ["MAX_RUNNING"]
@@ -31,32 +31,6 @@ class QueueConfig:
         default_factory=dict
     )
 
-    def __post_init__(self) -> None:
-        errors = []
-        for _, value in [
-            setting
-            for settings in self.queue_options.values()
-            for setting in settings
-            if setting[0] == "MAX_RUNNING" and setting[1]
-        ]:
-            err_msg = "QUEUE_OPTION MAX_RUNNING is"
-            try:
-                int_val = int(value)
-                if int_val < 0:
-                    errors.append(
-                        ErrorInfo(f"{err_msg} negative: {str(value)!r}").set_context(
-                            value
-                        )
-                    )
-            except ValueError:
-                errors.append(
-                    ErrorInfo(f"{err_msg} not an integer: {str(value)!r}").set_context(
-                        value
-                    )
-                )
-        if errors:
-            raise ConfigValidationError.from_collected(errors)
-
     @no_type_check
     @classmethod
     def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
@@ -95,11 +69,11 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
                     " usually provided by the site-configuration file, beware that"
                     " you are effectively replacing the default value provided."
                 )
-        if (
-            selected_queue_system == QueueSystem.TORQUE
-            and queue_options[QueueSystem.TORQUE]
-        ):
-            _validate_torque_options(queue_options[QueueSystem.TORQUE])
+
+        if queue_options[selected_queue_system]:
+            _validate_queue_options(
+                queue_options[selected_queue_system], selected_queue_system.name
+            )
 
         if (
             selected_queue_system != QueueSystem.LOCAL
@@ -139,17 +113,130 @@ def generate_dict(option_list: List[Tuple[str, str]]) -> Dict[str, List[str]]:
             )
 
 
-def _validate_torque_options(torque_options: List[Tuple[str, str]]) -> None:
-    for option_strings in torque_options:
+memory_options: Mapping[str, List[str]] = {
+    "LSF": [],
+    "SLURM": [],
+    "TORQUE": ["MEMORY_PER_JOB"],
+    "LOCAL": [],
+}
+
+string_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "DEBUG_OUTPUT",
+        "LSF_RESOURCE",
+        "LSF_SERVER",
+        "LSF_QUEUE",
+        "LSF_LOGIN_SHELL",
+        "LSF_RSH_CMD",
+        "BSUB_CMD",
+        "BJOBS_CMD",
+        "BKILL_CMD",
+        "BHIST_CMD",
+        "EXCLUDE_HOST",
+        "PROJECT_CODE",
+    ],
+    "SLURM": [
+        "SBATCH",
+        "SCANCEL",
+        "SCONTROL",
+        "SQUEUE",
+        "PARTITION",
+        "INCLUDE_HOST",
+        "EXCLUDE_HOST",
+    ],
+    "TORQUE": [
+        "QSUB_CMD",
+        "QSTAT_CMD",
+        "QDEL_CMD",
+        "QSTAT_OPTIONS",
+        "QUEUE",
+        "DEBUG_OUTPUT",
+    ],
+    "LOCAL": [],
+}
+
+positive_int_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "BJOBS_TIMEOUT",
+        "MAX_RUNNING",
+    ],
+    "SLURM": [
+        "MEMORY",
+        "MEMORY_PER_CPU",
+        "MAX_RUNNING",
+    ],
+    "TORQUE": [
+        "NUM_NODES",
+        "NUM_CPUS_PER_NODE",
+        "MAX_RUNNING",
+    ],
+    "LOCAL": ["MAX_RUNNING"],
+}
+
+float_options: Mapping[str, List[str]] = {
+    "LSF": [
+        "SUBMIT_SLEEP",
+    ],
+    "SLURM": [
+        "SQUEUE_TIMEOUT",
+    ],
+    "TORQUE": ["SUBMIT_SLEEP", "QUEUE_QUERY_TIMEOUT"],
+    "LOCAL": [],
+}
+
+bool_options: Mapping[str, List[str]] = {
+    "LSF": ["DEBUG_OUTPUT"],
+    "SLURM": [],
+    "TORQUE": [],
+    "LOCAL": [],
+}
+
+
+def _validate_queue_options(
+    queue_options: List[Tuple[str, str]], queue_type: str
+) -> None:
+    for option_strings in queue_options:
         option_name = option_strings[0]
         option_value = option_strings[1]
         if (
             option_value != ""  # This is equivalent to the option not being set
-            and option_name == "MEMORY_PER_JOB"
+            and option_name in memory_options[queue_type]
             and re.match("[0-9]+[mg]b", option_value) is None
         ):
             raise ConfigValidationError(
-                f"The value '{option_value}' is not valid for the Torque option "
+                f"The value '{option_value}' is not valid for "
+                f"the {queue_type} option "
                 "MEMORY_PER_JOB, it must be of "
                 "the format '<integer>mb' or '<integer>gb'."
             )
+        if option_name in string_options[queue_type] and not isinstance(
+            option_value, str
+        ):
+            raise ConfigValidationError(
+                f"The value '{option_value}' is not valid string "
+                f"for the {queue_type} option "
+                f"{option_name}, it must be of string type."
+            )
+        if option_name in float_options[queue_type] and (
+            re.match(r"^\d+(\.\d+)?$", option_value) is None and option_value != ""
+        ):
+            raise ConfigValidationError(
+                f"The value '{option_value}' is not valid integer or float"
+                f" for the {queue_type} option {option_name}."
+            )
+        if option_name in positive_int_options[queue_type] and (
+            re.match(r"^\d+$", option_value) is None and option_value != ""
+        ):
+            raise ConfigValidationError(
+                f"The value '{option_value}' is not valid positive integer "
+                f" for the {queue_type} option {option_name}."
+            )
+        if option_name in bool_options[queue_type] and not option_value in [
+            "TRUE",
+            "FALSE",
+            "",
+        ]:
+            raise ConfigValidationError(
+                f"The value '{option_value}' must be either TRUE or FALSE "
+                f" for the {queue_type} option {option_name}."
+            )
diff --git a/src/ert/job_queue/driver.py b/src/ert/job_queue/driver.py
index 41cca7191bc..bd0eaa610f1 100644
--- a/src/ert/job_queue/driver.py
+++ b/src/ert/job_queue/driver.py
@@ -11,7 +11,7 @@ class Driver(BaseCClass):  # type: ignore
     TYPE_NAME = "driver"
     _alloc = ResPrototype("void* queue_driver_alloc( queue_driver_enum )", bind=False)
     _free = ResPrototype("void queue_driver_free( driver )")
-    _set_option = ResPrototype("void queue_driver_set_option( driver , char* , char*)")
+    _set_option = ResPrototype("bool queue_driver_set_option( driver , char* , char*)")
     _get_option = ResPrototype("char* queue_driver_get_option(driver, char*)")
 
     def __init__(
@@ -52,7 +52,8 @@ def create_driver(cls, queue_config: QueueConfig) -> "Driver":
         driver = Driver(queue_config.queue_system)
         if queue_config.queue_system in queue_config.queue_options:
             for setting in queue_config.queue_options[queue_config.queue_system]:
-                driver.set_option(*setting)
+                if not driver.set_option(*setting):
+                    raise ValueError(f"Queue option not set {setting}")
         return driver
 
     @property
diff --git a/tests/unit_tests/config/test_ert_config.py b/tests/unit_tests/config/test_ert_config.py
index f43474fb1fc..5d4e5612a70 100644
--- a/tests/unit_tests/config/test_ert_config.py
+++ b/tests/unit_tests/config/test_ert_config.py
@@ -707,8 +707,8 @@ def test_default_ens_path(tmpdir):
     "max_running_value, expected_error",
     [
         (100, None),  # positive integer
-        (-1, "QUEUE_OPTION MAX_RUNNING is negative"),  # negative integer
-        ("not_an_integer", "QUEUE_OPTION MAX_RUNNING is not an integer"),  # non-integer
+        (-1, "not valid positive integer"),  # negative integer
+        ("not_an_integer", "not valid positive integer"),  # non-integer
     ],
 )
 def test_queue_config_max_running_invalid_values(max_running_value, expected_error):
@@ -744,11 +744,11 @@ def test_that_queue_config_dict_negative_value_invalid(
     with config_generator(tmp_path_factory) as config_values:
         config_dict = config_values.to_config_dict("test.ert", os.getcwd())
         config_dict[ConfigKeys.QUEUE_OPTION].append(
-            ["LSF", "MAX_RUNNING", "-6"],
+            [config_dict[ConfigKeys.QUEUE_SYSTEM], "MAX_RUNNING", "-6"],
         )
         with pytest.raises(
             expected_exception=ConfigValidationError,
-            match="QUEUE_OPTION MAX_RUNNING is negative",
+            match="not valid positive integer",
         ):
             ErtConfig.from_dict(config_dict)
 
diff --git a/tests/unit_tests/config/test_queue_config.py b/tests/unit_tests/config/test_queue_config.py
index eb1cd5277e7..3c12669d972 100644
--- a/tests/unit_tests/config/test_queue_config.py
+++ b/tests/unit_tests/config/test_queue_config.py
@@ -8,12 +8,7 @@
 import pytest
 from hypothesis import given
 
-from ert.config import (
-    ConfigValidationError,
-    ErtConfig,
-    QueueConfig,
-    QueueSystem,
-)
+from ert.config import ConfigValidationError, ErtConfig, QueueConfig, QueueSystem
 from ert.job_queue import Driver
 
 
@@ -208,3 +203,23 @@ def test_initializing_empty_config_values(queue_system, queue_system_option):
     assert driver.get_option("MAX_RUNNING") == "0"
     for options in config_object.queue_config.queue_options[queue_system]:
         assert isinstance(options, tuple)
+
+
+@pytest.mark.usefixtures("use_tmpdir")
+@pytest.mark.parametrize(
+    "queue_system, queue_option, queue_value, err_msg",
+    [
+        ("LSF", "BJOBS_TIMEOUT", "-3", "is not valid positive integer"),
+        ("SLURM", "SQUEUE_TIMEOUT", "5a", "is not valid integer or float"),
+        ("TORQUE", "NUM_NODES", "3.5", "is not valid positive integer"),
+    ],
+)
+def test_wrong_config_option_types(queue_system, queue_option, queue_value, err_msg):
+    filename = "config.ert"
+    with open(filename, "w", encoding="utf-8") as f:
+        f.write("NUM_REALIZATIONS 1\n")
+        f.write(f"QUEUE_SYSTEM {queue_system}\n")
+        f.write(f"QUEUE_OPTION {queue_system} {queue_option} {queue_value}\n")
+
+    with pytest.raises(ConfigValidationError, match=err_msg):
+        ErtConfig.from_file(filename)