From da9b628b0594fc32c06ccab442b0599e40780319 Mon Sep 17 00:00:00 2001 From: Karl W Schulz Date: Wed, 7 Aug 2024 07:17:36 -0500 Subject: [PATCH] remove usermode flag from runtime config and underlying code which is no longer necessary Signed-off-by: Karl W Schulz --- omnistat/collector_rms.py | 44 ++++++++------------------------ omnistat/config/omnistat.default | 1 - omnistat/config/omnistat.ornl | 3 +-- omnistat/monitor.py | 2 -- 4 files changed, 12 insertions(+), 38 deletions(-) diff --git a/omnistat/collector_rms.py b/omnistat/collector_rms.py index b3155aa9..07b83959 100644 --- a/omnistat/collector_rms.py +++ b/omnistat/collector_rms.py @@ -43,10 +43,9 @@ class RMSJob(Collector): - def __init__(self, userMode=False, annotations=False, jobDetection=None): + def __init__(self, annotations=False, jobDetection=None): logging.debug("Initializing resource manager job data collector") self.__prefix = "rmsjob_" - self.__userMode = userMode self.__annotationsEnabled = annotations self.__RMSMetrics = {} self.__rmsJobInfo = [] @@ -67,32 +66,15 @@ def __init__(self, userMode=False, annotations=False, jobDetection=None): self.__squeue_steps = [command] + flags.split() logging.debug("sqeueue_exec = %s" % self.__squeue_query) - # cache current slurm job in user mode profiling - assumption is that it does not change - if self.__userMode is True: - # read from file if available - jobFile = self.__rmsJobFile - if os.path.isfile(jobFile): - with open(jobFile, "r") as f: - self.__rmsJobInfo = json.load(f) - logging.info("--> usermode jobinfo (from file): %s" % self.__rmsJobInfo) - - else: - # no job file data available: query slurm directly - # note: a longer timeout is provided since we only query once and some systems have slow - # slurm response times - logging.info("User mode collector enabled for SLURM, querying job info once at startup...") - self.__rmsJobInfo = self.querySlurmJob(timeout=15, exit_on_error=True, mode="squeue") - logging.info("--> usermode jobinfo (from slurm query): %s" % self.__rmsJobInfo) - + # jobMode + if self.__rmsJobMode == "file-based": + logging.info( + "collector_rms: reading job information from prolog/epilog derived file (%s)" % self.__rmsJobFile + ) + elif self.__rmsJobMode == "squeue": + logging.info("collector_rms: will poll slurm periodicaly with squeue") else: - if self.__rmsJobMode == "file-based": - logging.info( - "collector_rms: reading job information from prolog/epilog derived file (%s)" % self.__rmsJobFile - ) - elif self.__rmsJobMode == "squeue": - logging.info("collector_rms: will poll slurm periodicaly with squeue") - else: - logging.error("Unsupported slurm job data collection mode") + logging.error("Unsupported slurm job data collection mode") def querySlurmJob(self, timeout=1, exit_on_error=False, mode="squeue"): """ @@ -165,13 +147,9 @@ def updateMetrics(self): results = None - if self.__userMode is True: - results = self.__rmsJobInfo + results = self.querySlurmJob(mode=self.__rmsJobMode) + if results: jobEnabled = True - else: - results = self.querySlurmJob(mode=self.__rmsJobMode) - if results: - jobEnabled = True # Case when SLURM job is allocated if jobEnabled: diff --git a/omnistat/config/omnistat.default b/omnistat/config/omnistat.default index 3155fb1a..63935243 100644 --- a/omnistat/config/omnistat.default +++ b/omnistat/config/omnistat.default @@ -1,7 +1,6 @@ [omnistat.collectors] port = 8001 -usermode = False enable_rocm_smi = True enable_amd_smi = False enable_rms = True diff --git a/omnistat/config/omnistat.ornl b/omnistat/config/omnistat.ornl index ccef0fc3..38ceb3c1 100644 --- a/omnistat/config/omnistat.ornl +++ b/omnistat/config/omnistat.ornl @@ -1,11 +1,10 @@ #-- -# Configuration for ORNL Crusher/Frontier +# Configuration for ORNL Crusher/Frontier/Borg #-- [omnistat.collectors] port = 8001 -usermode = False enable_rocm_smi = True enable_amd_smi = False enable_rms = True diff --git a/omnistat/monitor.py b/omnistat/monitor.py index bb803003..a638a499 100644 --- a/omnistat/monitor.py +++ b/omnistat/monitor.py @@ -59,7 +59,6 @@ def __init__(self, config): "enable_amd_smi_process", False ) self.runtimeConfig["collector_port"] = config["omnistat.collectors"].get("port", 8000) - self.runtimeConfig["collector_usermode"] = config["omnistat.collectors"].getboolean("usermode", False) self.runtimeConfig["collector_rocm_path"] = config["omnistat.collectors"].get("rocm_path", "/opt/rocm") allowed_ips = config["omnistat.collectors"].get("allowed_ips", "127.0.0.1") @@ -122,7 +121,6 @@ def initMetrics(self): self.__collectors.append( RMSJob( - userMode=self.runtimeConfig["collector_usermode"], annotations=self.runtimeConfig["rms_collector_annotations"], jobDetection=self.jobDetection, )