From 1092a349df4a57a2c9fb2dedb371261f0109d21a Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Fri, 14 Feb 2025 17:16:13 +0000 Subject: [PATCH 1/2] DAOS-17120 test: remove pcmd with run_remote Replace usage of pcmd with run_remote Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Dalton Bohning --- .../ftest/control/dmg_storage_scan_scm.py | 25 ++++----- src/tests/ftest/control/ssd_socket.py | 38 ++++++------- .../ftest/control/super_block_versioning.py | 17 +++--- src/tests/ftest/control/version.py | 53 +++++++------------ src/tests/ftest/deployment/network_failure.py | 17 +++--- src/tests/ftest/dfuse/posix_stat.py | 28 +++++----- src/tests/ftest/erasurecode/truncate.py | 26 +++++---- src/tests/ftest/server/cpu_usage.py | 20 ++++--- 8 files changed, 105 insertions(+), 119 deletions(-) diff --git a/src/tests/ftest/control/dmg_storage_scan_scm.py b/src/tests/ftest/control/dmg_storage_scan_scm.py index 4c3373c0d0f..451b0e3d5e4 100644 --- a/src/tests/ftest/control/dmg_storage_scan_scm.py +++ b/src/tests/ftest/control/dmg_storage_scan_scm.py @@ -1,12 +1,13 @@ """ (C) Copyright 2020-2022 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ import os from control_test_base import ControlTestBase -from general_utils import pcmd, run_pcmd +from run_utils import run_remote class DmgStorageScanSCMTest(ControlTestBase): @@ -42,21 +43,17 @@ def verify_storage_scan_scm(self, storage_dict): for scm_namespace in storage_dict["scm_namespaces"]: # Verify that all namespaces exist under /dev. pmem_name = scm_namespace["blockdev"] - lscmd = "{} {}".format("ls", os.path.join("/dev", pmem_name)) - # rc is a dictionary where return code is the key. - rc = pcmd(hosts=self.hostlist_servers, command=lscmd) - - if 0 not in rc: - errors.append("{} didn't exist under /dev!".format(pmem_name)) + ls_cmd = f"ls {os.path.join('/dev', pmem_name)}" + if not run_remote(self.log, self.hostlist_servers, ls_cmd).passed: + errors.append(f"{pmem_name} didn't exist under /dev!") # Verify the Socket ID. - numa_node_path = "/sys/class/block/{}/device/numa_node".format( - pmem_name) - command = "cat {}".format(numa_node_path) - out_list = run_pcmd(hosts=self.hostlist_servers, command=command) - - # This one is in str. - expected_numa_node = out_list[0]["stdout"][0] + numa_node_path = f"/sys/class/block/{pmem_name}/device/numa_node" + command = f"cat {numa_node_path}" + result = run_remote(self.log, self.hostlist_servers, command) + if not result.passed: + errors.append(f"{command} failed on {result.failed_hosts}") + expected_numa_node = result.joined_stdout actual_numa_node = str(scm_namespace["numa_node"]) if expected_numa_node != actual_numa_node: diff --git a/src/tests/ftest/control/ssd_socket.py b/src/tests/ftest/control/ssd_socket.py index 2a899dfb446..a18625a93ce 100644 --- a/src/tests/ftest/control/ssd_socket.py +++ b/src/tests/ftest/control/ssd_socket.py @@ -1,13 +1,15 @@ """ (C) Copyright 2020-2022 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ import os from textwrap import wrap +from ClusterShell.NodeSet import NodeSet from control_test_base import ControlTestBase -from general_utils import pcmd, run_pcmd +from run_utils import run_remote class SSDSocketTest(ControlTestBase): @@ -36,10 +38,10 @@ def debug_numa_node(self, pci_addr_heads): for pci_addr_head in pci_addr_heads: self.log.debug( "----- Search PCI Addr Head %s in /sys -----", pci_addr_head) - run_pcmd( - hosts=self.hostlist_servers, - command="find /sys -name \"{}\"".format(pci_addr_head), - verbose=True) + run_remote( + self.log, + self.hostlist_servers, + f'find /sys -name "{pci_addr_head}"') # Another way to obtain the Socket ID is to use hwloc-ls --whole-io # --verbose. It contains something like: @@ -55,9 +57,10 @@ def debug_numa_node(self, pci_addr_heads): # much more cumbersome than reading the numa_node, so it's called here # for mainly debugging purpose. self.log.debug("----- Show PCI Address in hwloc-ls -----") - pcmd( - hosts=self.hostlist_servers, - command="hwloc-ls --whole-io --verbose") + run_remote( + self.log, + self.hostlist_servers, + "hwloc-ls --whole-io --verbose") def verify_ssd_sockets(self, storage_dict): """Verify SSD sockets. @@ -98,17 +101,14 @@ def verify_ssd_sockets(self, storage_dict): pci_addr_heads.append(pci_addr_head) # Call cat on the server host, not necessarily the local test host. - results = run_pcmd( - hosts=self.hostlist_servers[0:1], command="cat {}".format(numa_node_path)) - - # Obtain the numa_node content. - fs_socket_id = "" - for result in results: - # Test that the content is expected. - fs_socket_id = result["stdout"][-1] - if fs_socket_id != str(cmd_socket_id): - errors.append( - "Unexpected socket ID! Cmd: {}; FS: {}".format(cmd_socket_id, fs_socket_id)) + command = f"cat {numa_node_path}" + result = run_remote( + self.log, NodeSet(self.hostlist_servers[0]), command) + if not result.passed: + errors.append(f"{command} failed on {result.failed_hosts}") + fs_socket_id = result.joined_stdout + if fs_socket_id != str(cmd_socket_id): + errors.append(f"Unexpected socket ID! Cmd: {cmd_socket_id}; FS: {fs_socket_id}") if errors: # Since we're dealing with system files and we don't have access to diff --git a/src/tests/ftest/control/super_block_versioning.py b/src/tests/ftest/control/super_block_versioning.py index 829e5615e17..500318b2c56 100644 --- a/src/tests/ftest/control/super_block_versioning.py +++ b/src/tests/ftest/control/super_block_versioning.py @@ -1,14 +1,15 @@ """ (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ - - import os from apricot import TestWithServers -from general_utils import check_file_exists, pcmd +from command_utils import command_as_user +from general_utils import check_file_exists +from run_utils import run_remote class SuperBlockVersioning(TestWithServers): @@ -39,9 +40,7 @@ def test_super_block_version_basic(self): self.fail("{}: {} not found".format(check_result[1], fname)) # Make sure that 'version' is in the file, run task to check - cmd = "sudo cat {} | grep -F \"version\"".format(fname) - result = pcmd(self.hostlist_servers, cmd, timeout=20) - - # Determine if the command completed successfully across all the hosts - if len(result) > 1 or 0 not in result: - self.fail("Was not able to find version in {} file".format(fname)) + cmd = command_as_user(f'cat {fname} | grep -F "version"', "root") + result = run_remote(self.log, self.hostlist_servers, cmd, timeout=20) + if not result.passed: + self.fail(f"Was not able to find version in {fname} file") diff --git a/src/tests/ftest/control/version.py b/src/tests/ftest/control/version.py index 247b7072953..47ddd1f6752 100644 --- a/src/tests/ftest/control/version.py +++ b/src/tests/ftest/control/version.py @@ -1,5 +1,6 @@ ''' (C) Copyright 2018-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -7,7 +8,9 @@ import re from apricot import TestWithServers -from general_utils import append_error, report_errors, run_pcmd +from ClusterShell.NodeSet import NodeSet +from general_utils import append_error, report_errors +from run_utils import run_remote from server_utils_base import DaosServerCommandRunner @@ -37,31 +40,17 @@ def test_version(self): """ # Get RPM version. rpm_command = "rpm -qa | grep daos-server" - output = run_pcmd(hosts=self.hostlist_servers, command=rpm_command) - self.log.debug("RPM output = %s", output) - rc = output[0]["exit_status"] - stdout = output[0]["stdout"] - if rc != 0: - report_errors(self, ["DAOS RPMs not properly installed: rc={}".format(rc)]) - rpm_version = None - for rpm in stdout: - result = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", rpm) - if result: - rpm_version = result[0] - break - if not result: - report_errors(self, ["RPM version could not be defined"]) + result = run_remote(self.log, self.hostlist_servers, rpm_command) + if not result.passed: + self.fail("Failed to list daos-server RPMs") + if not result.homogeneous: + self.fail("Non-homogenous daos-server RPMs") + match = re.findall(r"daos-server-[tests-|tests_openmpi-]*([\d.]+)", result.joined_stdout) + if not match: + self.fail("Failed to get version from daos-server RPMs") + rpm_version = match[0] self.log.info("RPM version = %s", rpm_version) - # Remove configuration files - cleanup_cmds = [ - "sudo find /etc/daos/certs -type f -delete -print", - "sudo rm -fv /etc/daos/daos_server.yml /etc/daos/daos_control.yml" - " /etc/daos/daos_agent.yml", - ] - for cmd in cleanup_cmds: - run_pcmd(hosts=self.hostlist_servers, command=cmd) - # Get dmg version. dmg_version = self.get_dmg_command().version()["response"]["version"] self.log.info("dmg version = %s", dmg_version) @@ -75,17 +64,11 @@ def test_version(self): # Get daos_agent version. daos_agent_version = None daos_agent_cmd = "daos_agent --json version" - output = run_pcmd(hosts=self.hostlist_servers, command=daos_agent_cmd) - self.log.debug("DAOS Agent output = %s", output) - rc = output[0]["exit_status"] - stdout = output[0]["stdout"] - if rc != 0: - msg = "DAOS Agent not properly installed: rc={}".format(rc) - append_error(errors, msg, stdout) - else: - self.log.info("DAOS Agent stdout = %s", "".join(stdout)) - daos_agent_version = json.loads("".join(stdout))["response"]["version"] - self.log.info("daos_agent version = %s", daos_agent_version) + result = run_remote(self.log, NodeSet(self.hostlist_servers[0]), daos_agent_cmd) + if not result.passed: + self.fail("Failed to get daos_agent version") + daos_agent_version = json.loads(result.joined_stdout)["response"]["version"] + self.log.info("daos_agent version = %s", daos_agent_version) # Get daos_server version daos_server_cmd = DaosServerCommandRunner(path=self.bin) diff --git a/src/tests/ftest/deployment/network_failure.py b/src/tests/ftest/deployment/network_failure.py index 84c7b259370..ce5baadbc0b 100644 --- a/src/tests/ftest/deployment/network_failure.py +++ b/src/tests/ftest/deployment/network_failure.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -10,11 +11,12 @@ from ClusterShell.NodeSet import NodeSet from command_utils_base import CommandFailure from dmg_utils import check_system_query_status -from general_utils import report_errors, run_pcmd +from general_utils import report_errors from ior_test_base import IorTestBase from ior_utils import IorCommand from job_manager_utils import get_job_manager from network_utils import NetworkInterface +from run_utils import run_remote class NetworkFailureTest(IorTestBase): @@ -97,16 +99,15 @@ def create_ip_to_host(self): """ command = "hostname -i" - results = run_pcmd(hosts=self.hostlist_servers, command=command) - self.log.info("hostname -i results = %s", results) + result = run_remote(self.log, self.hostlist_servers, command) + if not result.passed: + self.fail("Failed to get hostname on servers") ip_to_host = {} - for result in results: - ips_str = result["stdout"][0] + for hosts, stdout in result.all_stdout.items(): # There may be multiple IP addresses for one host. - ip_addresses = ips_str.split() - for ip_address in ip_addresses: - ip_to_host[ip_address] = NodeSet(str(result["hosts"])) + for ip_address in stdout.split(): + ip_to_host[ip_address] = NodeSet(hosts) return ip_to_host diff --git a/src/tests/ftest/dfuse/posix_stat.py b/src/tests/ftest/dfuse/posix_stat.py index 57030f6567a..a5ede764c42 100644 --- a/src/tests/ftest/dfuse/posix_stat.py +++ b/src/tests/ftest/dfuse/posix_stat.py @@ -1,11 +1,13 @@ """ (C) Copyright 2018-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ -from general_utils import get_remote_file_size, run_pcmd +from general_utils import get_remote_file_size from ior_test_base import IorTestBase +from run_utils import run_remote class POSIXStatTest(IorTestBase): @@ -55,31 +57,27 @@ def test_stat_parameters(self): create_cont=False, test_file_suffix=test_file_suffix) # Get current epoch. - current_epoch = -1 - output = run_pcmd(hosts=self.hostlist_clients, command="date +%s") - stdout = output[0]["stdout"] - self.log.info("date stdout = %s", stdout) - current_epoch = stdout[-1] + result = run_remote(self.log, self.hostlist_clients, "date +%s") + if not result.passed: + self.fail("Failed to get date on clients") + current_epoch = int(result.output[0].stdout[-1]) # Get epoch of the created file. (technically %Z is for last status # change. %W is file birth, but it returns 0.) - creation_epoch = -1 # As in date command, run stat command in the client node. stat_command = "stat -c%Z {}".format(self.ior_cmd.test_file.value) - output = run_pcmd(hosts=self.hostlist_clients, command=stat_command) - stdout = output[0]["stdout"] - self.log.info("stat stdout = %s", stdout) - creation_epoch = stdout[-1] + result = run_remote(self.log, self.hostlist_clients, stat_command) + if not result.passed: + self.fail(f"{stat_command} failed on clients") + creation_epoch = int(result.output[0].stdout[-1]) # Calculate the epoch difference between the creation time and the # value in the file metadata. They're usually 2 sec apart. - creation_epoch_int = int(creation_epoch) - current_epoch_int = int(current_epoch) - diff_epoch = creation_epoch_int - current_epoch_int + diff_epoch = creation_epoch - current_epoch if diff_epoch > 10: msg = "Unexpected creation time! Expected = {}; Actual = {}" error_list.append( - msg.format(current_epoch_int, creation_epoch_int)) + msg.format(current_epoch, creation_epoch)) # 2. Verify file size. # Get file size. diff --git a/src/tests/ftest/erasurecode/truncate.py b/src/tests/ftest/erasurecode/truncate.py index bb570755bc8..6635e7d5008 100644 --- a/src/tests/ftest/erasurecode/truncate.py +++ b/src/tests/ftest/erasurecode/truncate.py @@ -1,5 +1,6 @@ ''' (C) Copyright 2019-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent ''' @@ -7,7 +8,8 @@ from dfuse_utils import get_dfuse, start_dfuse from fio_test_base import FioBase -from general_utils import get_remote_file_size, run_pcmd +from general_utils import get_remote_file_size +from run_utils import run_remote class Ecodtruncate(FioBase): @@ -59,13 +61,14 @@ def test_ec_truncate(self): # Get the file stats and confirm size file_size = get_remote_file_size(self.hostlist_clients[0], testfile) - self.assertEqual(original_fs, file_size) + self.assertEqual( + original_fs, file_size, "file size after truncase is not equal to original") # Truncate the original file which will extend the size of file. - result = run_pcmd( - self.hostlist_clients, "truncate -s {} {}".format(truncate_size, testfile)) - if result[0]["exit_status"] == 1: - self.fail("Failed to truncate file {}".format(testfile)) + result = run_remote( + self.log, self.hostlist_clients, f"truncate -s {truncate_size} {testfile}") + if not result.passed: + self.fail(f"Failed to truncate file {testfile}") # Verify the file size is extended. file_size = get_remote_file_size(self.hostlist_clients[0], testfile) @@ -75,14 +78,15 @@ def test_ec_truncate(self): self.fio_cmd.run() # Truncate the original file and shrink to original size. - result = run_pcmd( - self.hostlist_clients, "truncate -s {} {}".format(original_fs, testfile)) - if result[0]["exit_status"] == 1: - self.fail("Failed to truncate file {}".format(testfile)) + result = run_remote( + self.log, self.hostlist_clients, f"truncate -s {original_fs} {testfile}") + if not result.passed: + self.fail(f"Failed to truncate file {testfile}") # Verify the file size is shrink to original. file_size = get_remote_file_size(self.hostlist_clients[0], testfile) - self.assertEqual(original_fs, file_size) + self.assertEqual( + original_fs, file_size, "file size after truncase is not equal to original") # Read and verify the data after truncate. self.fio_cmd.run() diff --git a/src/tests/ftest/server/cpu_usage.py b/src/tests/ftest/server/cpu_usage.py index 46886190a41..ad81f208695 100644 --- a/src/tests/ftest/server/cpu_usage.py +++ b/src/tests/ftest/server/cpu_usage.py @@ -1,12 +1,13 @@ """ (C) Copyright 2020-2022 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ import time -from general_utils import run_pcmd from ior_test_base import IorTestBase +from run_utils import run_remote class CPUUsage(IorTestBase): @@ -36,9 +37,11 @@ def get_cpu_usage(self, pid, usage_limit): # Get (instantaneous) CPU usage of the PID with top. top_pid = "top -p {} -b -n 1".format(pid) usage = -1 - results = run_pcmd(hosts=self.hostlist_servers, command=top_pid) - for result in results: - process_row = result["stdout"][-1] + result = run_remote(self.log, self.hostlist_servers, top_pid) + if not result.passed: + self.fail(f"{top_pid} failed on {result.failed_hosts}") + for data in result.output: + process_row = data.stdout[-1] self.log.info("Process row = %s", process_row) values = process_row.split() self.log.info("Values = %s", values) @@ -78,10 +81,11 @@ def test_cpu_usage(self): # At this point, daos_engine should be started, but do the repetitive # calls just in case. for _ in range(5): - results = run_pcmd(hosts=self.hostlist_servers, command=ps_engine) - for result in results: - self.log.info("ps output = %s", "\n".join(result["stdout"])) - pid = result["stdout"][-1] + result = run_remote(self.log, self.hostlist_servers, ps_engine) + if not result.passed: + self.fail(f"{ps_engine} failed on {result.failed_hosts}") + for data in result.output: + pid = data.stdout[-1] self.log.info("PID = %s", pid) if "PID" not in pid: pid_found = True From 1cf3345a0b4f8cc3be042ecc0f74bc23b0099569 Mon Sep 17 00:00:00 2001 From: Dalton Bohning Date: Wed, 19 Feb 2025 21:42:22 +0000 Subject: [PATCH 2/2] handle utils Test-tag: pr CPUUsage DAOSVersion DmgStorageScanSCMTest Ecodtruncate NetworkFailureTest POSIXStatTest SSDSocketTest SuperBlockVersioning soak_smoke MacsioTest daos_racer OSAOnlineExtend OSAOnlineParallelTest DdbTest ContainerListConsolidationTest performance,-manual NvmeEnospace Skip-unit-tests: true Skip-fault-injection-test: true Signed-off-by: Dalton Bohning --- src/tests/ftest/daos_racer/parallel.py | 6 +- src/tests/ftest/nvme/enospace.py | 2 +- .../recovery/container_list_consolidation.py | 7 +- src/tests/ftest/recovery/ddb.py | 18 ++- src/tests/ftest/util/agent_utils.py | 48 ++----- src/tests/ftest/util/daos_racer_utils.py | 19 +-- src/tests/ftest/util/ddb_utils.py | 17 ++- src/tests/ftest/util/general_utils.py | 8 +- src/tests/ftest/util/job_manager_utils.py | 118 +++++++----------- src/tests/ftest/util/macsio_util.py | 9 +- src/tests/ftest/util/performance_test_base.py | 7 +- src/tests/ftest/util/run_utils.py | 10 ++ src/tests/ftest/util/server_utils.py | 21 ++-- src/tests/ftest/util/soak_utils.py | 27 ++-- 14 files changed, 139 insertions(+), 178 deletions(-) diff --git a/src/tests/ftest/daos_racer/parallel.py b/src/tests/ftest/daos_racer/parallel.py index 85c1a39cce8..781e16dcd57 100755 --- a/src/tests/ftest/daos_racer/parallel.py +++ b/src/tests/ftest/daos_racer/parallel.py @@ -1,6 +1,7 @@ #!/usr/bin/python3 """ (C) Copyright 2021-2022 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -52,7 +53,8 @@ def test_daos_racer_parallel(self): job_manager.run() except CommandFailure as error: - self.log.error("DAOS Racer Failed: %s", str(error)) - self.fail("Test was expected to pass but it failed.\n") + msg = f"daos_racer failed: {error}" + self.log.error(msg) + self.fail(msg) self.log.info("Test passed!") diff --git a/src/tests/ftest/nvme/enospace.py b/src/tests/ftest/nvme/enospace.py index d8d5baf3c04..a8e0620e5e4 100644 --- a/src/tests/ftest/nvme/enospace.py +++ b/src/tests/ftest/nvme/enospace.py @@ -340,7 +340,7 @@ def err_to_str(err_no): return pydaos.DaosErrorCode(err_no).name logfile_glob = log_file + r".*[0-9]" - errors_count = get_errors_count(self.hostlist_clients, logfile_glob) + errors_count = get_errors_count(self.log, self.hostlist_clients, logfile_glob) for error in self.expected_errors: if error not in errors_count: errors_count[error] = 0 diff --git a/src/tests/ftest/recovery/container_list_consolidation.py b/src/tests/ftest/recovery/container_list_consolidation.py index 0c377e451b2..9911bb9b14c 100644 --- a/src/tests/ftest/recovery/container_list_consolidation.py +++ b/src/tests/ftest/recovery/container_list_consolidation.py @@ -1,5 +1,6 @@ """ (C) Copyright 2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -74,9 +75,8 @@ def test_orphan_container(self): server_host=NodeSet(self.hostlist_servers[0]), path=self.bin, mount_point=scm_mount, pool_uuid=pool.uuid, vos_file=vos_file) cmd_result = ddb_command.list_component() - ls_out = "\n".join(cmd_result[0]["stdout"]) uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" - match = re.search(uuid_regex, ls_out) + match = re.search(uuid_regex, cmd_result.joined_stdout) if match is None: self.fail("Unexpected output from ddb command, unable to parse.") self.log.info("Container UUID from ddb ls = %s", match.group(1)) @@ -133,9 +133,8 @@ def test_orphan_container(self): "(PMEM only).") self.log_step(msg) cmd_result = ddb_command.list_component() - ls_out = "\n".join(cmd_result[0]["stdout"]) uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" - match = re.search(uuid_regex, ls_out) + match = re.search(uuid_regex, cmd_result.joined_stdout) if match: errors.append("Container UUID is found in shard! Checker didn't remove it.") diff --git a/src/tests/ftest/recovery/ddb.py b/src/tests/ftest/recovery/ddb.py index 25e7223e0fa..777c97a53d7 100644 --- a/src/tests/ftest/recovery/ddb.py +++ b/src/tests/ftest/recovery/ddb.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -176,10 +177,9 @@ def test_recovery_ddb_ls(self): # CONT: (/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f # stdout is a list which contains each line as separate element. Concatenate them # to single string so that we can apply regex. - ls_out = "\n".join(cmd_result[0]["stdout"]) # Matches the container uuid uuid_regex = r"([0-f]{8}-[0-f]{4}-[0-f]{4}-[0-f]{4}-[0-f]{12})" - match = re.search(uuid_regex, ls_out) + match = re.search(uuid_regex, cmd_result.joined_stdout) if match is None: self.fail("Unexpected output from ddb command, unable to parse.") self.log.info("Container UUID from ddb ls = %s", match.group(1)) @@ -198,10 +198,9 @@ def test_recovery_ddb_ls(self): # OBJ: (/[0]/[0]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643008.1.0.1 # OBJ: (/[0]/[1]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643009.1.0.1 # OBJ: (/[0]/[2]) /3082b7d3-32f9-41ea-bcbf-5d6450c1b34f/937030214649643016.1.0.1 - ls_out = "\n".join(cmd_result[0]["stdout"]) # Matches an object id. (4 digits separated by a period '.') object_id_regex = r"\d+\.\d+\.\d+\.\d+" - match = re.findall(object_id_regex, ls_out) + match = re.findall(object_id_regex, cmd_result.joined_stdout) self.log.info("List objects match = %s", match) actual_object_count = len(match) @@ -217,12 +216,11 @@ def test_recovery_ddb_ls(self): for obj_index in range(object_count): component_path = "[0]/[{}]".format(obj_index) cmd_result = ddb_command.list_component(component_path=component_path) - ls_out = "\n".join(cmd_result[0]["stdout"]) # Sample output. # /d4e0c836-17bd-4df3-b255-929732486bab/281479271677953.0.0/ # [0] 'Sample dkey 0 0' (15) # [1] 'Sample dkey 0 1' (15) - match = re.findall(dkey_regex, ls_out) + match = re.findall(dkey_regex, cmd_result.joined_stdout) actual_dkey_count += len(match) @@ -248,7 +246,7 @@ def test_recovery_ddb_ls(self): for dkey_index in range(dkey_count): component_path = "[0]/[{}]/[{}]".format(obj_index, dkey_index) cmd_result = ddb_command.list_component(component_path=component_path) - ls_out = "\n".join(cmd_result[0]["stdout"]) + ls_out = cmd_result.joined_stdout msg = "List akeys obj_index = {}, dkey_index = {}, stdout = {}".format( obj_index, dkey_index, ls_out) self.log.info(msg) @@ -356,7 +354,7 @@ def test_recovery_ddb_rm(self): # 4. Call ddb rm to remove the akey. cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]/[0]") - self.log.info("rm akey stdout = %s", cmd_result[0]["stdout"]) + self.log.info("rm akey stdout = %s", cmd_result.joined_stdout) # 5. Restart the server to use the API. dmg_command.system_start() @@ -386,7 +384,7 @@ def test_recovery_ddb_rm(self): # 9. Call ddb rm to remove the dkey. cmd_result = ddb_command.remove_component(component_path="[0]/[0]/[0]") - self.log.info("rm dkey stdout = %s", cmd_result[0]["stdout"]) + self.log.info("rm dkey stdout = %s", cmd_result.joined_stdout) # 10. Restart the server to use the API. dmg_command.system_start() @@ -415,7 +413,7 @@ def test_recovery_ddb_rm(self): # 14. Call ddb rm to remove the object. cmd_result = ddb_command.remove_component(component_path="[0]/[0]") - self.log.info("rm object stdout = %s", cmd_result[0]["stdout"]) + self.log.info("rm object stdout = %s", cmd_result.joined_stdout) # 15. Restart the server to use daos command. dmg_command.system_start() diff --git a/src/tests/ftest/util/agent_utils.py b/src/tests/ftest/util/agent_utils.py index 74b79fb9796..40d52a4ee60 100644 --- a/src/tests/ftest/util/agent_utils.py +++ b/src/tests/ftest/util/agent_utils.py @@ -1,10 +1,10 @@ """ (C) Copyright 2019-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ import os -import re import socket from agent_utils_params import DaosAgentTransportCredentials, DaosAgentYamlParameters @@ -13,7 +13,7 @@ from command_utils_base import (CommandWithParameters, CommonConfig, EnvironmentVariables, FormattedParameter) from exception_utils import CommandFailure -from general_utils import get_default_config_file, get_log_file, run_pcmd +from general_utils import get_default_config_file, get_log_file from run_utils import run_remote @@ -293,12 +293,17 @@ def start(self): super().start() def dump_attachinfo(self): - """Run dump-attachinfo on the daos_agent.""" + """Run dump-attachinfo on the daos_agent. + + Raises: + CommandFailure: if the daos_agent command fails. + + Returns: + CommandResult: groups of command results from the same hosts with the same return status + """ cmd = self.manager.job.copy() cmd.set_sub_command("dump-attachinfo") - self.attachinfo = run_pcmd(self.hosts, - str(self.manager.job))[0]["stdout"] - self.log.info("Agent attachinfo: %s", self.attachinfo) + return run_remote(self.log, self.hosts, cmd.with_exports) def support_collect_log(self, **kwargs): """Collect logs for debug purpose. @@ -309,6 +314,7 @@ def support_collect_log(self, **kwargs): archive (bool, optional): Archive the log/config files extra_logs_dir (str, optional): Collect the Logs from given custom directory target-host (str, optional): R sync all the logs to target system + Raises: CommandFailure: if the daos_agent command fails. @@ -324,36 +330,6 @@ def support_collect_log(self, **kwargs): self.log.info("Support collect-log on clients: %s", str(cmd)) return run_remote(self.log, self.hosts, cmd.with_exports) - def get_attachinfo_file(self): - """Run dump-attachinfo on the daos_agent. - - Returns: - str: the attach info file path - - """ - server_name = self.get_config_value("name") - - self.dump_attachinfo() - - attach_info = self.attachinfo - - # Filter log messages from attachinfo content - messages = [x for x in attach_info if re.match(r"^(name\s|size\s|all|\d+\s)", x)] - attach_info_contents = "\n".join(messages) - attach_info_filename = f"{server_name}.attach_info_tmp" - - if len(messages) < 4: - self.log.info("Malformed attachinfo file: %s", attach_info_contents) - return None - - # Write an attach_info_tmp file in this directory for cart_ctl to use - attachinfo_file_path = os.path.join(self.outputdir, attach_info_filename) - - with open(attachinfo_file_path, 'w', encoding='utf-8') as file_handle: - file_handle.write(attach_info_contents) - - return attachinfo_file_path - def stop(self): """Stop the agent through the job manager. diff --git a/src/tests/ftest/util/daos_racer_utils.py b/src/tests/ftest/util/daos_racer_utils.py index a1ebd6e92d6..ddea074a701 100644 --- a/src/tests/ftest/util/daos_racer_utils.py +++ b/src/tests/ftest/util/daos_racer_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -10,7 +11,8 @@ from command_utils_base import BasicParameter, FormattedParameter from env_modules import load_mpi from exception_utils import CommandFailure, MPILoadError -from general_utils import get_log_file, pcmd +from general_utils import get_log_file +from run_utils import run_remote class DaosRacerCommand(ExecutableCommand): @@ -109,15 +111,14 @@ def run(self, raise_exception=None): str(self), self.host, "no" if self.clush_timeout.value is None else "a {}s".format(self.clush_timeout.value)) - return_codes = pcmd(self.host, self.with_exports, True, self.clush_timeout.value) - if 0 not in return_codes or len(return_codes) > 1: - # Kill the daos_racer process if the remote command timed out - if 255 in return_codes: - self.log.info( - "Stopping timed out daos_racer process on %s", self.host) - pcmd(self.host, "pkill daos_racer", True) + result = run_remote( + self.log, self.host, self.with_exports, timeout=self.clush_timeout.value) + if not result.passed: + if result.timeout: + self.log.info("Stopping timed out daos_racer process on %s", result.timeout_hosts) + run_remote(self.log, result.timeout_hosts, "pkill daos_racer", True) if raise_exception: - raise CommandFailure("Error running '{}'".format(self._command)) + raise CommandFailure(f"Error running '{self._command}'") self.log.info("Test passed!") diff --git a/src/tests/ftest/util/ddb_utils.py b/src/tests/ftest/util/ddb_utils.py index cf994a51378..12b787eaf71 100644 --- a/src/tests/ftest/util/ddb_utils.py +++ b/src/tests/ftest/util/ddb_utils.py @@ -1,12 +1,13 @@ """ (C) Copyright 2022 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ import os from command_utils_base import BasicParameter, CommandWithParameters, FormattedParameter -from general_utils import run_pcmd +from run_utils import run_remote class DdbCommandBase(CommandWithParameters): @@ -18,9 +19,9 @@ def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): Args: server_host (NodeSet): Server host to run the command. path (str): path to the ddb command. - verbose (bool, optional): Display command output when run_pcmd is called. + verbose (bool, optional): Display command output in run. Defaults to True. - timeout (int, optional): Command timeout (sec) used in run_pcmd. Defaults to + timeout (int, optional): Command timeout (sec) used in run. Defaults to None. sudo (bool, optional): Whether to run ddb with sudo. Defaults to True. """ @@ -40,7 +41,7 @@ def __init__(self, server_host, path, verbose=True, timeout=None, sudo=True): # VOS file path. self.vos_path = BasicParameter(None, position=1) - # Members needed for run_pcmd(). + # Members needed for run(). self.verbose = verbose self.timeout = timeout @@ -60,13 +61,11 @@ def run(self): """Run the command. Returns: - list: A list of dictionaries with each entry containing output, exit status, - and interrupted status common to each group of hosts. + CommandResult: groups of command results from the same hosts with the same return status """ - return run_pcmd( - hosts=self.host, command=str(self), verbose=self.verbose, - timeout=self.timeout) + return run_remote( + self.log, self.host, command=str(self), verbose=self.verbose, timeout=self.timeout) class DdbCommand(DdbCommandBase): diff --git a/src/tests/ftest/util/general_utils.py b/src/tests/ftest/util/general_utils.py index 84e55601ff2..198c690594c 100644 --- a/src/tests/ftest/util/general_utils.py +++ b/src/tests/ftest/util/general_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -788,10 +789,11 @@ def get_remote_file_size(host, file_name): return int(result.stdout_text) -def get_errors_count(hostlist, file_glob): +def get_errors_count(log, hostlist, file_glob): """Count the number of errors found in log files. Args: + log (logger): logger for the messages produced by this method hostlist (list): System list to looks for an error. file_glob (str): Glob pattern of the log file to parse. @@ -803,9 +805,9 @@ def get_errors_count(hostlist, file_glob): cmd = "cat {} | sed -n -E -e ".format(get_log_file(file_glob)) cmd += r"'/^.+[[:space:]]ERR[[:space:]].+[[:space:]]DER_[^(]+\([^)]+\).+$/" cmd += r"s/^.+[[:space:]]DER_[^(]+\((-[[:digit:]]+)\).+$/\1/p'" - results = run_pcmd(hostlist, cmd, False, None, None) + result = run_remote(log, hostlist, cmd, verbose=False) errors_count = {} - for error_str in sum([result["stdout"] for result in results], []): + for error_str in sum([data.stdout for data in result.output], []): error = int(error_str) if error not in errors_count: errors_count[error] = 0 diff --git a/src/tests/ftest/util/job_manager_utils.py b/src/tests/ftest/util/job_manager_utils.py index 5f687900fbc..e6aaff0851b 100644 --- a/src/tests/ftest/util/job_manager_utils.py +++ b/src/tests/ftest/util/job_manager_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2020-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -14,8 +15,7 @@ from command_utils_base import BasicParameter, EnvironmentVariables, FormattedParameter from env_modules import load_mpi from exception_utils import CommandFailure, MPILoadError -from general_utils import (get_job_manager_class, get_journalctl_command, journalctl_time, pcmd, - run_pcmd) +from general_utils import get_job_manager_class, get_journalctl_command, journalctl_time from run_utils import run_remote, stop_processes from write_host_file import write_host_file @@ -276,14 +276,9 @@ def _get_remote_process_state(self, message=None): self.log.debug( "%s processes still running remotely%s:", self.command, " {}".format(message) if message else "") - self.log.debug("Running (on %s): %s", self._hosts, command) - results = pcmd(self._hosts, command, True, 10, None) - - # The pcmd method will return a dictionary with a single key, e.g. - # {1: }, if there are no remote processes running on any of the - # hosts. If this value is not returned, indicate there are remote - # processes running by returning a "R" state. - return "R" if 1 not in results or len(results) > 1 else None + result = run_remote(self.log, self._hosts, command, timeout=10) + # Return "R" if processes were found running on any hosts + return "R" if result.passed_hosts else None def run(self, raise_exception=None): """Run the command. @@ -711,7 +706,6 @@ def run(self, raise_exception=None): # Start the daos_server.service self.service_enable() result = self.service_start() - # result = self.service_status() # Determine if the command has launched correctly using its # check_subprocess_status() method. @@ -815,25 +809,21 @@ def _run_unit_command(self, command): CommandFailure: if there is an issue running the command Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ self._systemctl.unit_command.value = command self.timestamps[command] = journalctl_time() - result = pcmd(self._hosts, str(self), self.verbose, self.timeout) - if 255 in result: + cmd = str(self) + result = run_remote( + self.log, self._hosts, cmd, verbose=self.verbose, timeout=self.timeout) + if result.timeout: raise CommandFailure( "Timeout detected running '{}' with a {}s timeout on {}".format( - str(self), self.timeout, NodeSet.fromlist(result[255]))) - - if 0 not in result or len(result) > 1: - failed = [] - for item, value in list(result.items()): - if item != 0: - failed.extend(value) + cmd, self.timeout, result.timeout_hosts)) + if not result.passed: raise CommandFailure( - "Error occurred running '{}' on {}".format(str(self), NodeSet.fromlist(failed))) + "Error occurred running '{}' on {}".format(cmd, result.failed_hosts)) return result def _report_unit_command(self, command): @@ -846,8 +836,7 @@ def _report_unit_command(self, command): CommandFailure: if there is an issue running the command Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ try: @@ -866,8 +855,7 @@ def service_enable(self): CommandFailure: if unable to enable Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ return self._report_unit_command("enable") @@ -879,8 +867,7 @@ def service_disable(self): CommandFailure: if unable to disable Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ return self._report_unit_command("disable") @@ -892,8 +879,7 @@ def service_start(self): CommandFailure: if unable to start Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ return self._report_unit_command("start") @@ -905,8 +891,7 @@ def service_stop(self): CommandFailure: if unable to stop Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ return self._report_unit_command("stop") @@ -918,8 +903,7 @@ def service_status(self): CommandFailure: if unable to get the status Returns: - dict: a dictionary of return codes keys and accompanying NodeSet - values indicating which hosts yielded the return code. + CommandResult: groups of command results from the same hosts with the same return status """ return self._report_unit_command("status") @@ -940,17 +924,17 @@ def service_running(self): states = {} valid_states = ["active", "activating"] self._systemctl.unit_command.value = "is-active" - results = run_pcmd(self._hosts, str(self), False, self.timeout, None) - for result in results: - if result["interrupted"]: - states["timeout"] = result["hosts"] - status = False - else: - output = result["stdout"][-1] - if output not in states: - states[output] = NodeSet() - states[output].add(result["hosts"]) - status &= output in valid_states + result = run_remote(self.log, self._hosts, str(self), verbose=False, timeout=self.timeout) + if result.timeout: + states["timeout"] = result.timeout_hosts + for data in result.output: + if data.timeout: + continue + output = data.stdout[-1] + if output not in states: + states[output] = NodeSet() + states[output].add(data.hosts) + status &= output in valid_states data = ["=".join([key, str(states[key])]) for key in sorted(states)] self.log.info( " Detected %s states: %s", @@ -987,49 +971,41 @@ def get_log_data(self, hosts, command, timeout=60): self.log.info("Gathering log data on %s: %s", str(hosts), command) # Gather the log information per host - results = run_pcmd(hosts, command, False, timeout, None) + result = run_remote(self.log, hosts, command, verbose=False, timeout=timeout) # Determine if the command completed successfully without a timeout - status = True - for result in results: - if result["interrupted"]: - self.log.info(" Errors detected running \"%s\":", command) - self.log.info( - " %s: timeout detected after %s seconds", - str(result["hosts"]), timeout) - status = False - elif result["exit_status"] != 0: - self.log.info(" Errors detected running \"%s\":", command) - status = False - if not status: - break + if not result.passed: + self.log.info(' Errors detected running "%s":', command) + if result.timeout: + self.log.info( + " %s: timeout detected after %s seconds", str(result.timeout_hosts), timeout) # Display/return the command output log_data = [] - for result in results: - if result["exit_status"] == 0 and not result["interrupted"]: + for data in result.output: + if data.returncode == 0: # Add the successful output from each node to the dictionary log_data.append( - {"hosts": result["hosts"], "data": result["stdout"]}) + {"hosts": data.hosts, "data": data.stdout}) else: # Display all of the results in the case of an error - if len(result["stdout"]) > 1: + if len(data.stdout) > 1: self.log.info( " %s: rc=%s, output:", - str(result["hosts"]), result["exit_status"]) - for line in result["stdout"]: + str(data.hosts), data.returncode) + for line in data.stdout: self.log.info(" %s", line) else: self.log.info( " %s: rc=%s, output: %s", - str(result["hosts"]), result["exit_status"], - result["stdout"][0]) + str(data.hosts), data.returncode, + data.stdout[0]) # Report any errors through an exception - if not status: + if not result.passed: raise CommandFailure( - "Error(s) detected gathering {} log data on {}".format( - self._systemctl.service.value, NodeSet.fromlist(hosts))) + f"Error(s) detected gathering {self._systemctl.service.value} " + f"log data on {result.failed_hosts}") # Return the successful command output per set of hosts return log_data diff --git a/src/tests/ftest/util/macsio_util.py b/src/tests/ftest/util/macsio_util.py index cf64dbcb030..1d41e05b358 100644 --- a/src/tests/ftest/util/macsio_util.py +++ b/src/tests/ftest/util/macsio_util.py @@ -1,11 +1,13 @@ """ (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ from command_utils import ExecutableCommand from command_utils_base import FormattedParameter -from general_utils import get_log_file, pcmd +from general_utils import get_log_file +from run_utils import run_remote class MacsioCommand(ExecutableCommand): @@ -467,7 +469,8 @@ def check_results(self, result, hosts): macsio_files = (self.log_file_name.value, self.timings_file_name.value) for macsio_file in macsio_files: if macsio_file: - self.log.info("Output from %s", macsio_file) - pcmd(hosts, "cat {}".format(macsio_file), timeout=30) + result = run_remote(self.log, hosts, f"cat {macsio_file}", timeout=30) + if not result.passed: + status = False return status diff --git a/src/tests/ftest/util/performance_test_base.py b/src/tests/ftest/util/performance_test_base.py index 7976a825a5d..9f1592c7d19 100644 --- a/src/tests/ftest/util/performance_test_base.py +++ b/src/tests/ftest/util/performance_test_base.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -111,10 +112,10 @@ def _log_daos_metrics(self): os.makedirs(metrics_dir, exist_ok=True) per_engine_results = self.server_managers[0].get_daos_metrics() for engine_idx, engine_results in enumerate(per_engine_results): - for host_results in engine_results: - log_name = "{}_engine{}.csv".format(host_results["hosts"], engine_idx) + for hosts, stdout in engine_results.all_stdout.items(): + log_name = "{}_engine{}.csv".format(hosts, engine_idx) log_path = os.path.join(metrics_dir, log_name) - self.log_performance(host_results["stdout"], False, log_path) + self.log_performance(stdout, False, log_path) @property def unique_id(self): diff --git a/src/tests/ftest/util/run_utils.py b/src/tests/ftest/util/run_utils.py index 8e96e2228f0..a6129bd1d41 100644 --- a/src/tests/ftest/util/run_utils.py +++ b/src/tests/ftest/util/run_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2022-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -267,6 +268,15 @@ def failed_hosts(self): """ return NodeSet.fromlist(data.hosts for data in self.output if data.returncode != 0) + @property + def timeout_hosts(self): + """Get all timeout hosts. + + Returns: + NodeSet: all nodes where the command timed out + """ + return NodeSet.fromlist(data.hosts for data in self.output if data.timeout) + @property def all_stdout(self): """Get all of the stdout from the issued command from each host. diff --git a/src/tests/ftest/util/server_utils.py b/src/tests/ftest/util/server_utils.py index ec79f029c6e..69ed10f184d 100644 --- a/src/tests/ftest/util/server_utils.py +++ b/src/tests/ftest/util/server_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2018-2024 Intel Corporation. + (C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -16,10 +17,9 @@ from command_utils_base import BasicParameter, CommonConfig from dmg_utils import get_dmg_command from exception_utils import CommandFailure -from general_utils import (get_default_config_file, get_display_size, get_log_file, list_to_str, - pcmd, run_pcmd) +from general_utils import get_default_config_file, get_display_size, get_log_file, list_to_str from host_utils import get_local_host -from run_utils import run_remote, stop_processes +from run_utils import command_as_user, run_remote, stop_processes from server_utils_base import DaosServerCommand, DaosServerInformation, ServerFailed from server_utils_params import DaosServerTransportCredentials, DaosServerYamlParameters from user_utils import get_chown_command @@ -615,7 +615,7 @@ def set_scm_mount_ownership(self, user=None, verbose=False): ) if cmd_list: - pcmd(self._hosts, "; ".join(cmd_list), verbose) + run_remote(self.log, self._hosts, "; ".join(cmd_list), verbose=verbose) def restart(self, hosts, wait=False): """Restart the specified servers after a stop. @@ -1148,10 +1148,10 @@ def get_daos_metrics(self, verbose=False, timeout=60): timeout (int, optional): pass timeout to each execution ofrun_pcmd. Defaults to 60. Returns: - list: list of pcmd results for each host. See general_utils.run_pcmd for details. + list: list of CommandResult results for each host. See run_utils.run_remote for details. [ - general_utils.run_pcmd(), # engine 0 - general_utils.run_pcmd() # engine 1 + run_utils.run_remote(), # engine 0 + run_utils.run_remote() # engine 1 ] """ @@ -1159,8 +1159,7 @@ def get_daos_metrics(self, verbose=False, timeout=60): engines = [] daos_metrics_exe = os.path.join(self.manager.job.command_path, "daos_metrics") for engine in range(engines_per_host): - results = run_pcmd( - hosts=self._hosts, verbose=verbose, timeout=timeout, - command="sudo {} -S {} --csv".format(daos_metrics_exe, engine)) - engines.append(results) + command = command_as_user(f"{daos_metrics_exe} -S {engine} --csv", "root") + result = run_remote(self.log, self._hosts, command, verbose=verbose, timeout=timeout) + engines.append(result) return engines diff --git a/src/tests/ftest/util/soak_utils.py b/src/tests/ftest/util/soak_utils.py index 7b5cfdb2608..cc7a91327b7 100644 --- a/src/tests/ftest/util/soak_utils.py +++ b/src/tests/ftest/util/soak_utils.py @@ -1,5 +1,6 @@ """ (C) Copyright 2019-2024 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -28,8 +29,8 @@ from exception_utils import CommandFailure from fio_utils import FioCommand from general_utils import (DaosTestError, check_ping, check_ssh, get_journalctl, get_log_file, - get_random_bytes, get_random_string, list_to_str, pcmd, run_command, - run_pcmd, wait_for_result) + get_random_bytes, get_random_string, list_to_str, run_command, + wait_for_result) from ior_utils import IorCommand from job_manager_utils import Mpirun from macsio_util import MacsioCommand @@ -301,11 +302,9 @@ def run_monitor_check(self): self (obj): soak obj """ - monitor_cmds = self.params.get("monitor", "/run/*") - hosts = self.hostlist_servers - if monitor_cmds: - for cmd in monitor_cmds: - pcmd(hosts, cmd, timeout=30) + monitor_cmds = self.params.get("monitor", "/run/*") or [] + for cmd in monitor_cmds: + run_remote(self.log, self.hostlist_servers, cmd, timeout=30) def run_metrics_check(self, logging=True, prefix=None): @@ -326,17 +325,13 @@ def run_metrics_check(self, logging=True, prefix=None): name = prefix + f"_metrics_{engine}.csv" destination = self.outputsoak_dir daos_metrics = f"{self.sudo_cmd} daos_metrics -S {engine} --csv" - self.log.info("Running %s", daos_metrics) - results = run_pcmd(hosts=self.hostlist_servers, - command=daos_metrics, - verbose=(not logging), - timeout=60) + result = run_remote( + self.log, self.hostlist_servers, daos_metrics, verbose=(not logging), timeout=60) if logging: - for result in results: - hosts = result["hosts"] - log_name = name + "-" + str(hosts) + for data in result.output: + log_name = name + "-" + str(data.hosts) self.log.info("Logging %s output to %s", daos_metrics, log_name) - write_logfile(result["stdout"], log_name, destination) + write_logfile(data.stdout, log_name, destination) def get_harassers(harasser):