Create new test file for the nodes maintenance test scenarios when using a Provider mode (#9528)

* Create new test file for the nodes maintenance test scenarios when using a Provider mode

Signed-off-by: Itzhak Kave <[email protected]>
yitzhak12 authored May 29, 2024
1 parent 29d04fb commit 087df7d
Showing 4 changed files with 192 additions and 4 deletions.
14 changes: 12 additions & 2 deletions ocs_ci/ocs/cluster.py
@@ -57,7 +57,10 @@
 from ocs_ci.ocs.resources.pvc import PVC
 from ocs_ci.utility.connection import Connection
 from ocs_ci.utility.lvmo_utils import get_lvm_cluster_name
-from ocs_ci.ocs.resources.pod import get_mds_pods, wait_for_pods_to_be_running
+from ocs_ci.ocs.resources.pod import (
+    get_mds_pods,
+    wait_for_pods_to_be_in_statuses,
+)
 from ocs_ci.utility.decorators import switch_to_orig_index_at_last

 logger = logging.getLogger(__name__)
@@ -3235,7 +3238,14 @@ def client_cluster_health_check():
     )

     logger.info("Wait for the pods to be running")
-    res = wait_for_pods_to_be_running(timeout=300, sleep=20)
+    expected_statuses = [constants.STATUS_RUNNING, constants.STATUS_COMPLETED]
+    exclude_pod_name_prefixes = ["rook-ceph-tools"]
+    res = wait_for_pods_to_be_in_statuses(
+        expected_statuses=expected_statuses,
+        exclude_pod_name_prefixes=exclude_pod_name_prefixes,
+        timeout=300,
+        sleep=20,
+    )
     if not res:
         raise ResourceWrongStatusException("Not all the pods in running state")

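With this change, client_cluster_health_check() no longer requires every pod to be Running: pods that finish and stay in Completed are accepted, and the rook-ceph-tools pod is ignored. A minimal sketch of the relaxed wait using the same arguments as the diff above (the wrapper name wait_for_healthy_pods is illustrative, not part of the commit):

from ocs_ci.ocs import constants
from ocs_ci.ocs.resources.pod import wait_for_pods_to_be_in_statuses


def wait_for_healthy_pods(timeout=300, sleep=20):
    # Accept pods in Running or Completed state and skip the rook-ceph-tools pod,
    # mirroring the call added to client_cluster_health_check() above.
    return wait_for_pods_to_be_in_statuses(
        expected_statuses=[constants.STATUS_RUNNING, constants.STATUS_COMPLETED],
        exclude_pod_name_prefixes=["rook-ceph-tools"],
        timeout=timeout,
        sleep=sleep,
    )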
5 changes: 5 additions & 0 deletions ocs_ci/ocs/constants.py
@@ -323,6 +323,7 @@

 DEFAULT_CLUSTERNAME = DEFAULT_STORAGE_CLUSTER = "ocs-storagecluster"
 DEFAULT_CLUSTERNAME_EXTERNAL_MODE = "ocs-external-storagecluster"
+DEFAULT_CLUSTERNAME_CLIENT = "storage-client"
 DEFAULT_BLOCKPOOL = f"{DEFAULT_CLUSTERNAME}-cephblockpool"
 METADATA_POOL = f"{DEFAULT_CLUSTERNAME}-cephfilesystem-metadata"
 DATA_POOL = f"{DEFAULT_CLUSTERNAME}-cephfilesystem-data0"
@@ -363,6 +364,10 @@
     f"{DEFAULT_CLUSTERNAME_EXTERNAL_MODE}-ceph-rbd-thick"
 )

+# Default StorageClass for Provider-mode
+DEFAULT_STORAGECLASS_CLIENT_CEPHFS = f"{DEFAULT_CLUSTERNAME_CLIENT}-cephfs"
+DEFAULT_STORAGECLASS_CLIENT_RBD = f"{DEFAULT_CLUSTERNAME_CLIENT}-ceph-rbd"
+
 # Default VolumeSnapshotClass
 DEFAULT_VOLUMESNAPSHOTCLASS_CEPHFS = f"{DEFAULT_CLUSTERNAME}-cephfsplugin-snapclass"
 DEFAULT_VOLUMESNAPSHOTCLASS_RBD = f"{DEFAULT_CLUSTERNAME}-rbdplugin-snapclass"
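Since DEFAULT_CLUSTERNAME_CLIENT is "storage-client", the two new f-string constants resolve to the client StorageClass names shown in this quick illustrative check (not part of the commit):

from ocs_ci.ocs import constants

# The values follow directly from the f-strings added above.
assert constants.DEFAULT_STORAGECLASS_CLIENT_CEPHFS == "storage-client-cephfs"
assert constants.DEFAULT_STORAGECLASS_CLIENT_RBD == "storage-client-ceph-rbd"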
8 changes: 6 additions & 2 deletions ocs_ci/ocs/scale_lib.py
@@ -124,8 +124,12 @@ def create_multi_pvc_pod(
         raise UnexpectedBehaviour("Kube_job batch count should be lesser than 1200")

     logger.info(f"Start creating {pvc_count} PVC of 2 types RBD-RWO & FS-RWX")
-    cephfs_sc_obj = constants.DEFAULT_STORAGECLASS_CEPHFS
-    rbd_sc_obj = constants.DEFAULT_STORAGECLASS_RBD
+    if is_hci_cluster():
+        cephfs_sc_obj = constants.DEFAULT_STORAGECLASS_CLIENT_CEPHFS
+        rbd_sc_obj = constants.DEFAULT_STORAGECLASS_CLIENT_RBD
+    else:
+        cephfs_sc_obj = constants.DEFAULT_STORAGECLASS_CEPHFS
+        rbd_sc_obj = constants.DEFAULT_STORAGECLASS_RBD

     # Get pvc_dict_list, append all the pvc.yaml dict to pvc_dict_list
     rbd_pvc_dict_list, cephfs_pvc_dict_list = ([], [])
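create_multi_pvc_pod() now picks the StorageClass pair per deployment type: the client StorageClasses on an HCI (provider/client) cluster, and the regular ocs-storagecluster ones otherwise. A standalone sketch of that selection (the helper name pick_storageclasses and the import path for is_hci_cluster are assumptions for illustration):

from ocs_ci.ocs import constants
from ocs_ci.ocs.cluster import is_hci_cluster  # assumed import path


def pick_storageclasses():
    # Return (cephfs_sc, rbd_sc) matching the logic added in the diff above.
    if is_hci_cluster():
        return (
            constants.DEFAULT_STORAGECLASS_CLIENT_CEPHFS,
            constants.DEFAULT_STORAGECLASS_CLIENT_RBD,
        )
    return (constants.DEFAULT_STORAGECLASS_CEPHFS, constants.DEFAULT_STORAGECLASS_RBD)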
169 changes: 169 additions & 0 deletions (new test file added by this commit)
@@ -0,0 +1,169 @@
import logging
import pytest
import random
import time


from ocs_ci.framework.pytest_customization.marks import brown_squad
from ocs_ci.framework.testlib import (
    tier4a,
    tier4b,
    ignore_leftovers,
    ManageTest,
    provider_client_platform_required,
)
from ocs_ci.ocs import constants
from ocs_ci.ocs.constants import HCI_PROVIDER
from ocs_ci.ocs.node import (
    get_node_objs,
    recover_node_to_ready_state,
    wait_for_nodes_status,
    get_nodes,
    drain_nodes,
    schedule_nodes,
)
from ocs_ci.helpers.sanity_helpers import SanityProviderMode
from ocs_ci.ocs.cluster import (
    ceph_health_check,
)
from ocs_ci.framework import config
from ocs_ci.utility.utils import switch_to_correct_cluster_at_setup

logger = logging.getLogger(__name__)


def check_drain_and_unschedule_node(ocp_node):
    """
    Drain and unschedule a node

    Args:
        ocp_node (OCS): The node object

    Raises:
        ResourceWrongStatusException: In case the node didn't reach the desired state
    """
    drain_nodes([ocp_node.name])
    # Wait for the node to become unschedulable
    wait_for_nodes_status(
        node_names=[ocp_node.name],
        status=constants.NODE_READY_SCHEDULING_DISABLED,
        timeout=120,
        sleep=5,
    )

    wait_time_before_reschedule = 30
    logger.info(
        f"Wait {wait_time_before_reschedule} seconds before rescheduling the node"
    )
    time.sleep(wait_time_before_reschedule)

    schedule_nodes([ocp_node.name])
    wait_for_nodes_status(
        node_names=[ocp_node.name],
        status=constants.NODE_READY,
        timeout=120,
        sleep=5,
    )
    logger.info("Checking that the Ceph health is OK")
    ceph_health_check()


@brown_squad
@ignore_leftovers
@provider_client_platform_required
class TestNodesMaintenanceProviderMode(ManageTest):
    """
    Test nodes maintenance scenarios when using a Provider mode
    """

    @pytest.fixture(autouse=True)
    def setup(self, request, create_scale_pods_and_pvcs_using_kube_job_on_hci_clients):
        """
        1. Save the original index
        2. Switch to the correct cluster index
        3. Initialize the Sanity instance
        """
        self.orig_index = config.cur_index
        switch_to_correct_cluster_at_setup(request)
        self.sanity_helpers = SanityProviderMode(
            create_scale_pods_and_pvcs_using_kube_job_on_hci_clients
        )

    @pytest.fixture(autouse=True)
    def teardown(self, request):
        """
        1. Make sure all nodes are up again
        2. Switch to the original cluster index
        3. Check the Ceph health
        """

        def finalizer():
            ocp_nodes = get_node_objs()
            for n in ocp_nodes:
                recover_node_to_ready_state(n)

            logger.info("Switch to the original cluster index")
            config.switch_ctx(self.orig_index)
            ceph_health_check()

        request.addfinalizer(finalizer)

    @tier4a
    @pytest.mark.parametrize(
        argnames=["cluster_type", "node_type"],
        argvalues=[
            pytest.param(
                *[HCI_PROVIDER, constants.WORKER_MACHINE],
                marks=pytest.mark.polarion_id("OCS-5461"),
            ),
            pytest.param(
                *[HCI_PROVIDER, constants.MASTER_MACHINE],
                marks=pytest.mark.polarion_id("OCS-5462"),
            ),
        ],
    )
    def test_node_maintenance(self, cluster_type, node_type):
        """
        Test node maintenance
        """
        ocp_nodes = get_nodes(node_type=node_type)
        ocp_node = random.choice(ocp_nodes)
        check_drain_and_unschedule_node(ocp_node)
        logger.info(
            "Check basic cluster functionality by creating resources, run IO, "
            "and deleting the resources"
        )
        self.sanity_helpers.create_resources_on_clients()
        self.sanity_helpers.delete_resources()
        logger.info("Check the cluster health")
        self.sanity_helpers.health_check_provider_mode()

    @tier4b
    @pytest.mark.polarion_id("OCS-5466")
    @pytest.mark.parametrize(
        argnames=["cluster_type", "node_type"],
        argvalues=[
            pytest.param(*[HCI_PROVIDER, constants.WORKER_MACHINE]),
        ],
    )
    def test_rolling_nodes_maintenance(self, cluster_type, node_type):
        """
        Test maintenance of the nodes one after the other, checking the health status in between
        """
        ocp_nodes = get_nodes(node_type=node_type)
        for ocp_node in ocp_nodes:
            check_drain_and_unschedule_node(ocp_node)

        logger.info(
            "Check basic cluster functionality by creating resources, run IO, "
            "and deleting the resources"
        )
        self.sanity_helpers.create_resources_on_clients()
        self.sanity_helpers.delete_resources()
        logger.info("Check the cluster health")
        self.sanity_helpers.health_check_provider_mode()
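
To summarize the flow the new tests exercise: drain a node, wait for the Ready,SchedulingDisabled status, re-enable scheduling, wait for Ready, and verify Ceph health before running the sanity workload. A condensed, illustrative sketch of the rolling variant using only the helpers the file imports (the function name rolling_node_maintenance is hypothetical, not part of the commit):

import time

from ocs_ci.ocs import constants
from ocs_ci.ocs.cluster import ceph_health_check
from ocs_ci.ocs.node import (
    drain_nodes,
    get_nodes,
    schedule_nodes,
    wait_for_nodes_status,
)


def rolling_node_maintenance(node_type=constants.WORKER_MACHINE, pause=30):
    # Drain and reschedule each node of the given type, one after the other,
    # checking Ceph health after every node recovers (mirrors the tests above).
    for node in get_nodes(node_type=node_type):
        drain_nodes([node.name])
        wait_for_nodes_status(
            node_names=[node.name],
            status=constants.NODE_READY_SCHEDULING_DISABLED,
            timeout=120,
            sleep=5,
        )
        time.sleep(pause)
        schedule_nodes([node.name])
        wait_for_nodes_status(
            node_names=[node.name], status=constants.NODE_READY, timeout=120, sleep=5
        )
        ceph_health_check()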
