From 341a94643b85609ab302ccd44f74b8b1072a0258 Mon Sep 17 00:00:00 2001 From: Liapkovich Date: Fri, 29 Nov 2024 17:58:14 +0100 Subject: [PATCH] test(manager): add test for backup snapshot preparation The test prepares a backup snapshot for future use in restore cases (restore benchmark or nemesis). The test populates the cluster with data, runs a backup and logs the created snapshot details to the console. The corresponding jenkinsfile and test-case yamls are added. Together with the test, a new helper method is introduced which extracts the compaction strategy string from a c-s cmd. To handle the backup size, a new environment variable is added (SCT_MGMT_PREPARE_SNAPSHOT_SIZE). It should be specified in the Jenkins job. --- docs/configuration_options.md | 1 + .../prepare-backup-snapshot.jenkinsfile | 18 +++++ mgmt_cli_test.py | 70 +++++++++++++++++++ sdcm/sct_config.py | 4 ++ .../prepare_snapshot/100GB_dataset.yaml | 12 ++++ .../prepare_snapshot/10GB_dataset.yaml | 12 ++++ .../manager/prepare_snapshot/2TB_dataset.yaml | 12 ++++ .../manager/prepare_snapshot/5GB_dataset.yaml | 12 ++++ 8 files changed, 141 insertions(+) create mode 100644 jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile create mode 100644 test-cases/manager/prepare_snapshot/100GB_dataset.yaml create mode 100644 test-cases/manager/prepare_snapshot/10GB_dataset.yaml create mode 100644 test-cases/manager/prepare_snapshot/2TB_dataset.yaml create mode 100644 test-cases/manager/prepare_snapshot/5GB_dataset.yaml diff --git a/docs/configuration_options.md b/docs/configuration_options.md index cacf1c1160..7ec677bec5 100644 --- a/docs/configuration_options.md +++ b/docs/configuration_options.md @@ -266,6 +266,7 @@ | **mgmt_reuse_backup_snapshot_name** | Name of backup snapshot to use in Manager restore benchmark test, for example, 500gb_2t_ics. 
The name provides the info about dataset size (500gb), tables number (2) and compaction (ICS) | N/A | SCT_MGMT_REUSE_BACKUP_SNAPSHOT_NAME | **mgmt_skip_post_restore_stress_read** | Skip post-restore c-s verification read in the Manager restore benchmark tests | N/A | SCT_MGMT_SKIP_POST_RESTORE_STRESS_READ | **mgmt_nodetool_refresh_flags** | Nodetool refresh extra options like --load-and-stream or --primary-replica-only | N/A | SCT_MGMT_NODETOOL_REFRESH_FLAGS +| **mgmt_prepare_snapshot_size** | Size of backup snapshot in Gb to be prepared for backup | N/A | SCT_MGMT_PREPARE_SNAPSHOT_SIZE | **stress_cmd_w** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_W | **stress_cmd_r** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_R | **stress_cmd_m** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_M diff --git a/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile b/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile new file mode 100644 index 0000000000..eb4aebf48d --- /dev/null +++ b/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile @@ -0,0 +1,18 @@ +#!groovy + +// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43 +def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm) + +managerPipeline( + backend: 'aws', + region: 'us-east-1', + backup_bucket_location: 'manager-backup-tests-permanent-snapshots-us-east-1', + test_name: 'mgmt_cli_test.ManagerHelperTests.test_prepare_backup_snapshot', + test_config: 'test-cases/manager/prepare_snapshot/5GB_dataset.yaml', + + post_behavior_db_nodes: 'destroy', + post_behavior_loader_nodes: 'destroy', + post_behavior_monitor_nodes: 'destroy', + + extra_environment_variables: 'SCT_BACKUP_BUCKET_REGION=us-east-1', +) diff --git a/mgmt_cli_test.py b/mgmt_cli_test.py index aa3a58030e..6bcccf9b61 100644 --- a/mgmt_cli_test.py +++ b/mgmt_cli_test.py @@ -278,6 +278,26 @@ def prepare_run_and_verify_stress_in_threads(self, cmd_template: str, keyspace_n for _thread in stress_queue: assert self.verify_stress_thread(cs_thread_pool=_thread), "Stress thread verification failed" + @staticmethod + def extract_compaction_strategy_from_cs_cmd(cs_cmd: str, lower: bool = True, remove_postfix: bool = True) -> str: + """Extracts the compaction strategy from the cassandra-stress command. 
+ + :param cs_cmd: cassandra-stress command + :param lower: if True, the resulting string will be lowercased + :param remove_postfix: if True, the resulting string will have the "CompactionStrategy" postfix removed + """ + match = re.search(r"compaction\(strategy=([^)]+)\)", cs_cmd) + if match: + strategy = match.group(1) + if remove_postfix: + strategy = re.sub(r"CompactionStrategy$", "", strategy) + if lower: + strategy = strategy.lower() + else: + raise ValueError("Compaction strategy not found in cs_cmd.") + + return strategy + class ClusterOperations(ClusterTester): CLUSTER_NAME = "mgr_cluster1" @@ -1258,6 +1278,56 @@ def test_suspend_and_resume(self): self._test_suspend_with_on_resume_start_tasks_flag_template(wait_for_duration=False) +class ManagerHelperTests(ManagerTestFunctionsMixIn): + + def test_prepare_backup_snapshot(self): + """Test prepares backup snapshot for its future use in nemesis or restore benchmarks + + Steps: + 1. Populate the cluster with data. + - C-S write cmd is based on `confirmation_stress_template` template in manager_persistent_snapshots.yaml + - Backup size should be specified in Jenkins job passing `mgmt_prepare_snapshot_size` parameter + 2. Run backup and wait for it to finish. + 3. Log snapshot details into console. 
+ """ + self.log.info("Populate the cluster with data") + backup_size = self.params.get("mgmt_prepare_snapshot_size") # in Gb + assert backup_size and backup_size >= 1, "Backup size must be at least 1Gb" + + backend = self.params.get("cluster_backend") + cs_read_cmd_template = get_persistent_snapshots()[backend]["confirmation_stress_template"] + cs_write_cmd_template = cs_read_cmd_template.replace(" read ", " write ") + + compaction = self.extract_compaction_strategy_from_cs_cmd(cs_write_cmd_template) + scylla_version = re.sub(r"[-.]", "_", self.params.get("scylla_version")) + keyspace_name = f"{backup_size}gb_{compaction}_{scylla_version}" + + self.prepare_run_and_verify_stress_in_threads( + cmd_template=cs_write_cmd_template, + keyspace_name=keyspace_name, + num_of_rows=backup_size * 1024 * 1024, # Considering 1 row = 1Kb + stop_on_failure=True, + ) + + self.log.info("Initialize Scylla Manager") + manager_tool = mgmt.get_scylla_manager_tool(manager_node=self.monitors.nodes[0]) + mgr_cluster = self.ensure_and_get_cluster(manager_tool) + + self.log.info("Run backup and wait for it to finish") + backup_task = mgr_cluster.create_backup_task(location_list=self.locations, rate_limit_list=["0"]) + backup_task_status = backup_task.wait_and_get_final_status(timeout=200000) + assert backup_task_status == TaskStatus.DONE, \ + f"Backup task ended in {backup_task_status} instead of {TaskStatus.DONE}" + + self.log.info("Log snapshot details") + self.log.info( + f"Snapshot tag: {backup_task.get_snapshot_tag()}\n" + f"Keyspace name: {keyspace_name}\n" + f"Bucket: {self.locations}\n" + f"Cluster id: {mgr_cluster.id}\n" + ) + + class ManagerSanityTests( ManagerBackupTests, ManagerRestoreTests, diff --git a/sdcm/sct_config.py b/sdcm/sct_config.py index be088080a8..fe983624fe 100644 --- a/sdcm/sct_config.py +++ b/sdcm/sct_config.py @@ -1158,6 +1158,10 @@ class SCTConfiguration(dict): env="SCT_MGMT_NODETOOL_REFRESH_FLAGS", type=str, help="Nodetool refresh extra options like 
--load-and-stream or --primary-replica-only"), + dict(name="mgmt_prepare_snapshot_size", + env="SCT_MGMT_PREPARE_SNAPSHOT_SIZE", type=int, + help="Size of backup snapshot in Gb to be prepared to be prepared for backup"), + # PerformanceRegressionTest dict(name="stress_cmd_w", env="SCT_STRESS_CMD_W", diff --git a/test-cases/manager/prepare_snapshot/100GB_dataset.yaml b/test-cases/manager/prepare_snapshot/100GB_dataset.yaml new file mode 100644 index 0000000000..dd07d50247 --- /dev/null +++ b/test-cases/manager/prepare_snapshot/100GB_dataset.yaml @@ -0,0 +1,12 @@ +test_duration: 240 + +instance_type_db: 'i4i.large' +instance_type_loader: 'c6i.large' + +n_db_nodes: 3 +n_loaders: 2 +n_monitor_nodes: 1 + +mgmt_prepare_snapshot_size: 100 # GB + +user_prefix: manager-helper diff --git a/test-cases/manager/prepare_snapshot/10GB_dataset.yaml b/test-cases/manager/prepare_snapshot/10GB_dataset.yaml new file mode 100644 index 0000000000..3bfd1ea837 --- /dev/null +++ b/test-cases/manager/prepare_snapshot/10GB_dataset.yaml @@ -0,0 +1,12 @@ +test_duration: 60 + +instance_type_db: 'i4i.large' +instance_type_loader: 'c6i.large' + +n_db_nodes: 3 +n_loaders: 1 +n_monitor_nodes: 1 + +mgmt_prepare_snapshot_size: 10 # GB + +user_prefix: manager-helper diff --git a/test-cases/manager/prepare_snapshot/2TB_dataset.yaml b/test-cases/manager/prepare_snapshot/2TB_dataset.yaml new file mode 100644 index 0000000000..e4ff3274ee --- /dev/null +++ b/test-cases/manager/prepare_snapshot/2TB_dataset.yaml @@ -0,0 +1,12 @@ +test_duration: 2880 + +instance_type_db: 'i3en.3xlarge' +instance_type_loader: 'c6i.2xlarge' + +n_db_nodes: 3 +n_loaders: 4 +n_monitor_nodes: 1 + +mgmt_prepare_snapshot_size: 2048 # GB + +user_prefix: manager-helper diff --git a/test-cases/manager/prepare_snapshot/5GB_dataset.yaml b/test-cases/manager/prepare_snapshot/5GB_dataset.yaml new file mode 100644 index 0000000000..f4bfe73047 --- /dev/null +++ b/test-cases/manager/prepare_snapshot/5GB_dataset.yaml @@ -0,0 +1,12 @@ 
+test_duration: 60 + +instance_type_db: 'i4i.large' +instance_type_loader: 'c6i.large' + +n_db_nodes: 3 +n_loaders: 1 +n_monitor_nodes: 1 + +mgmt_prepare_snapshot_size: 5 # GB + +user_prefix: manager-helper