diff --git a/docs/configuration_options.md b/docs/configuration_options.md index cacf1c1160..7ec677bec5 100644 --- a/docs/configuration_options.md +++ b/docs/configuration_options.md @@ -266,6 +266,7 @@ | **mgmt_reuse_backup_snapshot_name** | Name of backup snapshot to use in Manager restore benchmark test, for example, 500gb_2t_ics. The name provides the info about dataset size (500gb), tables number (2) and compaction (ICS) | N/A | SCT_MGMT_REUSE_BACKUP_SNAPSHOT_NAME | **mgmt_skip_post_restore_stress_read** | Skip post-restore c-s verification read in the Manager restore benchmark tests | N/A | SCT_MGMT_SKIP_POST_RESTORE_STRESS_READ | **mgmt_nodetool_refresh_flags** | Nodetool refresh extra options like --load-and-stream or --primary-replica-only | N/A | SCT_MGMT_NODETOOL_REFRESH_FLAGS +| **mgmt_prepare_snapshot_size** | Size of backup snapshot in Gb to be prepared for backup | N/A | SCT_MGMT_PREPARE_SNAPSHOT_SIZE | **stress_cmd_w** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_W | **stress_cmd_r** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_R | **stress_cmd_m** | cassandra-stress commands.
You can specify everything but the -node parameter, which is going to
be provided by the test suite infrastructure.
multiple commands can passed as a list | N/A | SCT_STRESS_CMD_M
diff --git a/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile b/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile
new file mode 100644
index 0000000000..eb4aebf48d
--- /dev/null
+++ b/jenkins-pipelines/manager/helpers/prepare-backup-snapshot.jenkinsfile
@@ -0,0 +1,18 @@
+#!groovy
+
+// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43
+def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)
+
+managerPipeline(
+    backend: 'aws',
+    region: 'us-east-1',
+    backup_bucket_location: 'manager-backup-tests-permanent-snapshots-us-east-1',
+    test_name: 'mgmt_cli_test.ManagerHelperTests.test_prepare_backup_snapshot',
+    test_config: 'test-cases/manager/prepare_snapshot/5GB_dataset.yaml',
+
+    post_behavior_db_nodes: 'destroy',
+    post_behavior_loader_nodes: 'destroy',
+    post_behavior_monitor_nodes: 'destroy',
+
+    extra_environment_variables: 'SCT_BACKUP_BUCKET_REGION=us-east-1',
+)
diff --git a/mgmt_cli_test.py b/mgmt_cli_test.py
index aa3a58030e..6bcccf9b61 100644
--- a/mgmt_cli_test.py
+++ b/mgmt_cli_test.py
@@ -278,6 +278,30 @@ def prepare_run_and_verify_stress_in_threads(self, cmd_template: str, keyspace_n
         for _thread in stress_queue:
             assert self.verify_stress_thread(cs_thread_pool=_thread), "Stress thread verification failed"
 
+    @staticmethod
+    def extract_compaction_strategy_from_cs_cmd(cs_cmd: str, lower: bool = True, remove_postfix: bool = True) -> str:
+        """Extracts the compaction strategy from the cassandra-stress command.
+
+        The strategy is read from the `compaction(strategy=...)` option of the command.
+
+        :param cs_cmd: cassandra-stress command
+        :param lower: if True, the resulting string will be lowercased
+        :param remove_postfix: if True, the resulting string will have the "CompactionStrategy" postfix removed
+        :return: compaction strategy name, e.g. "incremental" for IncrementalCompactionStrategy with default flags
+        :raises ValueError: if the command has no `compaction(strategy=...)` option
+        """
+        match = re.search(r"compaction\(strategy=([^)]+)\)", cs_cmd)
+        if not match:
+            raise ValueError("Compaction strategy not found in cs_cmd.")
+
+        strategy = match.group(1)
+        if remove_postfix:
+            strategy = re.sub(r"CompactionStrategy$", "", strategy)
+        if lower:
+            strategy = strategy.lower()
+
+        return strategy
+
 
 class ClusterOperations(ClusterTester):
     CLUSTER_NAME = "mgr_cluster1"
@@ -1258,6 +1282,60 @@ def test_suspend_and_resume(self):
         self._test_suspend_with_on_resume_start_tasks_flag_template(wait_for_duration=False)
 
 
+class ManagerHelperTests(ManagerTestFunctionsMixIn):
+
+    def test_prepare_backup_snapshot(self):
+        """Test prepares backup snapshot for its future use in nemesis or restore benchmarks
+
+        Steps:
+        1. Populate the cluster with data.
+            - C-S write cmd is based on `confirmation_stress_template` template in manager_persistent_snapshots.yaml
+            - Backup size should be specified in Jenkins job passing `mgmt_prepare_snapshot_size` parameter
+            - Keyspace is named `<backup size>gb_<compaction strategy>_<scylla version>`
+        2. Run backup and wait for it to finish.
+        3. Log snapshot details into console.
+        """
+        self.log.info("Populate the cluster with data")
+        backup_size = self.params.get("mgmt_prepare_snapshot_size")  # in Gb
+        assert backup_size and backup_size >= 1, "Backup size must be at least 1Gb"
+
+        backend = self.params.get("cluster_backend")
+        cs_read_cmd_template = get_persistent_snapshots()[backend]["confirmation_stress_template"]
+        cs_write_cmd_template = cs_read_cmd_template.replace(" read ", " write ")
+
+        compaction = self.extract_compaction_strategy_from_cs_cmd(cs_write_cmd_template)
+        scylla_version = self.params.get("scylla_version")
+        # Fail with a clear message instead of a TypeError raised by re.sub() on None
+        assert scylla_version, "Scylla version must be specified, it is a part of the keyspace name"
+        scylla_version = re.sub(r"[-.]", "_", scylla_version)
+        keyspace_name = f"{backup_size}gb_{compaction}_{scylla_version}"
+
+        self.prepare_run_and_verify_stress_in_threads(
+            cmd_template=cs_write_cmd_template,
+            keyspace_name=keyspace_name,
+            num_of_rows=backup_size * 1024 * 1024,  # Considering 1 row = 1Kb
+            stop_on_failure=True,
+        )
+
+        self.log.info("Initialize Scylla Manager")
+        manager_tool = mgmt.get_scylla_manager_tool(manager_node=self.monitors.nodes[0])
+        mgr_cluster = self.ensure_and_get_cluster(manager_tool)
+
+        self.log.info("Run backup and wait for it to finish")
+        backup_task = mgr_cluster.create_backup_task(location_list=self.locations, rate_limit_list=["0"])
+        backup_task_status = backup_task.wait_and_get_final_status(timeout=200000)
+        assert backup_task_status == TaskStatus.DONE, \
+            f"Backup task ended in {backup_task_status} instead of {TaskStatus.DONE}"
+
+        self.log.info("Log snapshot details")
+        self.log.info(
+            f"Snapshot tag: {backup_task.get_snapshot_tag()}\n"
+            f"Keyspace name: {keyspace_name}\n"
+            f"Bucket: {self.locations}\n"
+            f"Cluster id: {mgr_cluster.id}\n"
+        )
+
+
 class ManagerSanityTests(
     ManagerBackupTests,
     ManagerRestoreTests,
diff --git a/sdcm/sct_config.py b/sdcm/sct_config.py
index be088080a8..fe983624fe 100644
--- a/sdcm/sct_config.py
+++ b/sdcm/sct_config.py
@@ -1158,6 +1158,10 @@ class SCTConfiguration(dict):
              env="SCT_MGMT_NODETOOL_REFRESH_FLAGS", type=str,
              help="Nodetool refresh extra options like
--load-and-stream or --primary-replica-only"),
 
+        dict(name="mgmt_prepare_snapshot_size",
+             env="SCT_MGMT_PREPARE_SNAPSHOT_SIZE", type=int,
+             help="Size of backup snapshot in Gb to be prepared for backup"),
+
         # PerformanceRegressionTest
         dict(name="stress_cmd_w",
              env="SCT_STRESS_CMD_W",
diff --git a/test-cases/manager/prepare_snapshot/100GB_dataset.yaml b/test-cases/manager/prepare_snapshot/100GB_dataset.yaml
new file mode 100644
index 0000000000..dd07d50247
--- /dev/null
+++ b/test-cases/manager/prepare_snapshot/100GB_dataset.yaml
@@ -0,0 +1,12 @@
+test_duration: 240
+
+instance_type_db: 'i4i.large'
+instance_type_loader: 'c6i.large'
+
+n_db_nodes: 3
+n_loaders: 2
+n_monitor_nodes: 1
+
+mgmt_prepare_snapshot_size: 100  # GB
+
+user_prefix: manager-helper
diff --git a/test-cases/manager/prepare_snapshot/10GB_dataset.yaml b/test-cases/manager/prepare_snapshot/10GB_dataset.yaml
new file mode 100644
index 0000000000..3bfd1ea837
--- /dev/null
+++ b/test-cases/manager/prepare_snapshot/10GB_dataset.yaml
@@ -0,0 +1,12 @@
+test_duration: 60
+
+instance_type_db: 'i4i.large'
+instance_type_loader: 'c6i.large'
+
+n_db_nodes: 3
+n_loaders: 1
+n_monitor_nodes: 1
+
+mgmt_prepare_snapshot_size: 10  # GB
+
+user_prefix: manager-helper
diff --git a/test-cases/manager/prepare_snapshot/2TB_dataset.yaml b/test-cases/manager/prepare_snapshot/2TB_dataset.yaml
new file mode 100644
index 0000000000..e4ff3274ee
--- /dev/null
+++ b/test-cases/manager/prepare_snapshot/2TB_dataset.yaml
@@ -0,0 +1,12 @@
+test_duration: 2880
+
+instance_type_db: 'i3en.3xlarge'
+instance_type_loader: 'c6i.2xlarge'
+
+n_db_nodes: 3
+n_loaders: 4
+n_monitor_nodes: 1
+
+mgmt_prepare_snapshot_size: 2048  # GB
+
+user_prefix: manager-helper
diff --git a/test-cases/manager/prepare_snapshot/5GB_dataset.yaml b/test-cases/manager/prepare_snapshot/5GB_dataset.yaml
new file mode 100644
index 0000000000..f4bfe73047
--- /dev/null
+++ b/test-cases/manager/prepare_snapshot/5GB_dataset.yaml
@@ -0,0 +1,12 @@
+test_duration: 60
+
+instance_type_db: 'i4i.large'
+instance_type_loader: 'c6i.large'
+
+n_db_nodes: 3
+n_loaders: 1
+n_monitor_nodes: 1
+
+mgmt_prepare_snapshot_size: 5  # GB
+
+user_prefix: manager-helper