From cf3cc56a7755418cec45ce4c8844f93afb3f406c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Feb 2025 19:13:56 +0000 Subject: [PATCH] tf: add standalone apm server profiles with NVMe SSDs and io2 EBS (#15624) Improve benchmarking tf setup to facilitate benchmarking on standalone apm server setup with faster disks (e.g. local NVMe SSD) - Add NVMe c6id ec2 instance profiles - Use the local NVMe disk as the data directory for instance types whose name contains "d." (e.g. c6id.2xlarge) - Make disk IOPS configurable, since it is a required field for the io2 volume type - Bigger worker instance for 32GB profile to avoid OOM (cherry picked from commit 3afe1fd0d91e8ccba3f3ea30d7e6ad2bba6cb36e) --- testing/benchmark/main.tf | 1 + .../system-profiles/16GB_NVMe-x2zone.tfvars | 23 +++++++++++++++++ .../system-profiles/1GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/2GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/32GB_NVMe-x2zone.tfvars | 25 +++++++++++++++++++ .../system-profiles/32GBx2zone.tfvars | 2 +- .../system-profiles/4GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/8GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../8GB_io2_6000iops-x1zone.tfvars | 23 +++++++++++++++++ testing/benchmark/variables.tf | 6 +++++ .../modules/standalone_apm_server/main.tf | 13 ++++++++++ .../standalone_apm_server/variables.tf | 6 +++++ 12 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars create mode 100644 testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars create mode 100644 testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars diff --git a/testing/benchmark/main.tf 
b/testing/benchmark/main.tf index 4465e090239..8bc52145166 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -157,6 +157,7 @@ module "standalone_apm_server" { apm_instance_type = var.standalone_apm_server_instance_size apm_volume_type = var.standalone_apm_server_volume_type apm_volume_size = var.apm_server_tail_sampling ? coalesce(var.standalone_apm_server_volume_size, 60) : var.standalone_apm_server_volume_size + apm_iops = var.standalone_apm_server_iops apm_server_bin_path = var.apm_server_bin_path ea_managed = false diff --git a/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars b/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars new file mode 100644 index 00000000000..d32b2388b94 --- /dev/null +++ b/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars @@ -0,0 +1,23 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.2xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "128g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "16g" +# Number of shards for the ES indices +apm_shards = 4 + +# Standalone + +standalone_apm_server_instance_size = "c6id.2xlarge" +standalone_moxy_instance_size = "c6i.4xlarge" diff --git a/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..b9ccb14c2f0 --- /dev/null +++ b/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "16g" +# The number of AZs the Elasticsearch cluster should have. 
+elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "1g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..f11694aedd5 --- /dev/null +++ b/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "16g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "2g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars b/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars new file mode 100644 index 00000000000..b9025364646 --- /dev/null +++ b/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars @@ -0,0 +1,25 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.4xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "256g" +# The number of AZs the Elasticsearch cluster should have. 
+elasticsearch_zone_count = 2 +# Run the cluster with a dedicated master +elasticsearch_dedicated_masters = true +# APM server instance size +apm_server_size = "32g" +# Number of shards for the ES indices +apm_shards = 4 + +# Standalone + +standalone_apm_server_instance_size = "c6id.4xlarge" +standalone_moxy_instance_size = "c6i.8xlarge" diff --git a/testing/benchmark/system-profiles/32GBx2zone.tfvars b/testing/benchmark/system-profiles/32GBx2zone.tfvars index 67cc51afc2b..a76e0fcbaf5 100644 --- a/testing/benchmark/system-profiles/32GBx2zone.tfvars +++ b/testing/benchmark/system-profiles/32GBx2zone.tfvars @@ -2,7 +2,7 @@ user_name = "USER" # APM bench -worker_instance_type = "c6i.2xlarge" +worker_instance_type = "c6i.4xlarge" # Elastic Cloud diff --git a/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..3d87c264da4 --- /dev/null +++ b/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "32g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "4g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..4f797f5c0d9 --- /dev/null +++ b/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. 
+apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "64g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "8g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.xlarge" +standalone_moxy_instance_size = "c6i.2xlarge" diff --git a/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars b/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars new file mode 100644 index 00000000000..f7b8303fa05 --- /dev/null +++ b/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars @@ -0,0 +1,23 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "64g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "8g" + +# Standalone + +standalone_apm_server_instance_size = "c6i.xlarge" +standalone_apm_server_volume_type = "io2" +standalone_apm_server_iops = 6000 +standalone_moxy_instance_size = "c6i.2xlarge" diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index 25671ad3253..8a9aac45235 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -142,6 +142,12 @@ variable "standalone_apm_server_volume_size" { description = "Optional volume size in GB to use for APM Server VM" } +variable "standalone_apm_server_iops" { + default = null + type = number + description = "Optional disk IOPS to use for APM Server VM" +} + ## VPC Network settings variable "vpc_cidr" { diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index c1e2d02a5b1..bc7e5d56c30 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ 
b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -150,6 +150,7 @@ resource "aws_instance" "apm" { root_block_device { volume_type = var.apm_volume_type volume_size = var.apm_volume_size + iops = var.apm_iops } connection { @@ -159,6 +160,18 @@ resource "aws_instance" "apm" { private_key = file("${var.aws_provisioner_key_name}") } + // For instance types with 'd.' e.g. c6id.2xlarge, use the NVMe ssd as data disk. + provisioner "remote-exec" { + inline = length(regexall("d[.]", self.instance_type)) > 0 ? [ + "sudo mkfs -t xfs /dev/nvme1n1", + "mkdir ~/data", + "sudo mount /dev/nvme1n1 ~/data", + "sudo chown $USER:$USER ~/data", + ] : [ + ":", // no-op + ] + } + provisioner "file" { source = "${var.apm_server_bin_path}/apm-server" destination = local.bin_path diff --git a/testing/infra/terraform/modules/standalone_apm_server/variables.tf b/testing/infra/terraform/modules/standalone_apm_server/variables.tf index 69c7a739bd0..38638b02caa 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/variables.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/variables.tf @@ -22,6 +22,12 @@ variable "apm_volume_size" { description = "Optional apm server volume size in GB override" } +variable "apm_iops" { + default = null + type = number + description = "Optional apm server disk IOPS override" +} + variable "vpc_id" { description = "VPC ID to provision the EC2 instance" type = string