From 840e59e88646f71546dbcad6a4fbd43aadde7ed6 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Feb 2025 15:25:37 +0000 Subject: [PATCH 1/8] Configurable iops, instance size --- .github/workflows/benchmarks.yml | 5 +++++ testing/benchmark/main.tf | 1 + testing/benchmark/variables.tf | 6 ++++++ .../infra/terraform/modules/standalone_apm_server/main.tf | 1 + .../terraform/modules/standalone_apm_server/variables.tf | 6 ++++++ 5 files changed, 19 insertions(+) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 098c088b124..595f8970645 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -8,6 +8,10 @@ on: required: false type: boolean default: false + standaloneInstanceSize: + description: 'AWS instance size of standalone APM Server, e.g. c6i.2xlarge' + required: false + type: string enableTailSampling: description: 'Enable tail-based sampling on the APM server' required: false @@ -67,6 +71,7 @@ jobs: TF_VAR_private_key: ./id_rsa_terraform TF_VAR_public_key: ./id_rsa_terraform.pub TF_VAR_run_standalone: ${{ inputs.runStandalone || github.event.schedule=='0 0 1 * *' }} + TF_VAR_standalone_apm_server_instance_size: ${{ inputs.standaloneInstanceSize || 'c6i.2xlarge' }} TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling || 'false' }} # set the default again otherwise schedules won't work TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit || '10GB' }} # set the default again otherwise schedules won't work RUN_STANDALONE: ${{ inputs.runStandalone || github.event.schedule=='0 0 1 * *' }} diff --git a/testing/benchmark/main.tf b/testing/benchmark/main.tf index 713eb4725db..02f095fc13a 100644 --- a/testing/benchmark/main.tf +++ b/testing/benchmark/main.tf @@ -157,6 +157,7 @@ module "standalone_apm_server" { apm_instance_type = var.standalone_apm_server_instance_size apm_volume_type = var.standalone_apm_server_volume_type apm_volume_size = 
var.apm_server_tail_sampling ? coalesce(var.standalone_apm_server_volume_size, 60) : var.standalone_apm_server_volume_size + apm_iops = var.standalone_apm_server_iops apm_server_bin_path = var.apm_server_bin_path ea_managed = false diff --git a/testing/benchmark/variables.tf b/testing/benchmark/variables.tf index 25671ad3253..8a9aac45235 100644 --- a/testing/benchmark/variables.tf +++ b/testing/benchmark/variables.tf @@ -142,6 +142,12 @@ variable "standalone_apm_server_volume_size" { description = "Optional volume size in GB to use for APM Server VM" } +variable "standalone_apm_server_iops" { + default = null + type = number + description = "Optional disk IOPS to use for APM Server VM" +} + ## VPC Network settings variable "vpc_cidr" { diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 86f08555be7..c084eded220 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -172,6 +172,7 @@ resource "aws_instance" "apm" { root_block_device { volume_type = var.apm_volume_type volume_size = var.apm_volume_size + iops = var.apm_iops } connection { diff --git a/testing/infra/terraform/modules/standalone_apm_server/variables.tf b/testing/infra/terraform/modules/standalone_apm_server/variables.tf index 69c7a739bd0..38638b02caa 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/variables.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/variables.tf @@ -22,6 +22,12 @@ variable "apm_volume_size" { description = "Optional apm server volume size in GB override" } +variable "apm_iops" { + default = null + type = number + description = "Optional apm server disk IOPS override" +} + variable "vpc_id" { description = "VPC ID to provision the EC2 instance" type = string From 5ff6df6e638f9cffae666859a962f0fca22ccacc Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Fri, 7 Feb
2025 18:08:37 +0000 Subject: [PATCH 2/8] Use nvme ssd if available --- .../terraform/modules/standalone_apm_server/main.tf | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index c084eded220..b05f7f53006 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -182,6 +182,16 @@ resource "aws_instance" "apm" { private_key = file("${var.aws_provisioner_key_name}") } + // For instance types with 'd.' e.g. c6id.2xlarge, use the NVMe ssd as data disk. + provisioner "remote-exec" { + inline = length(regexall("d[.]", self.instance_type)) > 0 ? [ + "sudo mkfs -t xfs /dev/nvme1n1", + "mkdir ~/data", + "sudo mount /dev/nvme1n1 ~/data", + "sudo chown $USER:$USER ~/data", + ] : [] + } + provisioner "file" { source = "${var.apm_server_bin_path}/apm-server" destination = local.bin_path From 48bf369c79accd9b3f5d5b95dca21ede96baa06c Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Sat, 8 Feb 2025 00:10:59 +0000 Subject: [PATCH 3/8] Add no-op --- testing/infra/terraform/modules/standalone_apm_server/main.tf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index b05f7f53006..34aea0645eb 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -189,7 +189,9 @@ resource "aws_instance" "apm" { "mkdir ~/data", "sudo mount /dev/nvme1n1 ~/data", "sudo chown $USER:$USER ~/data", - ] : [] + ] : [ + ":", // no-op + ] } provisioner "file" { From dee60773b9fba7bee77955c1566f37417a355621 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Feb 2025 14:47:08 +0000 Subject: [PATCH 4/8] Remove instance override --- 
.github/workflows/benchmarks.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 595f8970645..098c088b124 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -8,10 +8,6 @@ on: required: false type: boolean default: false - standaloneInstanceSize: - description: 'AWS instance size of standalone APM Server, e.g. c6i.2xlarge' - required: false - type: string enableTailSampling: description: 'Enable tail-based sampling on the APM server' required: false @@ -71,7 +67,6 @@ jobs: TF_VAR_private_key: ./id_rsa_terraform TF_VAR_public_key: ./id_rsa_terraform.pub TF_VAR_run_standalone: ${{ inputs.runStandalone || github.event.schedule=='0 0 1 * *' }} - TF_VAR_standalone_apm_server_instance_size: ${{ inputs.standaloneInstanceSize || 'c6i.2xlarge' }} TF_VAR_apm_server_tail_sampling: ${{ inputs.enableTailSampling || 'false' }} # set the default again otherwise schedules won't work TF_VAR_apm_server_tail_sampling_storage_limit: ${{ inputs.tailSamplingStorageLimit || '10GB' }} # set the default again otherwise schedules won't work RUN_STANDALONE: ${{ inputs.runStandalone || github.event.schedule=='0 0 1 * *' }} From 74d463de977c407e1b7dc61ab06ec619f8c89e7b Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Feb 2025 14:47:19 +0000 Subject: [PATCH 5/8] Larger worker --- testing/benchmark/system-profiles/32GBx2zone.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/benchmark/system-profiles/32GBx2zone.tfvars b/testing/benchmark/system-profiles/32GBx2zone.tfvars index 67cc51afc2b..a76e0fcbaf5 100644 --- a/testing/benchmark/system-profiles/32GBx2zone.tfvars +++ b/testing/benchmark/system-profiles/32GBx2zone.tfvars @@ -2,7 +2,7 @@ user_name = "USER" # APM bench -worker_instance_type = "c6i.2xlarge" +worker_instance_type = "c6i.4xlarge" # Elastic Cloud From 7b0a0e6292f706f410e0b39d71e2f4aa2920a5dd Mon Sep 17 00:00:00 2001 From: Carson Ip 
Date: Mon, 10 Feb 2025 14:47:31 +0000 Subject: [PATCH 6/8] Add NVMe profiles --- .../system-profiles/16GB_NVMe-x2zone.tfvars | 23 +++++++++++++++++ .../system-profiles/1GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/2GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/32GB_NVMe-x2zone.tfvars | 25 +++++++++++++++++++ .../system-profiles/4GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ .../system-profiles/8GB_NVMe-x1zone.tfvars | 21 ++++++++++++++++ 6 files changed, 132 insertions(+) create mode 100644 testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars create mode 100644 testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars create mode 100644 testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars create mode 100644 testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars diff --git a/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars b/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars new file mode 100644 index 00000000000..d32b2388b94 --- /dev/null +++ b/testing/benchmark/system-profiles/16GB_NVMe-x2zone.tfvars @@ -0,0 +1,23 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.2xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "128g" +# The number of AZs the Elasticsearch cluster should have. 
+elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "16g" +# Number of shards for the ES indices +apm_shards = 4 + +# Standalone + +standalone_apm_server_instance_size = "c6id.2xlarge" +standalone_moxy_instance_size = "c6i.4xlarge" diff --git a/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..b9ccb14c2f0 --- /dev/null +++ b/testing/benchmark/system-profiles/1GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "16g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "1g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..f11694aedd5 --- /dev/null +++ b/testing/benchmark/system-profiles/2GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "16g" +# The number of AZs the Elasticsearch cluster should have. 
+elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "2g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars b/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars new file mode 100644 index 00000000000..b9025364646 --- /dev/null +++ b/testing/benchmark/system-profiles/32GB_NVMe-x2zone.tfvars @@ -0,0 +1,25 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.4xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "256g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# Run the cluster with a dedicated master +elasticsearch_dedicated_masters = true +# APM server instance size +apm_server_size = "32g" +# Number of shards for the ES indices +apm_shards = 4 + +# Standalone + +standalone_apm_server_instance_size = "c6id.4xlarge" +standalone_moxy_instance_size = "c6i.8xlarge" diff --git a/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..3d87c264da4 --- /dev/null +++ b/testing/benchmark/system-profiles/4GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.large" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "32g" +# The number of AZs the Elasticsearch cluster should have. 
+elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "4g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.large" +standalone_moxy_instance_size = "c6i.xlarge" diff --git a/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars b/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars new file mode 100644 index 00000000000..4f797f5c0d9 --- /dev/null +++ b/testing/benchmark/system-profiles/8GB_NVMe-x1zone.tfvars @@ -0,0 +1,21 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "64g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "8g" + +# Standalone + +standalone_apm_server_instance_size = "c6id.xlarge" +standalone_moxy_instance_size = "c6i.2xlarge" From 97ce9be80fdae1de1a76ee46dacf9a0b4d475900 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Feb 2025 15:09:28 +0000 Subject: [PATCH 7/8] terraform fmt --- testing/infra/terraform/modules/standalone_apm_server/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/infra/terraform/modules/standalone_apm_server/main.tf b/testing/infra/terraform/modules/standalone_apm_server/main.tf index 34aea0645eb..93ed9121933 100644 --- a/testing/infra/terraform/modules/standalone_apm_server/main.tf +++ b/testing/infra/terraform/modules/standalone_apm_server/main.tf @@ -189,7 +189,7 @@ resource "aws_instance" "apm" { "mkdir ~/data", "sudo mount /dev/nvme1n1 ~/data", "sudo chown $USER:$USER ~/data", - ] : [ + ] : [ ":", // no-op ] } From fdb3c0bb2921c631ad893243df8da46eb8a88188 Mon Sep 17 00:00:00 2001 From: Carson Ip Date: Mon, 10 Feb 2025 16:08:57 +0000 Subject: [PATCH 8/8] Add io2 6000iops profile --- .../8GB_io2_6000iops-x1zone.tfvars | 23 +++++++++++++++++++ 1 file changed, 23 
insertions(+) create mode 100644 testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars diff --git a/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars b/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars new file mode 100644 index 00000000000..f7b8303fa05 --- /dev/null +++ b/testing/benchmark/system-profiles/8GB_io2_6000iops-x1zone.tfvars @@ -0,0 +1,23 @@ +user_name = "USER" + +# APM bench + +worker_instance_type = "c6i.xlarge" + +# Elastic Cloud + +# The number of AZs the APM Server should span. +apm_server_zone_count = 1 +# The Elasticsearch cluster node size. +elasticsearch_size = "64g" +# The number of AZs the Elasticsearch cluster should have. +elasticsearch_zone_count = 2 +# APM server instance size +apm_server_size = "8g" + +# Standalone + +standalone_apm_server_instance_size = "c6i.xlarge" +standalone_apm_server_volume_type = "io2" +standalone_apm_server_iops = 6000 +standalone_moxy_instance_size = "c6i.2xlarge"