Skip to content

Commit

Permalink
Merge #139443
Browse files Browse the repository at this point in the history
139443: drtprod: YAML for 300 node and YCSB scripts r=vidit-bhat a=nameisbhaskar

The PR includes the following changes:
1. YAML for the 300 node sniff testing
2. YCSB init and run scripts
3. Max rate for tpcc changed to 500 in drt-large cluster YAML
4. Change in the tpcc_run_multiregion.sh to take the PGURL at runtime.

Epic: None
Release: None

Co-authored-by: Bhaskarjyoti Bora <[email protected]>
  • Loading branch information
craig[bot] and nameisbhaskar committed Jan 21, 2025
2 parents dda6410 + f27a48e commit 4cb25fc
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 3 deletions.
14 changes: 13 additions & 1 deletion pkg/cmd/drtprod/configs/drt_large.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ environment:
RUN_DURATION: 12h
NUM_CONNECTIONS: 500
NUM_WORKERS: 500
MAX_RATE: 1000
MAX_RATE: 500

targets:
- target_name: $CLUSTER
Expand Down Expand Up @@ -120,6 +120,18 @@ targets:
args:
- $WORKLOAD_CLUSTER
- workload
- target_name: rp
steps:
- command: put
args:
- $WORKLOAD_CLUSTER
- artifacts/roachprod
- roachprod
- command: put
args:
- $WORKLOAD_CLUSTER:1
- artifacts/roachtest
- roachtest-operations
- script: "pkg/cmd/drtprod/scripts/setup_datadog_workload"
- target_name: post_tasks
dependent_targets:
Expand Down
150 changes: 150 additions & 0 deletions pkg/cmd/drtprod/configs/drt_scale_300.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# YAML for creating and configuring the drt-scale-300 cluster and its workload
# cluster. This also configures Datadog.
# Build the roachprod and roachtest binaries (using --cross) before running this config.
environment:
  # NOTE(review): the service-account address is obfuscated in this copy of the
  # file and must be restored before use. It is quoted because a plain scalar
  # that starts with "[" is parsed as a flow sequence, not as a string.
  ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: "[email protected]"
  ROACHPROD_DNS: drt.crdb.io
  ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io
  ROACHPROD_GCE_DNS_ZONE: drt
  ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
  # Names of the CockroachDB cluster and of the cluster that drives the workload.
  CLUSTER: drt-scale-300
  WORKLOAD_CLUSTER: workload-scale-300
  # Node counts; also read by generate_ycsb_run.sh to spread the pgurls of the
  # CLUSTER nodes across the WORKLOAD nodes.
  CLUSTER_NODES: 300
  WORKLOAD_NODES: 20

targets:
# crdb cluster specs
# Creates the $CLUSTER VMs, stages and starts cockroach on them, and installs a
# cron @reboot entry that runs ~/cockroach.sh.
- target_name: $CLUSTER
  steps:
  # $CLUSTER_NODES managed GCE VMs, 150 per zone, with 4 pd-ssd volumes each.
  - command: create
    args:
    - $CLUSTER
    flags:
      clouds: gce
      gce-managed: true
      gce-enable-multiple-stores: true
      gce-zones: 'us-central1-a:150,us-central1-b:150'
      nodes: $CLUSTER_NODES
      gce-machine-type: n2-standard-16
      local-ssd: false
      gce-pd-volume-size: 375
      gce-pd-volume-type: pd-ssd
      gce-pd-volume-count: 4
      os-volume-size: 100
      username: drt
      lifetime: 8760h
      gce-image: 'ubuntu-2204-jammy-v20250112'
  - command: sync
    flags:
      clouds: gce
  - command: stage
    args:
    - $CLUSTER
    - cockroach
  - script: 'pkg/cmd/drtprod/scripts/setup_datadog_cluster'
  - command: start
    args:
    - $CLUSTER
    - '--binary'
    - './cockroach'
    flags:
      # add flag to set provisioned throughput on each store according to their cloud provider limits
      enable-fluent-sink: true
      # store-count matches gce-pd-volume-count of the create step above.
      store-count: 4
      # "args" here is a flag of the start command, not the step's argument list.
      args: --wal-failover=among-stores
      restart: false
      sql-port: 26257
  # Unmask and enable cron, then register "@reboot sleep 100 && ~/cockroach.sh"
  # through a temporary t.sh script. The folded scalar below is a single
  # command line; every line break folds to one space.
  - command: run
    args:
    - $CLUSTER
    - --
    - >-
      sudo systemctl unmask cron.service ;
      sudo systemctl enable cron.service ;
      echo "crontab -l ; echo '@reboot sleep 100 && ~/cockroach.sh' | crontab -" > t.sh ;
      sh t.sh ;
      rm t.sh
# workload cluster specs
# Creates the $WORKLOAD_CLUSTER VMs, stages the cockroach and workload binaries
# on them, and runs the Datadog setup script for workload nodes.
- target_name: $WORKLOAD_CLUSTER
  steps:
  - command: create
    args:
    - $WORKLOAD_CLUSTER
    flags:
      clouds: gce
      gce-zones: 'us-central1-a,us-central1-b'
      nodes: $WORKLOAD_NODES
      gce-machine-type: n2-standard-8
      os-volume-size: 100
      username: workload
      lifetime: 8760h
      gce-image: 'ubuntu-2204-jammy-v20250112'
    # Rollback action attached to the create step: destroy the workload cluster.
    on_rollback:
    - command: destroy
      args:
      - $WORKLOAD_CLUSTER
  - command: sync
    flags:
      clouds: gce
  - command: stage
    args:
    - $WORKLOAD_CLUSTER
    - cockroach
  - command: stage
    args:
    - $WORKLOAD_CLUSTER
    - workload
  - script: 'pkg/cmd/drtprod/scripts/setup_datadog_workload'
# Runs after both clusters exist: copies the certs from $CLUSTER to the workload
# nodes, uploads the roachprod and roachtest binaries, and starts the ycsb init.
- target_name: post_tasks
  dependent_targets:
  - $CLUSTER
  - $WORKLOAD_CLUSTER
  steps:
  # Drop any local certs-$CLUSTER directory before fetching a fresh copy.
  - script: rm
    args:
    - -rf
    - certs-$CLUSTER
  - command: get
    args:
    - $CLUSTER:1
    - certs
    - certs-$CLUSTER
  - command: put
    args:
    - $WORKLOAD_CLUSTER
    - certs-$CLUSTER
    - certs
  - command: ssh
    args:
    - $WORKLOAD_CLUSTER
    - --
    - chmod
    # Quoted so the file mode stays a string instead of resolving to an integer.
    - "600"
    - './certs/*'
  - command: put
    args:
    - $WORKLOAD_CLUSTER
    - artifacts/roachprod
    - roachprod
  - command: put
    args:
    - $WORKLOAD_CLUSTER
    - artifacts/roachtest
    - roachtest-operations
  # ycsb_init.sh <script_suffix> <execute:true|false> <init flags>
  - script: "pkg/cmd/drtprod/scripts/ycsb_init.sh"
    args:
    - 20M
    # Quoted: the script compares this argument with the string 'true'; a plain
    # true is a YAML boolean.
    - "true"
    flags:
      splits: 200
      insert-count: 20000000
# Generates the ycsb_run_20M.sh script on every workload node once post_tasks
# has finished.
- target_name: ycsb_run
  dependent_targets:
  - post_tasks
  steps:
  # generate_ycsb_run.sh <script_suffix> <execute:true|false> <run flags>
  - script: "pkg/cmd/drtprod/scripts/generate_ycsb_run.sh"
    args:
    - 20M
    # Quoted: the script compares this argument with the string 'false'; a plain
    # false is a YAML boolean. With "false" the script only prints the command
    # that starts the workload.
    - "false"
    flags:
      max-rate: 66666
      # The four operation frequencies add up to 1.0.
      read-freq: 0.8
      insert-freq: 0.1
      update-freq: 0.05
      delete-freq: 0.05
      duration: 0
      ramp: 5s
96 changes: 96 additions & 0 deletions pkg/cmd/drtprod/scripts/generate_ycsb_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/bin/bash

# Copyright 2024 The Cockroach Authors.
#
# Use of this software is governed by the CockroachDB Software License
# included in the /LICENSE file.

# This script generates the ycsb run workload script on every workload node.
#
# Usage: generate_ycsb_run.sh <script_suffix> <execute:true|false> <ycsb run flags...>
#   script_suffix  the generated script is named ycsb_run_<script_suffix>.sh
#   execute        "true" runs the generated script on the workload cluster right away,
#                  "false" only prints the command that runs it
#   flags          passed through to "cockroach workload run ycsb"
#                  (max-rate, read-freq, insert-freq, update-freq, delete-freq, ...)
#
# NOTE - This uses the CLUSTER, WORKLOAD_CLUSTER, CLUSTER_NODES and WORKLOAD_NODES
# environment variables, if any of them is not set the script fails
if [ "$#" -lt 7 ]; then
  # Diagnostics go to stderr so they are not mistaken for regular output.
  echo "Usage: $0 <script_suffix> <execute:true|false> <flags to run: max-rate, read-freq, insert-freq, update-freq, delete-freq>" >&2
  exit 1
fi
suffix=$1
shift
# The second argument decides whether the generated script is executed on the workload cluster.
# The value is true or false
if [ "$1" != "true" ] && [ "$1" != "false" ]; then
  # $1 is used again because of the shift
  echo "Error: The second argument must be 'true' or 'false' which implies whether the script should be started in background or not." >&2
  exit 1
fi
execute_script=$1
shift
# From here on "$@" holds only the flags for the ycsb run command.

# Abort when any environment variable this script depends on is unset or empty.
for required_var in CLUSTER WORKLOAD_CLUSTER WORKLOAD_NODES CLUSTER_NODES; do
  # ${!required_var} is bash indirect expansion: the value of the variable whose
  # name is stored in required_var.
  if [ -z "${!required_var}" ]; then
    echo "environment ${required_var} is not set"
    exit 1
  fi
done

# Directory that holds the cockroach binary on the first workload node; the
# generated script is later addressed relative to this directory.
absolute_path=$(drtprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./cockroach")
pwd=$(drtprod run "${WORKLOAD_CLUSTER}":1 -- "dirname ${absolute_path}")

# Calculate the number of PGURLS each workload node should get
PGURL_PER_NODE=$((CLUSTER_NODES / WORKLOAD_NODES))
REMAINDER_NODE=$((CLUSTER_NODES % WORKLOAD_NODES))

# Distribute the PGURLS among the workload nodes. The first REMAINDER_NODE
# workload nodes each take one extra cluster node so that every cluster node is
# assigned exactly once.
for ((NODE=0; NODE<WORKLOAD_NODES; NODE++)); do
  START_OFFSET=$((NODE * PGURL_PER_NODE + (NODE < REMAINDER_NODE ? NODE : REMAINDER_NODE) + 1))
  END_OFFSET=$((START_OFFSET + PGURL_PER_NODE + (NODE < REMAINDER_NODE ? 1 : 0) - 1))

  # Print the PGURLS range for the current workload node
  echo "pgurl for Nodes ${START_OFFSET}:${END_OFFSET}"

  # Create the workload script. The here-document is unquoted: plain $VAR and
  # $@ are expanded now, on the machine generating the script, while \$...
  # is left for the workload node to evaluate at run time.
  cat <<EOF >/tmp/ycsb_run_${suffix}.sh
#!/usr/bin/env bash
export ROACHPROD_GCE_DEFAULT_PROJECT=$ROACHPROD_GCE_DEFAULT_PROJECT
INSERT_START=10000000000000000
NUM_WORKERS_PER_NODE=5
LOG_TIMESTAMP=\$(date '+%Y-%m-%d-%H:%M:%S')
CLIENTS_PER_WORKLOAD=4000
./roachprod sync
PGURLS=\$(./roachprod pgurl $CLUSTER:$START_OFFSET-$END_OFFSET | sed s/\'//g)
read -r -a PGURLS_ARR <<< "\$PGURLS"
for ((j=1;j<=\$NUM_WORKERS_PER_NODE;j++)); do
    # Each background worker gets its own log files; sharing one file opened
    # with '>' makes the workers truncate and overwrite each other's output.
    OUTPUT_FILE_A="ycsb-a-\${LOG_TIMESTAMP}-worker-\${j}.log"
    OUTPUT_ERROR_FILE_A="ycsb-a-\${LOG_TIMESTAMP}-worker-\${j}.error.log"
    echo ">> Starting ycsb workload"
    nohup ./cockroach workload run ycsb --tolerate-errors --workload='custom' \
        --min-conns=\$((CLIENTS_PER_WORKLOAD/NUM_WORKERS_PER_NODE)) $@ \
        --insert-start=\$((INSERT_START*$NODE+(INSERT_START/j))) \
        --families=false --request-distribution='uniform' --scan-length-distribution='uniform' \
        --concurrency=\$((CLIENTS_PER_WORKLOAD/NUM_WORKERS_PER_NODE)) \
        "\${PGURLS_ARR[@]}" > "\$OUTPUT_FILE_A" 2> "\$OUTPUT_ERROR_FILE_A" &
done
EOF

  # Upload the script to the workload node (workload nodes are numbered from 1)
  drtprod put "${WORKLOAD_CLUSTER}:$((NODE + 1))" "/tmp/ycsb_run_${suffix}.sh"
  drtprod ssh "${WORKLOAD_CLUSTER}:$((NODE + 1))" -- "chmod +x ycsb_run_${suffix}.sh"
done
# Start the generated script on all workload nodes, or only print the command
# that starts it, depending on the execute argument.
case "$execute_script" in
  true)
    drtprod run "${WORKLOAD_CLUSTER}" -- "${pwd}/ycsb_run_${suffix}.sh"
    ;;
  *)
    printf 'Run --> drtprod run %s -- "%s"\n' "${WORKLOAD_CLUSTER}" "${pwd}/ycsb_run_${suffix}.sh"
    ;;
esac
9 changes: 7 additions & 2 deletions pkg/cmd/drtprod/scripts/tpcc_run_multiregion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,14 @@ do
# us to reach the specified region, and then add the actual number of workers
# we want to run.
EFFECTIVE_NUM_WORKERS=$(($(($TPCC_WAREHOUSES/$NUM_REGIONS))*$(($NODE-1))+$NUM_WORKERS))
PGURLS_REGION=$(./bin/drtprod pgurl $CLUSTER:$NODE_OFFSET-$LAST_NODE_IN_REGION | sed "s/'//g; s/^/'/; s/$/'/")
cat <<EOF >/tmp/tpcc_run.sh
#!/usr/bin/env bash
export ROACHPROD_GCE_DEFAULT_PROJECT=$ROACHPROD_GCE_DEFAULT_PROJECT
./roachprod sync
PGURLS=\$(./roachprod pgurl $CLUSTER:$NODE_OFFSET-$LAST_NODE_IN_REGION | sed s/\'//g)
read -r -a PGURLS_REGION <<< "\$PGURLS"
j=0
while true; do
echo ">> Starting tpcc workload"
Expand All @@ -47,7 +52,7 @@ while true; do
--partitions=$NUM_REGIONS \
--partition-affinity=$(($NODE-1)) \
--tolerate-errors \
$PGURLS_REGION \
\${PGURLS_REGION[@]} \
--survival-goal region \
--regions=$REGIONS
done
Expand Down
55 changes: 55 additions & 0 deletions pkg/cmd/drtprod/scripts/ycsb_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

# Copyright 2024 The Cockroach Authors.
#
# Use of this software is governed by the CockroachDB Software License
# included in the /LICENSE file.

# This script sets up the ycsb init (import) script on the first workload node and
# optionally starts it there.
#
# Usage: ycsb_init.sh <script_suffix> <execute:true|false> <ycsb init flags...>
#   script_suffix  the generated script is named ycsb_init_<script_suffix>.sh
#   execute        "true" starts the generated script on the workload node right away,
#                  "false" only prints the command that starts it
#   flags          passed through to "cockroach workload init ycsb" (--splits, --insert-count, ...)
#
# NOTE - This uses CLUSTER and WORKLOAD_CLUSTER environment variable, if not set the script fails

# The first argument is the name suffix that is added to the script as ycsb_init_<suffix>.sh
if [ "$#" -lt 4 ]; then
  # Diagnostics go to stderr so they are not mistaken for regular output.
  echo "Usage: $0 <script_suffix> <execute:true|false> <flags to init:--splits, --insert-count>" >&2
  exit 1
fi
suffix=$1
shift
# The second argument represents whether the init process should be started in the workload cluster
# The value is true or false
if [ "$1" != "true" ] && [ "$1" != "false" ]; then
  # $1 is used again because of the shift
  echo "Error: The second argument must be 'true' or 'false' which implies whether the script should be started in background or not." >&2
  exit 1
fi
execute_script=$1
shift
# From here on "$@" holds only the flags for the ycsb init command.

# Both cluster names are required: CLUSTER is the database cluster whose pgurl
# is used, WORKLOAD_CLUSTER is where the init script is written.
if [ -z "${CLUSTER}" ]; then
  echo "environment CLUSTER is not set"
  exit 1
fi

if [ -z "${WORKLOAD_CLUSTER}" ]; then
  # Name the variable that is actually missing.
  echo "environment WORKLOAD_CLUSTER is not set"
  exit 1
fi

# Directory that holds the cockroach binary on the first workload node, and the
# pgurl of the first node of the database cluster.
absolute_path=$(drtprod run "${WORKLOAD_CLUSTER}":1 -- "realpath ./cockroach")
pwd=$(drtprod run "${WORKLOAD_CLUSTER}":1 -- "dirname ${absolute_path}")
PGURLS=$(drtprod pgurl "${CLUSTER}":1)

# script is responsible for importing the ycsb database for workload.
# The whole remote command is one double-quoted word, so every variable in it is
# expanded locally before it is sent; the quoted 'EOF' only stops the remote
# shell from expanding the text a second time.
# $* (not $@) is used for the flags: inside a larger quoted string $@ would
# split the command into several words, $* joins the flags with spaces.
drtprod ssh "${WORKLOAD_CLUSTER}":1 -- "tee ycsb_init_${suffix}.sh > /dev/null << 'EOF'
#!/bin/bash
${pwd}/cockroach workload init ycsb $PGURLS --drop --families=false $*
EOF"
drtprod ssh "${WORKLOAD_CLUSTER}":1 -- "chmod +x ycsb_init_${suffix}.sh"

if [ "$execute_script" = "true" ]; then
  # Run the init as a transient systemd unit with the uid/gid of the remote user
  # (\$(id -u) and \$(id -g) are evaluated on the workload node).
  drtprod run "${WORKLOAD_CLUSTER}":1 -- "sudo systemd-run --unit ycsb_init_${suffix} --same-dir --uid \$(id -u) --gid \$(id -g) bash ${pwd}/ycsb_init_${suffix}.sh"
else
  # Print a copy-pasteable command; \\\$ prints as \$ so the substitution is
  # still deferred to the workload node when the command is pasted into a shell.
  echo "Run --> drtprod run ${WORKLOAD_CLUSTER}:1 -- \"sudo systemd-run --unit ycsb_init_${suffix} --same-dir --uid \\\$(id -u) --gid \\\$(id -g) bash ${pwd}/ycsb_init_${suffix}.sh\""
fi

0 comments on commit 4cb25fc

Please sign in to comment.