Skip to content

Commit

Permalink
ci: Enable metrics collection
Browse files Browse the repository at this point in the history
  • Loading branch information
marun committed Mar 12, 2024
1 parent 96188ab commit 7b934e9
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 6 deletions.
41 changes: 40 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ on:
pull_request:

env:
tmpnet_data_path: ~/.tmpnet/networks/1000
tmpnet_data_path: ~/.tmpnet
prometheus_url: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?orgId=1&refresh=10s&var-filter=is_ephemeral_node%7C%3D%7Cfalse&var-filter=gh_repo%7C%3D%7Cava-labs%2Fsubnet-evm&var-filter=gh_run_id%7C%3D%7C${{ github.run_id }}&var-filter=gh_run_attempt%7C%3D%7C${{ github.run_attempt }}

jobs:
lint_test:
Expand Down Expand Up @@ -125,9 +126,28 @@ jobs:
- name: Build Subnet-EVM Plugin Binary
shell: bash
run: ./scripts/build.sh /tmp/e2e-test/avalanchego/plugins/srEXiWaHuhNyGwPUi444Tu47ZEDwxTWrbQiuD7FmgSAQ6X7Dy
- name: Start prometheus
shell: bash
run: bash -x ./scripts/run_prometheus.sh
env:
PROMETHEUS_ID: ${{ secrets.PROMETHEUS_ID }}
PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD }}
- name: Notify of metrics availability
shell: bash
run: .github/workflows/notify-metrics-availability.sh
env:
PROMETHEUS_URL: ${{ env.prometheus_url }}
GH_JOB_ID: ${{ github.job }}
- name: Run Warp E2E Tests
shell: bash
run: AVALANCHEGO_BUILD_PATH=/tmp/e2e-test/avalanchego ./scripts/run_ginkgo_warp.sh
env:
GH_REPO: ${{ github.repository }}
GH_WORKFLOW: ${{ github.workflow }}
GH_RUN_ID: ${{ github.run_id }}
GH_RUN_NUMBER: ${{ github.run_number }}
GH_RUN_ATTEMPT: ${{ github.run_attempt }}
GH_JOB_ID: ${{ github.job }}
- name: Upload tmpnet network dir for warp testing
if: always()
uses: actions/upload-artifact@v4
Expand All @@ -154,9 +174,28 @@ jobs:
- name: Build Subnet-EVM Plugin Binary
shell: bash
run: ./scripts/build.sh /tmp/e2e-test/avalanchego/plugins/srEXiWaHuhNyGwPUi444Tu47ZEDwxTWrbQiuD7FmgSAQ6X7Dy
- name: Start prometheus
shell: bash
run: bash -x ./scripts/run_prometheus.sh
env:
PROMETHEUS_ID: ${{ secrets.PROMETHEUS_ID }}
PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD }}
- name: Notify of metrics availability
shell: bash
run: .github/workflows/notify-metrics-availability.sh
env:
PROMETHEUS_URL: ${{ env.prometheus_url }}
GH_JOB_ID: ${{ github.job }}
- name: Run E2E Load Tests
shell: bash
run: AVALANCHEGO_BUILD_PATH=/tmp/e2e-test/avalanchego ./scripts/run_ginkgo_load.sh
env:
GH_REPO: ${{ github.repository }}
GH_WORKFLOW: ${{ github.workflow }}
GH_RUN_ID: ${{ github.run_id }}
GH_RUN_NUMBER: ${{ github.run_number }}
GH_RUN_ATTEMPT: ${{ github.run_attempt }}
GH_JOB_ID: ${{ github.job }}
- name: Upload tmpnet network dir for load testing
if: always()
uses: actions/upload-artifact@v4
Expand Down
19 changes: 19 additions & 0 deletions .github/workflows/notify-metrics-availability.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash

set -euo pipefail

# Emits a GitHub Actions notice containing a link to the metrics dashboard,
# filtered to the current job and to a 15 minute window starting now.
#
# Required environment:
#   PROMETHEUS_URL  - base dashboard URL. It must already contain a query
#                     string, since further parameters are appended with '&'.
#   GH_JOB_ID       - value used for the gh_job_id dashboard filter.
# Optional environment:
#   FILTER_BY_OWNER - when non-empty, adds a network_owner dashboard filter.

# Fail with a descriptive message (rather than the bare "unbound variable"
# error produced by `set -u`) when a required input is missing or empty.
: "${PROMETHEUS_URL:?PROMETHEUS_URL must be set}"
: "${GH_JOB_ID:?GH_JOB_ID must be set}"

# Timestamps are in seconds
from_timestamp="$(date '+%s')"
monitoring_period=900 # 15 minutes
to_timestamp="$((from_timestamp + monitoring_period))"

# Grafana expects milliseconds, so pad the second-resolution timestamps with 3 zeros
metrics_url="${PROMETHEUS_URL}&var-filter=gh_job_id%7C%3D%7C${GH_JOB_ID}&from=${from_timestamp}000&to=${to_timestamp}000"

# Optionally ensure that the link displays metrics only for the shared
# network rather than mixing it with the results for private networks.
if [[ -n "${FILTER_BY_OWNER:-}" ]]; then
  metrics_url="${metrics_url}&var-filter=network_owner%7C%3D%7C${FILTER_BY_OWNER}"
fi

echo "::notice links::metrics ${metrics_url}"
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.21

require (
github.com/VictoriaMetrics/fastcache v1.10.0
github.com/ava-labs/avalanchego v1.11.2
github.com/ava-labs/avalanchego v1.11.3-0.20240312180123-888d1f385b1a
github.com/cespare/cp v0.1.0
github.com/davecgh/go-spew v1.1.1
github.com/deckarep/golang-set/v2 v2.1.0
Expand All @@ -15,7 +15,6 @@ require (
github.com/fsnotify/fsnotify v1.6.0
github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08
github.com/go-cmd/cmd v1.4.1
github.com/golang/protobuf v1.5.3
github.com/google/uuid v1.6.0
github.com/gorilla/rpc v1.2.0
github.com/gorilla/websocket v1.4.2
Expand Down Expand Up @@ -82,6 +81,7 @@ require (
github.com/go-stack/stack v1.8.1 // indirect
github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect
github.com/google/btree v1.1.2 // indirect
github.com/google/go-cmp v0.6.0 // indirect
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8=
github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/ava-labs/avalanchego v1.11.2 h1:8iodZ+RjqpRwHdiXPPtvaNt72qravge7voGzw3yPRzg=
github.com/ava-labs/avalanchego v1.11.2/go.mod h1:oTVnF9idL57J4LM/6RByTmKhI4QvV6OCnF99ysyBljE=
github.com/ava-labs/avalanchego v1.11.3-0.20240312180123-888d1f385b1a h1:BuzvwSi6RinhiqlN8XjwghvpLorFhE6chZOTBsJc8K4=
github.com/ava-labs/avalanchego v1.11.3-0.20240312180123-888d1f385b1a/go.mod h1:Yhtr0gRX0QFBb1Y3WQtymp3vm2qIukQqbkBQ/BP6Bus=
github.com/ava-labs/coreth v0.13.2-0.20240304213436-8afbf2d68461 h1:SIwGF3eVEwmexLm7is/MvG7W5sbmpGXaUT6RfUPP3jw=
github.com/ava-labs/coreth v0.13.2-0.20240304213436-8afbf2d68461/go.mod h1:v24MTMbxFSvyM7YeQFyWiXjIzVo2+UVs7tgH7xrByew=
github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g=
Expand Down
120 changes: 120 additions & 0 deletions scripts/run_prometheus.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env bash

set -euo pipefail

# Starts a prometheus instance in agent-mode, forwarding to a central
# instance. Intended to enable metrics collection from temporary networks running
# locally and in CI.
#
# The prometheus instance will remain running in the background and will forward
# metrics to the central instance for all tmpnet networks.
#
# To stop it:
#
# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid
#

# e.g.,
# PROMETHEUS_ID=<id> PROMETHEUS_PASSWORD=<password> ./scripts/run_prometheus.sh
# Guard against being invoked by a path that does not end in
# scripts/run_prometheus.sh. The dot is escaped so that it only matches a
# literal '.'.
if [[ ! "$0" =~ scripts/run_prometheus\.sh ]]; then
  echo "must be run from repository root"
  exit 255
fi

# Directory holding the agent's configuration, log and pidfile.
PROMETHEUS_WORKING_DIR="${HOME}/.tmpnet/prometheus"
PIDFILE="${PROMETHEUS_WORKING_DIR}/run.pid"

# First check if an agent-mode prometheus is already running. A single instance can collect
# metrics from all local temporary networks.
#
# The short -F option is used instead of --pidfile because it is accepted by
# both the procps (linux) and BSD (macos) implementations of pgrep. The long
# option is rejected on macos, which would make this check always fail and
# result in a second agent being started.
if pgrep -F "${PIDFILE}" -f 'prometheus.*enable-feature=agent' &> /dev/null; then
  echo "prometheus is already running locally with --enable-feature=agent"
  exit 0
fi

# Base URL of the central prometheus instance that metrics are forwarded to.
# The default guarantees a non-empty value, so no emptiness check is required.
PROMETHEUS_URL="${PROMETHEUS_URL:-https://prometheus-experimental.avax-dev.network}"

# The credentials used for remote write have no default and must be supplied
# by the caller.
PROMETHEUS_ID="${PROMETHEUS_ID:-}"
if [[ -z "${PROMETHEUS_ID}" ]]; then
  echo "Please provide a value for PROMETHEUS_ID"
  exit 1
fi

PROMETHEUS_PASSWORD="${PROMETHEUS_PASSWORD:-}"
if [[ -z "${PROMETHEUS_PASSWORD}" ]]; then
  echo "Please provide a value for PROMETHEUS_PASSWORD"
  exit 1
fi

# This was the LTS version when this script was written. Probably not
# much reason to update it unless something breaks since the usage
# here is only to collect metrics from temporary networks.
VERSION="2.45.3"

# Ensure the prometheus command is locally available. Resolution order:
#   1. a prometheus binary on the PATH
#   2. a previously installed copy at ./bin/prometheus
#   3. a fresh download of the pinned release into ./bin/prometheus
CMD=prometheus
if ! command -v "${CMD}" &> /dev/null; then
  # Try to use a local version
  CMD="${PWD}/bin/prometheus"
  if ! command -v "${CMD}" &> /dev/null; then
    echo "prometheus not found, attempting to install..."

    # Determine the distribution to download. The presence of sw_vers is
    # used to detect macos.
    if command -v sw_vers &> /dev/null; then
      echo "on macos, only amd64 binaries are available so rosetta is required on apple silicon machines."
      echo "to avoid using rosetta, install via homebrew: brew install prometheus"
      DIST=darwin
    else
      # `uname -m` is used rather than `uname -i`, which is non-portable and
      # reports "unknown" on some linux distributions.
      ARCH="$(uname -m)"
      if [[ "${ARCH}" != "x86_64" ]]; then
        echo "on linux, only amd64 binaries are available. manual installation of prometheus is required."
        exit 1
      fi
      DIST="linux"
    fi

    # Install the specified release
    PROMETHEUS_FILE="prometheus-${VERSION}.${DIST}-amd64"
    URL="https://github.com/prometheus/prometheus/releases/download/v${VERSION}/${PROMETHEUS_FILE}.tar.gz"
    # -f makes curl exit non-zero on an HTTP error instead of piping the
    # error page into tar.
    curl -s -f -L "${URL}" | tar zxv -C /tmp > /dev/null
    mkdir -p "$(dirname "${CMD}")"
    cp /tmp/"${PROMETHEUS_FILE}/prometheus" "${CMD}"
  fi
fi

# Configure prometheus
#
# Scrape targets are discovered from the JSON files present in FILE_SD_PATH.
FILE_SD_PATH="${PROMETHEUS_WORKING_DIR}/file_sd_configs"
mkdir -p "${FILE_SD_PATH}"

echo "writing configuration..."

# The configuration embeds the remote write credentials, so ensure the file
# is only accessible to the current user before anything is written to it.
touch "${PROMETHEUS_WORKING_DIR}"/prometheus.yaml
chmod 600 "${PROMETHEUS_WORKING_DIR}"/prometheus.yaml

cat >"${PROMETHEUS_WORKING_DIR}"/prometheus.yaml <<EOL
# my global config
global:
  # Make sure this value takes into account the network-shutdown-delay in tests/fixture/e2e/env.go
  scrape_interval: 10s # Scrape targets every 10 seconds. The default is every 1 minute.
  evaluation_interval: 10s # Evaluate rules every 10 seconds. The default is every 1 minute.
  scrape_timeout: 5s # The default is 10s
scrape_configs:
  - job_name: "avalanchego"
    metrics_path: "/ext/metrics"
    file_sd_configs:
      - files:
          - '${FILE_SD_PATH}/*.json'
remote_write:
  - url: "${PROMETHEUS_URL}/api/v1/write"
    basic_auth:
      username: "${PROMETHEUS_ID}"
      password: "${PROMETHEUS_PASSWORD}"
EOL

# Launch the agent in the background from its working directory so that the
# relative config and log paths resolve there, then record its pid so that
# the instance can be found and stopped later.
echo "starting prometheus..."
cd "${PROMETHEUS_WORKING_DIR}"
nohup "${CMD}" \
  --config.file=prometheus.yaml \
  --web.listen-address=localhost:0 \
  --enable-feature=agent \
  > prometheus.log 2>&1 &
AGENT_PID=$!
echo "${AGENT_PID}" > "${PIDFILE}"
echo "running with pid ${AGENT_PID}"
1 change: 1 addition & 0 deletions tests/load/load_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ var _ = ginkgo.Describe("[Load Simulator]", ginkgo.Ordered, func() {
env = e2e.NewTestEnvironment(
flagVars,
utils.NewTmpnetNetwork(
"subnet-evm-small-load",
nodes,
tmpnet.FlagsMap{
// The default tmpnet log level (debug) induces too much overhead for load testing.
Expand Down
3 changes: 2 additions & 1 deletion tests/utils/tmpnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func NewTmpnetNodes(count int) []*tmpnet.Node {
return nodes
}

func NewTmpnetNetwork(nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*tmpnet.Subnet) *tmpnet.Network {
func NewTmpnetNetwork(owner string, nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*tmpnet.Subnet) *tmpnet.Network {
defaultFlags := tmpnet.FlagsMap{}
defaultFlags.SetDefaults(flags)
defaultFlags.SetDefaults(tmpnet.FlagsMap{
Expand All @@ -33,6 +33,7 @@ func NewTmpnetNetwork(nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*t
config.ProposerVMUseCurrentHeightKey: true,
})
return &tmpnet.Network{
Owner: owner,
DefaultFlags: defaultFlags,
Nodes: nodes,
Subnets: subnets,
Expand Down
1 change: 1 addition & 0 deletions tests/warp/warp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte {
env := e2e.NewTestEnvironment(
flagVars,
utils.NewTmpnetNetwork(
"subnet-evm-warp-e2e",
nodes,
tmpnet.FlagsMap{},
utils.NewTmpnetSubnet(subnetAName, genesisPath, chainConfig, nodes...),
Expand Down

0 comments on commit 7b934e9

Please sign in to comment.