diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 480f3f1b24..b50cba4a26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,8 +9,7 @@ on: pull_request: env: - tmpnet_data_path: ~/.tmpnet/networks/1000 - + grafana_url: https://grafana-experimental.avax-dev.network/d/kBQpRdWnk/avalanche-main-dashboard?orgId=1&refresh=10s&var-filter=is_ephemeral_node%7C%3D%7Cfalse&var-filter=gh_repo%7C%3D%7Cava-labs%2Fsubnet-evm&var-filter=gh_run_id%7C%3D%7C${{ github.run_id }}&var-filter=gh_run_attempt%7C%3D%7C${{ github.run_attempt }} jobs: lint_test: name: Lint @@ -125,15 +124,43 @@ jobs: - name: Build Subnet-EVM Plugin Binary shell: bash run: ./scripts/build.sh /tmp/e2e-test/avalanchego/plugins/srEXiWaHuhNyGwPUi444Tu47ZEDwxTWrbQiuD7FmgSAQ6X7Dy + - name: Start prometheus + shell: bash + run: bash -x ./scripts/run_prometheus.sh + env: + PROMETHEUS_ID: ${{ secrets.PROMETHEUS_ID }} + PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD }} + - name: Start promtail + shell: bash + run: bash -x ./scripts/run_promtail.sh + env: + LOKI_ID: ${{ secrets.LOKI_ID }} + LOKI_PASSWORD: ${{ secrets.LOKI_PASSWORD }} + - name: Notify of metrics availability + shell: bash + run: .github/workflows/notify-metrics-availability.sh + env: + GRAFANA_URL: ${{ env.grafana_url }} + GH_JOB_ID: ${{ github.job }} - name: Run Warp E2E Tests shell: bash run: AVALANCHEGO_BUILD_PATH=/tmp/e2e-test/avalanchego ./scripts/run_ginkgo_warp.sh + env: + GH_REPO: ${{ github.repository }} + GH_WORKFLOW: ${{ github.workflow }} + GH_RUN_ID: ${{ github.run_id }} + GH_RUN_NUMBER: ${{ github.run_number }} + GH_RUN_ATTEMPT: ${{ github.run_attempt }} + GH_JOB_ID: ${{ github.job }} - name: Upload tmpnet network dir for warp testing if: always() uses: actions/upload-artifact@v4 with: name: warp-tmpnet-data - path: ${{ env.tmpnet_data_path }} + path: | + ~/.tmpnet/networks + ~/.tmpnet/prometheus/prometheus.log + ~/.tmpnet/promtail/promtail.log if-no-files-found: error e2e_load: name: e2e load tests @@ -154,15 +181,43 @@ jobs: - name: Build Subnet-EVM Plugin Binary shell: bash run: ./scripts/build.sh /tmp/e2e-test/avalanchego/plugins/srEXiWaHuhNyGwPUi444Tu47ZEDwxTWrbQiuD7FmgSAQ6X7Dy + - name: Start prometheus + shell: bash + run: bash -x ./scripts/run_prometheus.sh + env: + PROMETHEUS_ID: ${{ secrets.PROMETHEUS_ID }} + PROMETHEUS_PASSWORD: ${{ secrets.PROMETHEUS_PASSWORD }} + - name: Start promtail + shell: bash + run: bash -x ./scripts/run_promtail.sh + env: + LOKI_ID: ${{ secrets.LOKI_ID }} + LOKI_PASSWORD: ${{ secrets.LOKI_PASSWORD }} + - name: Notify of metrics availability + shell: bash + run: .github/workflows/notify-metrics-availability.sh + env: + GRAFANA_URL: ${{ env.grafana_url }} + GH_JOB_ID: ${{ github.job }} - name: Run E2E Load Tests shell: bash run: AVALANCHEGO_BUILD_PATH=/tmp/e2e-test/avalanchego ./scripts/run_ginkgo_load.sh + env: + GH_REPO: ${{ github.repository }} + GH_WORKFLOW: ${{ github.workflow }} + GH_RUN_ID: ${{ github.run_id }} + GH_RUN_NUMBER: ${{ github.run_number }} + GH_RUN_ATTEMPT: ${{ github.run_attempt }} + GH_JOB_ID: ${{ github.job }} - name: Upload tmpnet network dir for load testing if: always() uses: actions/upload-artifact@v4 with: name: load-tmpnet-data - path: ${{ env.tmpnet_data_path }} + path: | + ~/.tmpnet/networks + ~/.tmpnet/prometheus/prometheus.log + ~/.tmpnet/promtail/promtail.log if-no-files-found: error build_image: diff --git a/.github/workflows/notify-metrics-availability.sh b/.github/workflows/notify-metrics-availability.sh new file mode 100755 index 0000000000..fd69064045 --- /dev/null +++ b/.github/workflows/notify-metrics-availability.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Timestamps are in seconds +from_timestamp="$(date '+%s')" +monitoring_period=900 # 15 minutes +to_timestamp="$((from_timestamp + monitoring_period))" + +# Grafana expects microseconds, so pad timestamps with 3 zeros +metrics_url="${GRAFANA_URL}&var-filter=gh_job_id%7C%3D%7C${GH_JOB_ID}&from=${from_timestamp}000&to=${to_timestamp}000" + +# Optionally ensure that the link displays metrics only for the shared +# network rather than mixing it with the results for private networks. +if [[ -n "${FILTER_BY_OWNER:-}" ]]; then + metrics_url="${metrics_url}&var-filter=network_owner%7C%3D%7C${FILTER_BY_OWNER}" +fi + +echo "::notice links::metrics ${metrics_url}" diff --git a/go.mod b/go.mod index 158237581c..3510d1be17 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.21 require ( github.com/VictoriaMetrics/fastcache v1.10.0 - github.com/ava-labs/avalanchego v1.11.2 + github.com/ava-labs/avalanchego v1.11.3-0.20240318185506-35c4149e089b github.com/cespare/cp v0.1.0 github.com/davecgh/go-spew v1.1.1 github.com/deckarep/golang-set/v2 v2.1.0 @@ -15,7 +15,6 @@ require ( github.com/fsnotify/fsnotify v1.6.0 github.com/gballet/go-libpcsclite v0.0.0-20191108122812-4678299bea08 github.com/go-cmd/cmd v1.4.1 - github.com/golang/protobuf v1.5.3 github.com/google/uuid v1.6.0 github.com/gorilla/rpc v1.2.0 github.com/gorilla/websocket v1.4.2 @@ -82,6 +81,7 @@ require ( github.com/go-stack/stack v1.8.1 // indirect github.com/go-task/slim-sprig v0.0.0-20230315185526-52ccab3ef572 // indirect github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.3 // indirect github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb // indirect github.com/google/btree v1.1.2 // indirect github.com/google/go-cmp v0.6.0 // indirect diff --git a/go.sum b/go.sum index 624152a4e4..eb3211be78 100644 --- a/go.sum +++ b/go.sum @@ -56,8 +56,8 @@ github.com/ajg/form v1.5.1/go.mod h1:uL1WgH+h2mgNtvBq0339dVnzXdBETtL2LeUXaIv25UY github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156 h1:eMwmnE/GDgah4HI848JfFxHt+iPb26b4zyfspmqY0/8= github.com/allegro/bigcache v1.2.1-0.20190218064605-e24eb225f156/go.mod h1:Cb/ax3seSYIx7SuZdm2G2xzfwmv3TPSk2ucNfQESPXM= github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8= -github.com/ava-labs/avalanchego v1.11.2 h1:8iodZ+RjqpRwHdiXPPtvaNt72qravge7voGzw3yPRzg= -github.com/ava-labs/avalanchego v1.11.2/go.mod h1:oTVnF9idL57J4LM/6RByTmKhI4QvV6OCnF99ysyBljE= +github.com/ava-labs/avalanchego v1.11.3-0.20240318185506-35c4149e089b h1:1GnaPVt62+ONLpkU0gObC3x5EOw+dcbZEbjGXD9/4j0= +github.com/ava-labs/avalanchego v1.11.3-0.20240318185506-35c4149e089b/go.mod h1:Yhtr0gRX0QFBb1Y3WQtymp3vm2qIukQqbkBQ/BP6Bus= github.com/ava-labs/coreth v0.13.2-0.20240304213436-8afbf2d68461 h1:SIwGF3eVEwmexLm7is/MvG7W5sbmpGXaUT6RfUPP3jw= github.com/ava-labs/coreth v0.13.2-0.20240304213436-8afbf2d68461/go.mod h1:v24MTMbxFSvyM7YeQFyWiXjIzVo2+UVs7tgH7xrByew= github.com/aymerick/raymond v2.0.3-0.20180322193309-b565731e1464+incompatible/go.mod h1:osfaiScAUVup+UC9Nfq76eWqDhXlp+4UYaA8uhTBO6g= diff --git a/scripts/run_prometheus.sh b/scripts/run_prometheus.sh new file mode 100644 index 0000000000..1952227231 --- /dev/null +++ b/scripts/run_prometheus.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Starts a prometheus instance in agent-mode, forwarding to a central +# instance. Intended to enable metrics collection from temporary networks running +# locally and in CI. +# +# The prometheus instance will remain running in the background and will forward +# metrics to the central instance for all tmpnet networks. +# +# To stop it: +# +# $ kill -9 `cat ~/.tmpnet/prometheus/run.pid` && rm ~/.tmpnet/prometheus/run.pid +# + +# e.g., +# PROMETHEUS_ID= PROMETHEUS_PASSWORD= ./scripts/run_prometheus.sh +if ! [[ "$0" =~ scripts/run_prometheus.sh ]]; then + echo "must be run from repository root" + exit 255 +fi + +PROMETHEUS_WORKING_DIR="${HOME}/.tmpnet/prometheus" +PIDFILE="${PROMETHEUS_WORKING_DIR}"/run.pid + +# First check if an agent-mode prometheus is already running. A single instance can collect +# metrics from all local temporary networks. +if pgrep --pidfile="${PIDFILE}" -f 'prometheus.*enable-feature=agent' &> /dev/null; then + echo "prometheus is already running locally with --enable-feature=agent" + exit 0 +fi + +PROMETHEUS_URL="${PROMETHEUS_URL:-https://prometheus-experimental.avax-dev.network}" +if [[ -z "${PROMETHEUS_URL}" ]]; then + echo "Please provide a value for PROMETHEUS_URL" + exit 1 +fi + +PROMETHEUS_ID="${PROMETHEUS_ID:-}" +if [[ -z "${PROMETHEUS_ID}" ]]; then + echo "Please provide a value for PROMETHEUS_ID" + exit 1 +fi + +PROMETHEUS_PASSWORD="${PROMETHEUS_PASSWORD:-}" +if [[ -z "${PROMETHEUS_PASSWORD}" ]]; then + echo "Plase provide a value for PROMETHEUS_PASSWORD" + exit 1 +fi + +# This was the LTS version when this script was written. Probably not +# much reason to update it unless something breaks since the usage +# here is only to collect metrics from temporary networks. +VERSION="2.45.3" + +# Ensure the prometheus command is locally available +CMD=prometheus +if ! command -v "${CMD}" &> /dev/null; then + # Try to use a local version + CMD="${PWD}/bin/prometheus" + if ! command -v "${CMD}" &> /dev/null; then + echo "prometheus not found, attempting to install..." + + # Determine the arch + if which sw_vers &> /dev/null; then + echo "on macos, only amd64 binaries are available so rosetta is required on apple silicon machines." + echo "to avoid using rosetta, install via homebrew: brew install prometheus" + DIST=darwin + else + ARCH="$(uname -i)" + if [[ "${ARCH}" != "x86_64" ]]; then + echo "on linux, only amd64 binaries are available. manual installation of prometheus is required." + exit 1 + else + DIST="linux" + fi + fi + + # Install the specified release + PROMETHEUS_FILE="prometheus-${VERSION}.${DIST}-amd64" + URL="https://github.com/prometheus/prometheus/releases/download/v${VERSION}/${PROMETHEUS_FILE}.tar.gz" + curl -s -L "${URL}" | tar zxv -C /tmp > /dev/null + mkdir -p "$(dirname "${CMD}")" + cp /tmp/"${PROMETHEUS_FILE}/prometheus" "${CMD}" + fi +fi + +# Configure prometheus +FILE_SD_PATH="${PROMETHEUS_WORKING_DIR}/file_sd_configs" +mkdir -p "${FILE_SD_PATH}" + +echo "writing configuration..." +cat >"${PROMETHEUS_WORKING_DIR}"/prometheus.yaml < prometheus.log 2>&1 & +echo $! > "${PIDFILE}" +echo "running with pid $(cat "${PIDFILE}")" diff --git a/scripts/run_promtail.sh b/scripts/run_promtail.sh new file mode 100755 index 0000000000..9b386d3d55 --- /dev/null +++ b/scripts/run_promtail.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash + +set -euo pipefail + +# Starts a promtail instance to collect logs from temporary networks +# running locally and in CI. +# +# The promtail instance will remain running in the background and will forward +# logs to the central instance for all tmpnet networks. +# +# To stop it: +# +# $ kill -9 `cat ~/.tmpnet/promtail/run.pid` && rm ~/.tmpnet/promtail/run.pid +# + +# e.g., +# LOKI_ID= LOKI_PASSWORD= ./scripts/run_promtail.sh +if ! [[ "$0" =~ scripts/run_promtail.sh ]]; then + echo "must be run from repository root" + exit 255 +fi + +PROMTAIL_WORKING_DIR="${HOME}/.tmpnet/promtail" +PIDFILE="${PROMTAIL_WORKING_DIR}"/run.pid + +# First check if promtail is already running. A single instance can +# collect logs from all local temporary networks. +if pgrep --pidfile="${PIDFILE}" &> /dev/null; then + echo "promtail is already running" + exit 0 +fi + +LOKI_URL="${LOKI_URL:-https://loki-experimental.avax-dev.network}" +if [[ -z "${LOKI_URL}" ]]; then + echo "Please provide a value for LOKI_URL" + exit 1 +fi + +LOKI_ID="${LOKI_ID:-}" +if [[ -z "${LOKI_ID}" ]]; then + echo "Please provide a value for LOKI_ID" + exit 1 +fi + +LOKI_PASSWORD="${LOKI_PASSWORD:-}" +if [[ -z "${LOKI_PASSWORD}" ]]; then + echo "Plase provide a value for LOKI_PASSWORD" + exit 1 +fi + +# Version as of this writing +VERSION="v2.9.5" + +# Ensure the promtail command is locally available +CMD=promtail +if ! command -v "${CMD}" &> /dev/null; then + # Try to use a local version + CMD="${PWD}/bin/promtail" + if ! command -v "${CMD}" &> /dev/null; then + echo "promtail not found, attempting to install..." + # Determine the arch + if which sw_vers &> /dev/null; then + DIST="darwin-$(uname -m)" + else + ARCH="$(uname -i)" + if [[ "${ARCH}" == "aarch64" ]]; then + ARCH="arm64" + elif [[ "${ARCH}" == "x86_64" ]]; then + ARCH="amd64" + fi + DIST="linux-${ARCH}" + fi + + # Install the specified release + PROMTAIL_FILE="promtail-${DIST}" + ZIP_PATH="/tmp/${PROMTAIL_FILE}.zip" + BIN_DIR="$(dirname "${CMD}")" + URL="https://github.com/grafana/loki/releases/download/${VERSION}/promtail-${DIST}.zip" + curl -L -o "${ZIP_PATH}" "${URL}" + unzip "${ZIP_PATH}" -d "${BIN_DIR}" + mv "${BIN_DIR}/${PROMTAIL_FILE}" "${CMD}" + fi +fi + +# Configure promtail +FILE_SD_PATH="${PROMTAIL_WORKING_DIR}/file_sd_configs" +mkdir -p "${FILE_SD_PATH}" + +echo "writing configuration..." +cat >"${PROMTAIL_WORKING_DIR}"/promtail.yaml < promtail.log 2>&1 & +echo $! > "${PIDFILE}" +echo "running with pid $(cat "${PIDFILE}")" diff --git a/tests/README.md b/tests/README.md index a99cac0b6f..0b0cf9d6d4 100644 --- a/tests/README.md +++ b/tests/README.md @@ -29,3 +29,11 @@ The network started by `tmpnetctl` won't come with subnets configured, so the test suite will add them to the network the first time it runs. Subsequent test runs will be able to reuse those subnets without having to set them up. + +## Collection of logs and metrics + +Logs and metrics can be optionally collected for tmpnet networks and +viewed in grafana. The details of configuration and usage for +subnet-evm mirror those of avalanchego and the same +[documentation](https://github.com/ava-labs/avalanchego/blob/master/tests/fixture/tmpnet/README.md#Monitoring) +applies. diff --git a/tests/load/load_test.go b/tests/load/load_test.go index 5a2b5c21e5..99094291ce 100644 --- a/tests/load/load_test.go +++ b/tests/load/load_test.go @@ -68,6 +68,7 @@ var _ = ginkgo.Describe("[Load Simulator]", ginkgo.Ordered, func() { env = e2e.NewTestEnvironment( flagVars, utils.NewTmpnetNetwork( + "subnet-evm-small-load", nodes, tmpnet.FlagsMap{ // The default tmpnet log level (debug) induces too much overhead for load testing. diff --git a/tests/utils/tmpnet.go b/tests/utils/tmpnet.go index af1e24908a..babb70bc58 100644 --- a/tests/utils/tmpnet.go +++ b/tests/utils/tmpnet.go @@ -24,7 +24,7 @@ func NewTmpnetNodes(count int) []*tmpnet.Node { return nodes } -func NewTmpnetNetwork(nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*tmpnet.Subnet) *tmpnet.Network { +func NewTmpnetNetwork(owner string, nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*tmpnet.Subnet) *tmpnet.Network { defaultFlags := tmpnet.FlagsMap{} defaultFlags.SetDefaults(flags) defaultFlags.SetDefaults(tmpnet.FlagsMap{ @@ -33,6 +33,7 @@ func NewTmpnetNetwork(nodes []*tmpnet.Node, flags tmpnet.FlagsMap, subnets ...*t config.ProposerVMUseCurrentHeightKey: true, }) return &tmpnet.Network{ + Owner: owner, DefaultFlags: defaultFlags, Nodes: nodes, Subnets: subnets, diff --git a/tests/warp/warp_test.go b/tests/warp/warp_test.go index 3475aecbc8..df97f1c9c5 100644 --- a/tests/warp/warp_test.go +++ b/tests/warp/warp_test.go @@ -104,6 +104,7 @@ var _ = ginkgo.SynchronizedBeforeSuite(func() []byte { env := e2e.NewTestEnvironment( flagVars, utils.NewTmpnetNetwork( + "subnet-evm-warp-e2e", nodes, tmpnet.FlagsMap{}, utils.NewTmpnetSubnet(subnetAName, genesisPath, chainConfig, nodes...),