From 66454fe177a0e770d8c2f9dbf923ae45b3cfb41d Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 16 Sep 2024 14:09:32 +0100 Subject: [PATCH 1/2] ref(ci): consolidate cached states workflows and scripts We've been using multiple approaches to locate and retrieve cached states in GCP. However, this has made it difficult to reuse the same methods across new workflows or different scenarios. To address this, we've streamlined the process to make it more reusable in other contexts. This change will support deploying instances from both the `main` branch and `release`, simplifying future implementations and speeding up the process. Changes: - Use a single bash script (`gcp-get-cached-disks.sh`) to get cached states names and availability - Move script logic from `sub-find-cached-disks.yml` to `gcp-get-cached-disks.sh` and adapt `sub-find-cached-disks.yml` to allow to output available disks and disks names. - Simplify parameters usage in `sub-deploy-integration-tests-gcp.yml` and convert the `Find ${{ inputs.test_id }} cached state disk` step into an independent job, to be able to use the `sub-find-cached-disks.yml` reusable workflow - Remove repetition in `sub-ci-integration-tests-gcp.yml` --- .../scripts/gcp-get-available-disks.sh | 42 ------- .../workflows/scripts/gcp-get-cached-disks.sh | 114 ++++++++++++------ .../sub-deploy-integration-tests-gcp.yml | 109 +++++++---------- .github/workflows/sub-find-cached-disks.yml | 73 +++++------ 4 files changed, 154 insertions(+), 184 deletions(-) delete mode 100755 .github/workflows/scripts/gcp-get-available-disks.sh diff --git a/.github/workflows/scripts/gcp-get-available-disks.sh b/.github/workflows/scripts/gcp-get-available-disks.sh deleted file mode 100755 index 667c6f36c4b..00000000000 --- a/.github/workflows/scripts/gcp-get-available-disks.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -# Description: -# Check if there are cached state disks available for subsequent jobs to use. 
-# -# This lookup uses the state version from constants.rs. -# It accepts disks generated by any branch, including draft and unmerged PRs. -# -# If the disk exists, sets the corresponding output to "true": -# - lwd_tip_disk -# - zebra_tip_disk -# - zebra_checkpoint_disk - -set -euxo pipefail - - -LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) -echo "STATE_VERSION: ${LOCAL_STATE_VERSION}" - -# Function to find a disk image and output its name -find_disk_image() { -local base_name="${1}" -local disk_type="${2}" -local disk_pattern="${base_name}-cache" -local output_var="${base_name}_${disk_type}_disk" -local disk_image - -disk_image=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) - -if [[ -z "${disk_image}" ]]; then - echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" - echo "${output_var}=false" >> "${GITHUB_OUTPUT}" -else - echo "Disk: ${disk_image}" - echo "${output_var}=true" >> "${GITHUB_OUTPUT}" -fi -} - -# Find and output LWD and Zebra disks -find_disk_image "lwd" "tip" -find_disk_image "zebrad" "tip" -find_disk_image "zebrad" "checkpoint" diff --git a/.github/workflows/scripts/gcp-get-cached-disks.sh b/.github/workflows/scripts/gcp-get-cached-disks.sh index 9b05c257096..0f38addf10f 100755 --- a/.github/workflows/scripts/gcp-get-cached-disks.sh +++ b/.github/workflows/scripts/gcp-get-cached-disks.sh @@ -1,20 +1,33 @@ #!/usr/bin/env bash -# Description: # This script finds a cached Google Cloud Compute image based on specific criteria. -# It prioritizes images from the current commit, falls back to the main branch, -# and finally checks other branches if needed. The selected image is used for -# setting up the environment in a CI/CD pipeline. 
+# +# If there are multiple disks: +# - prefer images generated from the same commit, then +# - if prefer_main_cached_state is true, prefer images from the `main` branch, then +# - use any images from any other branch or commit. +# +# Within each of these categories: +# - prefer newer images to older images +# +# The selected image is used for setting up the environment in a CI/CD pipeline. +# It also checks if specific disk types are available for subsequent jobs. set -eo pipefail -# Function to find and report a cached disk image +# Extract local state version +echo "Extracting local state version..." +LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) +echo "STATE_VERSION: ${LOCAL_STATE_VERSION}" + +# Function to find a cached disk image based on the git pattern (commit, main, or any branch) find_cached_disk_image() { - local search_pattern="${1}" + local git_pattern="${1}" local git_source="${2}" local disk_name + local disk_search_pattern="${DISK_PREFIX}-${git_pattern}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}" - disk_name=$(gcloud compute images list --filter="status=READY AND name~${search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) + disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_search_pattern}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) # Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout if [[ -n "${disk_name}" ]]; then @@ -27,46 +40,71 @@ find_cached_disk_image() { fi } -# Extract local state version -echo "Extracting local state version..." 
-LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "${GITHUB_WORKSPACE}/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) -echo "STATE_VERSION: ${LOCAL_STATE_VERSION}" +# Check if both $DISK_PREFIX and $DISK_SUFFIX are set, as they are required to find a cached disk image +if [[ -n "${DISK_PREFIX}" && -n "${DISK_SUFFIX}" ]]; then + # Find the most suitable cached disk image + echo "Finding the most suitable cached disk image..." + CACHED_DISK_NAME="" + + # First, try to find a cached disk image from the current commit + CACHED_DISK_NAME=$(find_cached_disk_image ".+-${GITHUB_SHA_SHORT}" "commit") -# Define DISK_PREFIX based on the requiring state directory -if [[ "${NEEDS_LWD_STATE}" == "true" ]]; then - DISK_PREFIX="${LWD_STATE_DIR}" + # If no cached disk image is found + if [[ -z "${CACHED_DISK_NAME}" ]]; then + # Check if main branch images are preferred + if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then + CACHED_DISK_NAME=$(find_cached_disk_image "main-[0-9a-f]+" "main branch") + # Else, try to find one from any branch + else + CACHED_DISK_NAME=$(find_cached_disk_image ".+-[0-9a-f]+" "any branch") + fi + fi + + # Handle case where no suitable disk image is found + if [[ -z "${CACHED_DISK_NAME}" ]]; then + echo "No suitable cached state disk available." + echo "Cached state test jobs must depend on the cached state rebuild job." + exit 1 + fi + + echo "Selected Disk: ${CACHED_DISK_NAME}" else - DISK_PREFIX="${ZEBRA_STATE_DIR:-${DISK_PREFIX}}" + echo "DISK_PREFIX or DISK_SUFFIX is not set. Skipping disk image search." fi -# Find the most suitable cached disk image -echo "Finding the most suitable cached disk image..." 
-if [[ -z "${CACHED_DISK_NAME}" ]]; then - # Try to find a cached disk image from the current commit - COMMIT_DISK_PREFIX="${DISK_PREFIX}-.+-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}" - CACHED_DISK_NAME=$(find_cached_disk_image "${COMMIT_DISK_PREFIX}" "commit") - # If no cached disk image is found, try to find one from the main branch - if [[ "${PREFER_MAIN_CACHED_STATE}" == "true" ]]; then - MAIN_DISK_PREFIX="${DISK_PREFIX}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}" - CACHED_DISK_NAME=$(find_cached_disk_image "${MAIN_DISK_PREFIX}" "main branch") - # Else, try to find one from any branch +# Function to find and output available disk image types (e.g., lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk) +find_available_disk_type() { + local base_name="${1}" + local disk_type="${2}" + local disk_pattern="${base_name}-cache" + local output_var="${base_name}_${disk_type}_disk" + local disk_name + + disk_name=$(gcloud compute images list --filter="status=READY AND name~${disk_pattern}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${disk_type}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) + + # Use >&2 to redirect to stderr and avoid sending wrong assignments to stdout + if [[ -n "${disk_name}" ]]; then + echo "Found ${disk_type^^} disk: ${disk_name} for ${base_name^^} on network: ${NETWORK}" >&2 + disk_description=$(gcloud compute images describe "${disk_name}" --format="value(DESCRIPTION)") + echo "Description: ${disk_description}" >&2 + echo "true" # This is the actual return value when a disk is found else - ANY_DISK_PREFIX="${DISK_PREFIX}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-${DISK_SUFFIX}" - CACHED_DISK_NAME=$(find_cached_disk_image "${ANY_DISK_PREFIX}" "any branch") + echo "No ${disk_type^^} disk found for ${base_name^^} on network: ${NETWORK}" >&2 + echo "false" # This is the actual return value when no disk is found fi +} +if [[ -n "${NETWORK}" ]]; then + # Check for specific disk 
images (lwd_tip_disk, zebra_tip_disk, zebra_checkpoint_disk) + echo "Checking for specific disk images..." + LWD_TIP_DISK=$(find_available_disk_type "lwd" "tip") + ZEBRA_TIP_DISK=$(find_available_disk_type "zebrad" "tip") + ZEBRA_CHECKPOINT_DISK=$(find_available_disk_type "zebrad" "checkpoint") fi -# Handle case where no suitable disk image is found -if [[ -z "${CACHED_DISK_NAME}" ]]; then - echo "No suitable cached state disk available." - echo "Expected pattern: ${COMMIT_DISK_PREFIX}" - echo "Cached state test jobs must depend on the cached state rebuild job." - exit 1 -fi - -echo "Selected Disk: ${CACHED_DISK_NAME}" - # Exporting variables for subsequent steps echo "Exporting variables for subsequent steps..." export CACHED_DISK_NAME="${CACHED_DISK_NAME}" export LOCAL_STATE_VERSION="${LOCAL_STATE_VERSION}" +export LWD_TIP_DISK="${LWD_TIP_DISK}" +export ZEBRA_TIP_DISK="${ZEBRA_TIP_DISK}" +export ZEBRA_CHECKPOINT_DISK="${ZEBRA_CHECKPOINT_DISK}" diff --git a/.github/workflows/sub-deploy-integration-tests-gcp.yml b/.github/workflows/sub-deploy-integration-tests-gcp.yml index 09af3dd310c..4d1d346ff1d 100644 --- a/.github/workflows/sub-deploy-integration-tests-gcp.yml +++ b/.github/workflows/sub-deploy-integration-tests-gcp.yml @@ -35,23 +35,15 @@ on: # Cached state # - # TODO: find a better name - root_state_path: - required: false - type: string - default: '/zebrad-cache' - description: 'Cached state base directory path' - # TODO: find a better name zebra_state_dir: required: false type: string - default: '' + default: '/var/cache/zebrad-cache' description: 'Zebra cached state directory and input image prefix to search in GCP' - # TODO: find a better name lwd_state_dir: required: false type: string - default: '' + default: '/var/cache/lwd-cache' description: 'Lightwalletd cached state directory and input image prefix to search in GCP' disk_prefix: required: false @@ -61,6 +53,7 @@ on: disk_suffix: required: false type: string + default: 'tip' description: 'Image 
name suffix' needs_zebra_state: required: true @@ -104,6 +97,29 @@ env: CACHED_STATE_UPDATE_LIMIT: 576 jobs: + # Find a cached state disk for ${{ inputs.test_id }}, matching all of: + # - disk cached state prefix -> zebrad-cache or lwd-cache + # - state version (from the source code) - v{N} + # - network (network) - mainnet or testnet + # - disk target height kind (disk_suffix) - checkpoint or tip + # + # If the test needs a lightwalletd state (needs_lwd_state) set the input disk_prefix accordingly + # - To lwd-cache if needed + # - To zebrad-cache if not + # + # Passes the disk name to subsequent jobs using `cached_disk_name` output + # Passes the state version to subsequent jobs using `state_version` output + # + get-disk-name: + name: Get disk name + uses: ./.github/workflows/sub-find-cached-disks.yml + with: + network: ${{ inputs.network || vars.ZCASH_NETWORK }} + disk_prefix: ${{ inputs.needs_lwd_state && 'lwd-cache' || inputs.needs_zebra_state && 'zebrad-cache' }} + disk_suffix: ${{ inputs.disk_suffix }} + prefer_main_cached_state: ${{ inputs.prefer_main_cached_state }} + test_id: ${{ inputs.test_id }} + # Show all the test logs, then follow the logs of the test we just launched, until it finishes. # Then check the result of the test. 
# @@ -111,9 +127,14 @@ jobs: test-result: name: Run ${{ inputs.test_id }} test runs-on: zfnd-runners + needs: [ get-disk-name ] + if: ${{ !cancelled() && !failure() }} timeout-minutes: ${{ inputs.is_long_test && 7200 || 180 }} outputs: - cached_disk_name: ${{ steps.get-disk-name.outputs.cached_disk_name }} + cached_disk_name: ${{ needs.get-disk-name.outputs.cached_disk_name }} + state_version: ${{ needs.get-disk-name.outputs.state_version }} + env: + CACHED_DISK_NAME: ${{ needs.get-disk-name.outputs.cached_disk_name }} permissions: contents: 'read' id-token: 'write' @@ -158,47 +179,8 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 - # Find a cached state disk for this job, matching all of: - # - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache - # - state version (from the source code) - v{N} - # - network (network) - mainnet or testnet - # - disk target height kind (disk_suffix) - checkpoint or tip - # - # If the test needs a lightwalletd state (needs_lwd_state) set the variable DISK_PREFIX accordingly - # - To ${{ inputs.lwd_state_dir }}" if needed - # - To ${{ inputs.zebra_state_dir || inputs.disk_prefix }} if not - # - # If there are multiple disks: - # - prefer images generated from the same commit, then - # - if prefer_main_cached_state is true, prefer images from the `main` branch, then - # - use any images from any other branch or commit. - # Within each of these categories: - # - prefer newer images to older images - # - # Passes the disk name to subsequent steps using $CACHED_DISK_NAME env variable - # Passes the state version to subsequent steps using $STATE_VERSION env variable - # - # TODO: move this script into a file, and call it from sub-find-cached-disks.yml as well. 
- - name: Find ${{ inputs.test_id }} cached state disk - id: get-disk-name - if: ${{ inputs.needs_zebra_state || inputs.needs_lwd_state }} - env: - GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }} - NEEDS_LWD_STATE: ${{ inputs.needs_lwd_state }} - LWD_STATE_DIR: ${{ inputs.lwd_state_dir }} - ZEBRA_STATE_DIR: ${{ inputs.zebra_state_dir }} - DISK_PREFIX: ${{ inputs.disk_prefix }} - NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input - DISK_SUFFIX: ${{ inputs.disk_suffix }} - PREFER_MAIN_CACHED_STATE: ${{ inputs.prefer_main_cached_state }} - run: | - source ./.github/workflows/scripts/gcp-get-cached-disks.sh - echo "STATE_VERSION=${LOCAL_STATE_VERSION}" >> "${GITHUB_ENV}" - echo "CACHED_DISK_NAME=${CACHED_DISK_NAME}" >> "${GITHUB_ENV}" - echo "cached_disk_name=${CACHED_DISK_NAME}" >> "${GITHUB_OUTPUT}" - # Create a Compute Engine virtual machine and attach a cached state disk using the - # $CACHED_DISK_NAME variable as the source image to populate the disk cached state + # $CACHED_DISK_NAME env as the source image to populate the disk cached state # if the test needs it. - name: Create ${{ inputs.test_id }} GCP compute instance id: create-instance @@ -256,8 +238,7 @@ jobs: # # The disk mounted in the VM is located at /dev/$DISK_NAME, we mount the root `/` of this disk to the docker # container, and might have two different paths (if lightwalletd state is needed): - # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR - # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR + # - ${{ inputs.zebra_state_dir }} and ${{ inputs.lwd_state_dir }} # # Currently we do this by mounting the same disk at both paths. # @@ -268,7 +249,7 @@ jobs: # These paths must match the variables used by the tests in Rust, which are also set in # `ci-unit-tests-docker.yml` to be able to run this tests. 
# - # Although we're mounting the disk root to both directories, Zebra and Lightwalletd + # Although we're mounting the disk root to both directories, Zebra and Lightwalletd, tests # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR, # the inputs like ${{ inputs.zebra_state_dir }} and ${{ inputs.lwd_state_dir }} # are only used to match those variables paths. @@ -286,12 +267,12 @@ jobs: # Extract the correct disk name based on the device-name DISK_NAME=$(ls -l /dev/disk/by-id | grep -oE "google-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} -> ../../[^ ]+" | grep -oE "/[^/]+$" | cut -c 2-) - MOUNT_FLAGS="--mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}" + MOUNT_FLAGS="--mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.zebra_state_dir }}" # Check if we need to mount for Lightwalletd state # lightwalletd-full-sync reads Zebra and writes lwd, so it is handled specially. if [[ "${{ inputs.needs_lwd_state }}" == "true" || "${{ inputs.test_id }}" == "lwd-full-sync" ]]; then - MOUNT_FLAGS="$MOUNT_FLAGS --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }}" + MOUNT_FLAGS="$MOUNT_FLAGS --mount type=volume,volume-driver=local,volume-opt=device=/dev/$DISK_NAME,volume-opt=type=ext4,dst=${{ inputs.lwd_state_dir }}" fi sudo docker run \ @@ -401,6 +382,9 @@ jobs: # Normally, if a job is skipped, all the jobs that depend on it are also skipped. # So we need to override the default success() check to make this job run. 
if: ${{ !cancelled() && !failure() && (inputs.saves_to_disk || inputs.force_save_to_disk) }} + env: + STATE_VERSION: ${{ needs.test-result.outputs.state_version }} + CACHED_DISK_NAME: ${{ needs.test-result.outputs.cached_disk_name }} permissions: contents: 'read' id-token: 'write' @@ -457,17 +441,6 @@ jobs: - name: Set up Cloud SDK uses: google-github-actions/setup-gcloud@v2.1.1 - # Get the state version from the local constants.rs file to be used in the image creation, - # as the state version is part of the disk image name. - # - # Passes the state version to subsequent steps using $STATE_VERSION env variable - - name: Get state version from constants.rs - run: | - LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1) - echo "STATE_VERSION: $LOCAL_STATE_VERSION" - - echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> "$GITHUB_ENV" - # Sets the $UPDATE_SUFFIX env var to "-u" if updating a previous cached state, # and the empty string otherwise. 
# @@ -641,7 +614,7 @@ jobs: - name: Get original cached state height from google cloud run: | ORIGINAL_HEIGHT="0" - ORIGINAL_DISK_NAME="${{ format('{0}', needs.test-result.outputs.cached_disk_name) }}" + ORIGINAL_DISK_NAME="${{ format('{0}', env.CACHED_DISK_NAME) }}" if [[ -n "$ORIGINAL_DISK_NAME" ]]; then ORIGINAL_HEIGHT=$(gcloud compute images list --filter="status=READY AND name=$ORIGINAL_DISK_NAME" --format="value(labels.height)") diff --git a/.github/workflows/sub-find-cached-disks.yml b/.github/workflows/sub-find-cached-disks.yml index 79fdbff8efb..00254c14be5 100644 --- a/.github/workflows/sub-find-cached-disks.yml +++ b/.github/workflows/sub-find-cached-disks.yml @@ -14,22 +14,43 @@ on: description: 'The Zcash network used to look up the disks' required: true type: string + disk_prefix: + required: false + type: string + disk_suffix: + required: false + type: string + prefer_main_cached_state: + required: false + type: boolean + test_id: + description: 'The test ID requiring the cached state disks' + required: false + type: string outputs: + state_version: + description: 'The version of the cached state disks' + value: ${{ jobs.get-cached-disks.outputs.state_version }} + cached_disk_name: + description: 'The name of the cached state disk' + value: ${{ jobs.get-cached-disks.outputs.cached_disk_name }} lwd_tip_disk: description: 'true if there is a lightwalletd and Zebra cached state disk, synced near the chain tip' - value: ${{ jobs.get-available-disks.outputs.lwd_tip_disk }} + value: ${{ jobs.get-cached-disks.outputs.lwd_tip_disk }} zebra_tip_disk: description: 'true if there is a Zebra cached state disk synced near the chain tip' - value: ${{ jobs.get-available-disks.outputs.zebra_tip_disk }} + value: ${{ jobs.get-cached-disks.outputs.zebra_tip_disk }} zebra_checkpoint_disk: description: 'true if there is a Zebra cached state disk synced to the mandatory Zebra checkpoint' - value: ${{ jobs.get-available-disks.outputs.zebra_checkpoint_disk }} + value: ${{ 
jobs.get-cached-disks.outputs.zebra_checkpoint_disk }} jobs: - get-available-disks: - name: Check if cached state disks exist + get-cached-disks: + name: Get ${{ inputs.test_id || inputs.network }} cached disk runs-on: ubuntu-latest outputs: + state_version: ${{ steps.get-available-disks.outputs.state_version }} + cached_disk_name: ${{ steps.get-available-disks.outputs.cached_disk_name }} lwd_tip_disk: ${{ steps.get-available-disks.outputs.lwd_tip_disk }} zebra_tip_disk: ${{ steps.get-available-disks.outputs.zebra_tip_disk }} zebra_checkpoint_disk: ${{ steps.get-available-disks.outputs.zebra_checkpoint_disk }} @@ -63,38 +84,18 @@ jobs: echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV # Check if there are cached state disks available for subsequent jobs to use. - - name: Check if cached state disks exist + - name: Check if cached state disks exist id: get-available-disks env: - GITHUB_WORKSPACE: ${{ env.GITHUB_WORKSPACE }} + GITHUB_SHA_SHORT: ${{ env.GITHUB_SHA_SHORT }} NETWORK: ${{ env.NETWORK }} # use lowercase version from env, not input - # TODO: Use the `gcp-get-available-disks.sh` script instead of the inline script, - # as this is crashing. And it might related to the returned JSON values. 
+ DISK_PREFIX: ${{ inputs.disk_prefix }} + DISK_SUFFIX: ${{ inputs.disk_suffix }} + PREFER_MAIN_CACHED_STATE: ${{ inputs.prefer_main_cached_state }} run: | - # ./.github/workflows/scripts/gcp-get-available-disks.sh - LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1) - echo "STATE_VERSION: $LOCAL_STATE_VERSION" - LWD_TIP_DISK=$(gcloud compute images list --filter="status=READY AND name~lwd-cache-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-tip" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) - if [[ -z "$LWD_TIP_DISK" ]]; then - echo "No TIP disk found for lightwalletd on network: ${NETWORK}" - echo "lwd_tip_disk=${{ toJSON(false) }}" >> "$GITHUB_OUTPUT" - else - echo "Disk: $LWD_TIP_DISK" - echo "lwd_tip_disk=${{ toJSON(true) }}" >> "$GITHUB_OUTPUT" - fi - ZEBRA_TIP_DISK=$(gcloud compute images list --filter="status=READY AND name~zebrad-cache-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-tip" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) - if [[ -z "$ZEBRA_TIP_DISK" ]]; then - echo "No TIP disk found for Zebra on network: ${NETWORK}" - echo "zebra_tip_disk=${{ toJSON(false) }}" >> "$GITHUB_OUTPUT" - else - echo "Disk: $ZEBRA_TIP_DISK" - echo "zebra_tip_disk=${{ toJSON(true) }}" >> "$GITHUB_OUTPUT" - fi - ZEBRA_CHECKPOINT_DISK=$(gcloud compute images list --filter="status=READY AND name~zebrad-cache-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-${NETWORK}-checkpoint" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) - if [[ -z "$ZEBRA_CHECKPOINT_DISK" ]]; then - echo "No CHECKPOINT disk found for Zebra on network: ${NETWORK}" - echo "zebra_checkpoint_disk=${{ toJSON(false) }}" >> "$GITHUB_OUTPUT" - else - echo "Disk: $ZEBRA_CHECKPOINT_DISK" - echo "zebra_checkpoint_disk=${{ toJSON(true) }}" >> "$GITHUB_OUTPUT" - fi + source ./.github/workflows/scripts/gcp-get-cached-disks.sh + echo "state_version=${LOCAL_STATE_VERSION}" >> 
"${GITHUB_OUTPUT}" + echo "cached_disk_name=${CACHED_DISK_NAME}" >> "${GITHUB_OUTPUT}" + echo "lwd_tip_disk=${LWD_TIP_DISK}" >> "${GITHUB_OUTPUT}" + echo "zebra_tip_disk=${ZEBRA_TIP_DISK}" >> "${GITHUB_OUTPUT}" + echo "zebra_checkpoint_disk=${ZEBRA_CHECKPOINT_DISK}" >> "${GITHUB_OUTPUT}" From b09411b11025c43fd75b1a56fdd6e126edc6f137 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Mon, 16 Sep 2024 14:19:26 +0100 Subject: [PATCH 2/2] ref(tests): Use the `ZEBRA_CACHED_STATE_DIR` env var across tests We had a technical debt with some tests using a hardcoded value for the cache directory (`/zebrad-cache`), which generated inconsistency across disks and cached states directories. Changes: - Allow sync tests to use the `ZEBRA_CACHED_STATE_DIR` as the cache directory, if specified - Update the `entrypoint.sh` to reflect this change - Add the `ZEBRA_CACHED_STATE_DIR` variable to the missing tests in `sub-ci-integration-tests-gcp.yml`, and remove extra parameters to call reusable workflows. --- .../sub-ci-integration-tests-gcp.yml | 54 +++---------------- docker/entrypoint.sh | 14 ++--- zebrad/tests/acceptance.rs | 24 +++++---- zebrad/tests/common/sync.rs | 16 ++++-- 4 files changed, 40 insertions(+), 68 deletions(-) diff --git a/.github/workflows/sub-ci-integration-tests-gcp.yml b/.github/workflows/sub-ci-integration-tests-gcp.yml index 76cb168feb9..3ff5ab1e79a 100644 --- a/.github/workflows/sub-ci-integration-tests-gcp.yml +++ b/.github/workflows/sub-ci-integration-tests-gcp.yml @@ -31,6 +31,10 @@ on: #! #! The job names in `ci-integration-tests-gcp.yml`, `ci-integration-tests-gcp.patch.yml` and #! `ci-integration-tests-gcp.patch-external.yml` must be kept in sync. +#! +#! The test variables ZEBRA_CACHED_STATE_DIR and LIGHTWALLETD_DATA_DIR used in some steps are set in the +#! `sub-deploy-integration-tests-gcp.yml` workflow file as inputs. If modified in this file, they must +#! also be updated in the `sub-deploy-integration-tests-gcp.yml` file. 
jobs: # to also run a job on Mergify head branches, # add `|| (github.event_name == 'push' && startsWith(github.head_ref, 'mergify/merge-queue/'))`: @@ -79,7 +83,7 @@ jobs: app_name: zebrad test_id: sync-to-checkpoint test_description: Test sync up to mandatory checkpoint - test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1" + test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_DISK_REBUILD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache" needs_zebra_state: false saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} @@ -108,7 +112,7 @@ jobs: app_name: zebrad test_id: sync-past-checkpoint test_description: Test full validation sync from a cached state - test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1" + test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_CHECKPOINT_SYNC=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache" needs_zebra_state: true saves_to_disk: false disk_suffix: checkpoint @@ -138,13 +142,12 @@ jobs: test_description: Test a full sync up to the tip # The value of FULL_SYNC_MAINNET_TIMEOUT_MINUTES is currently ignored. # TODO: update the test to use {{ input.network }} instead? 
- test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1" + test_variables: "-e NETWORK=Mainnet -e FULL_SYNC_MAINNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache" # This test runs for longer than 6 hours, so it needs multiple jobs is_long_test: true needs_zebra_state: false saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} - disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit # We want to prevent multiple full zebrad syncs running at the same time, @@ -184,9 +187,6 @@ jobs: # update the disk on every PR, to increase CI speed saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -217,9 +217,6 @@ jobs: needs_zebra_state: true # test-update-sync updates the disk on every PR, so we don't need to do it here saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit @@ -248,7 +245,7 @@ jobs: test_id: full-sync-testnet test_description: Test a full sync up to the tip on testnet # The value of FULL_SYNC_TESTNET_TIMEOUT_MINUTES is currently ignored. - test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1" + test_variables: "-e NETWORK=Testnet -e FULL_SYNC_TESTNET_TIMEOUT_MINUTES=0 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache" network: "Testnet" # A full testnet sync could take 2-10 hours in April 2023. # The time varies a lot due to the small number of nodes. 
@@ -256,7 +253,6 @@ jobs: needs_zebra_state: false saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} - disk_suffix: tip height_grep_text: 'current_height.*=.*Height.*\(' secrets: inherit # We want to prevent multiple full zebrad syncs running at the same time, @@ -300,9 +296,6 @@ jobs: # we don't have a test-update-sync-testnet job, so we need to update the disk here saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" height_grep_text: 'zebra_tip_height.*=.*Height.*\(' secrets: inherit @@ -335,10 +328,6 @@ jobs: saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_prefix: lwd-cache - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" - lwd_state_dir: "lwd-cache" height_grep_text: "Waiting for block: " secrets: inherit # We want to prevent multiple lightwalletd full syncs running at the same time, @@ -372,10 +361,6 @@ jobs: saves_to_disk: true force_save_to_disk: ${{ inputs.force_save_to_disk || false }} disk_prefix: lwd-cache - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" - lwd_state_dir: "lwd-cache" height_grep_text: "Waiting for block: " secrets: inherit @@ -401,9 +386,6 @@ jobs: test_variables: "-e NETWORK=${{ inputs.network || vars.ZCASH_NETWORK }} -e TEST_LWD_RPC_CALL=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache" needs_zebra_state: true saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" secrets: inherit # Test that Zebra can handle a lightwalletd send transaction RPC call, using a cached Zebra tip state @@ -427,10 +409,6 @@ jobs: needs_zebra_state: true needs_lwd_state: true saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" - lwd_state_dir: 
"lwd-cache" secrets: inherit # Test that Zebra can handle gRPC wallet calls, using a cached Zebra tip state @@ -454,10 +432,6 @@ jobs: needs_zebra_state: true needs_lwd_state: true saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" - lwd_state_dir: "lwd-cache" secrets: inherit ## getblocktemplate-rpcs using cached Zebra state on mainnet @@ -485,9 +459,6 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" secrets: inherit # Test that Zebra can handle a submit block RPC call, using a cached Zebra tip state @@ -511,9 +482,6 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" secrets: inherit # Test that the scanner can continue scanning where it was left when zebrad restarts. @@ -537,9 +505,6 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: true - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" secrets: inherit # Test that the scan task registers keys, deletes keys, and subscribes to results for keys while running. @@ -563,9 +528,6 @@ jobs: needs_zebra_state: true needs_lwd_state: false saves_to_disk: false - disk_suffix: tip - root_state_path: "/var/cache" - zebra_state_dir: "zebrad-cache" secrets: inherit failure-issue: diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index b67cf5ee5b5..b6613e97157 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -250,23 +250,20 @@ case "$1" in # Run a Zebra full sync test on mainnet. 
run_cargo_test "${ENTRYPOINT_FEATURES}" "full_sync_mainnet" # List directory generated by test - # TODO: replace with ${ZEBRA_CACHED_STATE_DIR} in Rust and workflows - check_directory_files "/zebrad-cache" + check_directory_files "${ZEBRA_CACHED_STATE_DIR}" elif [[ -n "${FULL_SYNC_TESTNET_TIMEOUT_MINUTES}" ]]; then # Run a Zebra full sync test on testnet. run_cargo_test "${ENTRYPOINT_FEATURES}" "full_sync_testnet" # List directory generated by test - # TODO: replace with ${ZEBRA_CACHED_STATE_DIR} in Rust and workflows - check_directory_files "/zebrad-cache" + check_directory_files "${ZEBRA_CACHED_STATE_DIR}" elif [[ "${TEST_DISK_REBUILD}" -eq "1" ]]; then # Run a Zebra sync up to the mandatory checkpoint. # # TODO: use environmental variables instead of Rust features (part of #2995) run_cargo_test "test_sync_to_mandatory_checkpoint_${NETWORK,,},${ENTRYPOINT_FEATURES}" "sync_to_mandatory_checkpoint_${NETWORK,,}" - # TODO: replace with ${ZEBRA_CACHED_STATE_DIR} in Rust and workflows - check_directory_files "/zebrad-cache" + check_directory_files "${ZEBRA_CACHED_STATE_DIR}" elif [[ "${TEST_UPDATE_SYNC}" -eq "1" ]]; then # Run a Zebra sync starting at the cached tip, and syncing to the latest tip. @@ -279,8 +276,7 @@ case "$1" in # Run a Zebra sync starting at the cached mandatory checkpoint, and syncing past it. 
# # List directory used by test - # TODO: replace with ${ZEBRA_CACHED_STATE_DIR} in Rust and workflows - check_directory_files "/zebrad-cache" + check_directory_files "${ZEBRA_CACHED_STATE_DIR}" # TODO: use environmental variables instead of Rust features (part of #2995) run_cargo_test "test_sync_past_mandatory_checkpoint_${NETWORK,,},${ENTRYPOINT_FEATURES}" "sync_past_mandatory_checkpoint_${NETWORK,,}" @@ -368,4 +364,4 @@ case "$1" in exec "$@" fi ;; -esac \ No newline at end of file +esac diff --git a/zebrad/tests/acceptance.rs b/zebrad/tests/acceptance.rs index c21b0a0e3e3..cd3572ce3f2 100644 --- a/zebrad/tests/acceptance.rs +++ b/zebrad/tests/acceptance.rs @@ -29,9 +29,10 @@ //! - `FULL_SYNC_MAINNET_TIMEOUT_MINUTES` env variable: The total number of minutes we //! will allow this test to run or give up. Value for the Mainnet full sync tests. //! - `FULL_SYNC_TESTNET_TIMEOUT_MINUTES` env variable: The total number of minutes we -//! will allow this test to run or give up. Value for the Testnet ful sync tests. -//! - `/zebrad-cache` directory: For some sync tests, this needs to be created in -//! the file system, the created directory should have write permissions. +//! will allow this test to run or give up. Value for the Testnet full sync tests. +//! - `ZEBRA_CACHED_STATE_DIR` env variable: The path to a Zebra cached state directory. +//! If not set, it defaults to `/zebrad-cache`. For some sync tests, this directory needs to be +//! created in the file system with write permissions. //! //! Here are some examples on how to run each of the tests: //! @@ -40,13 +41,15 @@ //! //! $ cargo test sync_large_checkpoints_mempool_mainnet -- --ignored --nocapture //! -//! $ sudo mkdir /zebrad-cache -//! $ sudo chmod 777 /zebrad-cache +//! $ export ZEBRA_CACHED_STATE_DIR="/zebrad-cache" +//! $ sudo mkdir -p "$ZEBRA_CACHED_STATE_DIR" +//! $ sudo chmod 777 "$ZEBRA_CACHED_STATE_DIR" //! $ export FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 //! 
$ cargo test full_sync_mainnet -- --ignored --nocapture //! -//! $ sudo mkdir /zebrad-cache -//! $ sudo chmod 777 /zebrad-cache +//! $ export ZEBRA_CACHED_STATE_DIR="/zebrad-cache" +//! $ sudo mkdir -p "$ZEBRA_CACHED_STATE_DIR" +//! $ sudo chmod 777 "$ZEBRA_CACHED_STATE_DIR" //! $ export FULL_SYNC_TESTNET_TIMEOUT_MINUTES=600 //! $ cargo test full_sync_testnet -- --ignored --nocapture //! ``` @@ -67,9 +70,10 @@ //! at least the `ZEBRA_TEST_LIGHTWALLETD` environment variable is present: //! //! - `ZEBRA_TEST_LIGHTWALLETD` env variable: Needs to be present to run any of the lightwalletd tests. -//! - `ZEBRA_CACHED_STATE_DIR` env var: The path to a zebra blockchain database. -//! - `LIGHTWALLETD_DATA_DIR` env variable. The path to a lightwalletd database. -//! - `--features lightwalletd-grpc-tests` cargo flag. The flag given to cargo to build the source code of the running test. +//! - `ZEBRA_CACHED_STATE_DIR` env variable: The path to a Zebra cached state directory. +//! If not set, it defaults to `/zebrad-cache`. +//! - `LIGHTWALLETD_DATA_DIR` env variable: The path to a lightwalletd database. +//! - `--features lightwalletd-grpc-tests` cargo flag: The flag given to cargo to build the source code of the running test. //! //! Here are some examples of running each test: //! diff --git a/zebrad/tests/common/sync.rs b/zebrad/tests/common/sync.rs index ff5234c2b1e..bac394099f5 100644 --- a/zebrad/tests/common/sync.rs +++ b/zebrad/tests/common/sync.rs @@ -5,7 +5,7 @@ //! Test functions in this file will not be run. //! This file is only for test library code. -use std::{path::PathBuf, time::Duration}; +use std::{env, path::PathBuf, time::Duration}; use tempfile::TempDir; @@ -326,10 +326,20 @@ pub fn check_sync_logs_until( Ok(zebrad) } +/// Returns the cache directory for Zebra's state. +/// +/// It checks the `ZEBRA_CACHED_STATE_DIR` environment variable and returns its value if set. +/// Otherwise, it defaults to `"/zebrad-cache"`. 
+fn get_zebra_cached_state_dir() -> PathBuf { + env::var("ZEBRA_CACHED_STATE_DIR") + .unwrap_or_else(|_| "/zebrad-cache".to_string()) + .into() +} + /// Returns a test config for caching Zebra's state up to the mandatory checkpoint. pub fn cached_mandatory_checkpoint_test_config(network: &Network) -> Result { let mut config = persistent_test_config(network)?; - config.state.cache_dir = "/zebrad-cache".into(); + config.state.cache_dir = get_zebra_cached_state_dir(); // To get to the mandatory checkpoint, we need to sync lots of blocks. // (Most tests use a smaller limit to minimise redundant block downloads.) @@ -377,7 +387,7 @@ pub fn create_cached_database_height( config.state.debug_stop_at_height = Some(height.0); config.consensus.checkpoint_sync = checkpoint_sync; - let dir = PathBuf::from("/zebrad-cache"); + let dir = get_zebra_cached_state_dir(); let mut child = dir .with_exact_config(&config)? .spawn_child(args!["start"])?