Skip to content

Commit

Permalink
Parallelize & Partition Verification (rust-lang#229)
Browse files Browse the repository at this point in the history
Shard Kani verification workflow across multiple runners by:

1. Running `kani list --format json`, which outputs a JSON file like
this:

```json
{
  "kani-version": "0.56.0",
  "file-version": "0.1",
  "standard-harnesses": {
    "src/lib.rs": [
      "proof3",
      "verify::proof2"
    ],
    "src/test.rs": [
      "test::proof4",
      "test::proof5"
    ]
  },
  "contract-harnesses": {
    "src/lib.rs": [
      "proof"
    ]
  },
  "contracts": [
    {
      "function": "bar",
      "file": "src/lib.rs",
      "harnesses": [
        "proof"
      ]
    }
  ],
  "totals": {
    "standard-harnesses": 4,
    "contract-harnesses": 1,
    "functions-under-contract": 1
  }
}
```
2. Extracting the harnesses inside `"standard-harnesses"` and
`"contract-harnesses"` into an array called `ALL_HARNESSES` and the
length of that array into `HARNESS_COUNT`. (So in this example,
`ALL_HARNESSES = [proof3, verify::proof2, test::proof4, test::proof5,
proof]` and `HARNESS_COUNT=5`).
3. Dividing the harnesses evenly between four workers. For example, if
worker 1's harnesses are `proof3` and `verify::proof2`, then we call
`kani verify-std --harness proof3 --harness verify::proof2 --exact`. The
`--exact` makes Kani look for the exact harness name. This is important
so that we don't match on partial patterns, e.g., if there is a harness
called "foo" and a harness called "foo_bar", passing `--harness foo`
without `--exact` would match against both harnesses, and then `foo_bar`
would run twice.
4. Also parallelize verification within a single runner by passing `-j`
to Kani.

I chose four workers somewhat arbitrarily--it makes each worker take
about 45 minutes to an hour to finish. I thought it was good to have a
balance between too few workers (which still makes us wait a while) and
too many workers (which makes you look through more logs to find where a
given harness is being verified). But happy to play with this number if
people have opinions.

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 and MIT licenses.
  • Loading branch information
carolynzech authored Dec 19, 2024
1 parent 866a195 commit 67e2f1c
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 18 deletions.
20 changes: 17 additions & 3 deletions .github/workflows/kani.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,39 @@ defaults:

jobs:
check-kani-on-std:
name: Verify std library
name: Verify std library (partition ${{ matrix.partition }})
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-latest]
partition: [1, 2, 3, 4]
include:
- os: ubuntu-latest
base: ubuntu
- os: macos-latest
base: macos
fail-fast: false

env:
# Define the index of this particular worker [1-WORKER_TOTAL]
WORKER_INDEX: ${{ matrix.partition }}
# Total number of workers running this step
WORKER_TOTAL: 4

steps:
# Step 1: Check out the repository
- name: Checkout Repository
uses: actions/checkout@v4
with:
path: head
submodules: true

# Step 2: Run Kani on the std library (default configuration)

# Step 2: Install jq
- name: Install jq
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get install -y jq

# Step 3: Run Kani on the std library (default configuration)
- name: Run Kani Verification
run: head/scripts/run-kani.sh --path ${{github.workspace}}/head

Expand Down
104 changes: 89 additions & 15 deletions scripts/run-kani.sh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,22 @@ TOML_FILE=${KANI_TOML_FILE:-$DEFAULT_TOML_FILE}
REPO_URL=${KANI_REPO_URL:-$DEFAULT_REPO_URL}
BRANCH_NAME=${KANI_BRANCH_NAME:-$DEFAULT_BRANCH_NAME}

# Unstable arguments to pass to Kani
unstable_args="-Z function-contracts -Z mem-predicates -Z float-lib -Z c-ffi -Z loop-contracts"

# Variables used for parallel harness verification
# When we say "parallel," we mean two dimensions of parallelization:
# 1. Sharding verification across multiple workers. The Kani workflow that calls this script defines WORKER_INDEX and WORKER_TOTAL for this purpose:
# we shard verification across WORKER_TOTAL workers, where each worker has a unique WORKER_INDEX that it uses to derive its share of ALL_HARNESSES to verify.
# 2. Within a single worker, we parallelize verification between multiple cores by invoking kani with -j.

# Array of all of the harnesses in the repository, set in get_harnesses()
declare -a ALL_HARNESSES
# Length of ALL_HARNESSES, set in get_harnesses()
declare -i HARNESS_COUNT
# `kani list` JSON FILE_VERSION that the parallel verification command expects
EXPECTED_JSON_FILE_VERSION="0.1"

# Function to read commit ID from TOML file
read_commit_from_toml() {
local file="$1"
Expand Down Expand Up @@ -151,10 +167,50 @@ get_kani_path() {
echo "$(realpath "$build_dir/scripts/kani")"
}

run_kani_command() {
# Run kani list with JSON format and process with jq to extract harness names and total number of harnesses.
# Note: The code to extract ALL_HARNESSES is dependent on `kani list --format json` FILE_VERSION 0.1.
# (The FILE_VERSION variable is defined in Kani in the list module's output code, current path kani-driver/src/list/output.rs)
# If FILE_VERSION changes, first update the ALL_HARNESSES extraction logic to work with the new format, if necessary,
# then update EXPECTED_JSON_FILE_VERSION.
get_harnesses() {
local kani_path="$1"
shift
"$kani_path" "$@"
"$kani_path" list -Z list $unstable_args ./library --std --format json
local json_file_version=$(jq -r '.["file-version"]' "$WORK_DIR/kani-list.json")
if [[ $json_file_version != $EXPECTED_JSON_FILE_VERSION ]]; then
echo "Error: The JSON file-version in kani-list.json does not equal $EXPECTED_JSON_FILE_VERSION"
exit 1
fi
# Extract the harnesses inside "standard-harnesses" and "contract-harnesses"
# into an array called ALL_HARNESSES and the length of that array into HARNESS_COUNT
ALL_HARNESSES=($(jq -r '
([.["standard-harnesses"] | to_entries | .[] | .value[]] +
[.["contract-harnesses"] | to_entries | .[] | .value[]]) |
.[]
' $WORK_DIR/kani-list.json))
HARNESS_COUNT=${#ALL_HARNESSES[@]}
}

# Given an array of harness names, run verification for those harnesses
run_verification_subset() {
local kani_path="$1"
local harnesses=("${@:2}") # All arguments after kani_path are harness names

# Build the --harness arguments
local harness_args=""
for harness in "${harnesses[@]}"; do
harness_args="$harness_args --harness $harness"
done

echo "Running verification for harnesses:"
printf '%s\n' "${harnesses[@]}"
"$kani_path" verify-std -Z unstable-options ./library \
$unstable_args \
$harness_args --exact \
-j \
--output-format=terse \
$command_args \
--enable-unstable \
--cbmc-args --object-bits 12
}

# Check if binary exists and is up to date
Expand All @@ -176,7 +232,6 @@ check_binary_exists() {
return 1
}


main() {
local build_dir="$WORK_DIR/kani_build"

Expand Down Expand Up @@ -209,19 +264,38 @@ main() {
"$kani_path" --version

if [[ "$run_command" == "verify-std" ]]; then
echo "Running Kani verify-std command..."
"$kani_path" verify-std -Z unstable-options ./library \
-Z function-contracts \
-Z mem-predicates \
-Z loop-contracts \
-Z float-lib \
-Z c-ffi \
$command_args \
--enable-unstable \
--cbmc-args --object-bits 12
if [[ -n "$WORKER_INDEX" && -n "$WORKER_TOTAL" ]]; then
echo "Running as parallel worker $WORKER_INDEX of $WORKER_TOTAL"
get_harnesses "$kani_path"

echo "All harnesses:"
printf '%s\n' "${ALL_HARNESSES[@]}"
echo "Total number of harnesses: $HARNESS_COUNT"

# Calculate this worker's portion (add WORKER_TOTAL - 1 to force ceiling division)
chunk_size=$(( (HARNESS_COUNT + WORKER_TOTAL - 1) / WORKER_TOTAL ))
echo "Number of harnesses this worker will run: $chunk_size"

start_idx=$(( (WORKER_INDEX - 1) * chunk_size ))
echo "Start index into ALL_HARNESSES is $start_idx"

# Extract this worker's harnesses
worker_harnesses=("${ALL_HARNESSES[@]:$start_idx:$chunk_size}")

# Run verification for this subset
run_verification_subset "$kani_path" "${worker_harnesses[@]}"
else
# Run verification for all harnesses (not in parallel)
echo "Running Kani verify-std command..."
"$kani_path" verify-std -Z unstable-options ./library \
$unstable_args
$command_args \
--enable-unstable \
--cbmc-args --object-bits 12
fi
elif [[ "$run_command" == "list" ]]; then
echo "Running Kani list command..."
"$kani_path" list -Z list -Z function-contracts -Z mem-predicates -Z float-lib -Z c-ffi ./library --std --format markdown
"$kani_path" list -Z list $unstable_args ./library --std --format markdown
fi
}

Expand Down

0 comments on commit 67e2f1c

Please sign in to comment.