Skip to content

Commit

Permalink
Simplify local reproduction instructions
Browse files Browse the repository at this point in the history
This change is a big simplification of reproducing a build locally:

- Moved a lot of configurable behavior into source-able local_* env files
- Deleted a lot of extraneous instructions
- Added "how do I choose..." instructions for clarification
- Fixed a bug preventing multiple invocations of rename_and_verify_wheels
- Made any.sh and bisect.sh use the cleaner method of sourcing local_ envs
- Cleaned up ci_default and added "how to see an overview of env variables"

One of the big things is that I added some logic in local_default that resets
the list of Bazel common args to remove things like --config=rbe. This way a
user can choose to run a "release" env configuration without being blocked by
permission errors, or by needing to manually amend their configuration. As a
result, gathering the correct set of config values is quite easy (see the new
instructions). The alternative would be to split "auth-related" flags into
a separate variable, which I don't like, as I think it's more confusing for
future maintainers to have to continuously decide which options go where.
  • Loading branch information
angerson committed Dec 9, 2023
1 parent 5fd4edf commit 7371929
Show file tree
Hide file tree
Showing 9 changed files with 171 additions and 294 deletions.
376 changes: 99 additions & 277 deletions ci/official/README.md

Large diffs are not rendered by default.

8 changes: 5 additions & 3 deletions ci/official/any.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
# ./any.sh
set -euxo pipefail
cd "$(dirname "$0")/../../" # tensorflow/
REQUESTED_TFCI="$TFCI"
export TFCI=$(mktemp)
echo >>$TFCI "source $REQUESTED_TFCI"
echo >>$TFCI "source ci/official/envs/disable_all_uploads"
echo >>$TFCI "source ci/official/envs/local_multicache"
if [[ -n "${TF_ANY_SCRIPT:-}" ]]; then
cp "$TFCI" any
echo "source ci/official/envs/disable_all_uploads" >> any
export TFCI=$(realpath any)
"$TF_ANY_SCRIPT"
elif [[ -n "${TF_ANY_TARGETS:-}" ]]; then
source "${BASH_SOURCE%/*}/utilities/setup.sh"
Expand Down
10 changes: 6 additions & 4 deletions ci/official/bisect.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@
# export TF_ANY_MODE=test
set -euxo pipefail
cd "$(dirname "$0")/../../" # tensorflow/
cp "$TFCI" bisect
echo "source ci/official/envs/disable_all_uploads" >> bisect
export TFCI=$(realpath bisect)
REQUESTED_TFCI="$TFCI"
export TFCI=$(mktemp)
echo >>$TFCI "source $REQUESTED_TFCI"
echo >>$TFCI "source ci/official/envs/disable_all_uploads"
echo >>$TFCI "source ci/official/envs/local_multicache"
git bisect start "$TF_BISECT_BAD" "$TF_BISECT_GOOD"
git bisect run "$TF_BISECT_SCRIPT"
git bisect run $TF_BISECT_SCRIPT
27 changes: 19 additions & 8 deletions ci/official/envs/ci_default
Original file line number Diff line number Diff line change
@@ -1,7 +1,18 @@
# Note: this gets sourced in utilities/setup.sh
# Note: this file gets sourced in utilities/setup.sh, which has "set -u"
# (error on undefined variables). This ensures that (a) every TFCI variable
# has an explicit default value, and (b) no script can accidentally use a
# variable that doesn't exist. Please keep this list in alphabetical order.
#
# Find usage in scripts with e.g.:
# cd ci/official
# ls *.sh utilities/*.sh | xargs grep -H --color=always TFCI_ARG_HERE
# You may also get an overview, e.g.:
# cd ci/official
# grep -o '^TFCI\w*' envs/ci_default | xargs -n 1 -I{} bash -c "echo; echo {}; grep -R -H --exclude-dir=envs --color=always '{}'"

TFCI_BAZEL_BAZELRC_ARGS=
TFCI_BAZEL_COMMON_ARGS=
TFCI_BAZEL_CONFIG_PREFIX=
TFCI_BAZEL_TARGET_SELECTING_CONFIG_PREFIX=
TFCI_BUILD_PIP_PACKAGE_ARGS=
TFCI_DOCKER_ARGS=
TFCI_DOCKER_ENABLE=1
Expand All @@ -12,6 +23,12 @@ TFCI_DOCKER_REBUILD_ENABLE=0
TFCI_DOCKER_REBUILD_UPLOAD_ENABLE=0
TFCI_INDEX_HTML_ENABLE=1
TFCI_LIB_SUFFIX=
TFCI_MACOS_BAZEL_TEST_DIR_ENABLE=
TFCI_MACOS_BAZEL_TEST_DIR_PATH=
TFCI_MACOS_INSTALL_BAZELISK_ENABLE=
TFCI_MACOS_INSTALL_BAZELISK_URL=
TFCI_MACOS_PYENV_INSTALL_ENABLE=
TFCI_MACOS_UPGRADE_PYENV_ENABLE=
TFCI_NIGHTLY_UPDATE_VERSION_ENABLE=
TFCI_NVIDIA_SMI_ENABLE=
TFCI_OUTPUT_DIR=build_output
Expand All @@ -30,9 +47,3 @@ TFCI_WHL_AUDIT_PLAT=
TFCI_WHL_BAZEL_TEST_ENABLE=1
TFCI_WHL_SIZE_LIMIT=
TFCI_WHL_SIZE_LIMIT_ENABLE=1
TFCI_MACOS_UPGRADE_PYENV_ENABLE=
TFCI_MACOS_INSTALL_BAZELISK_ENABLE=
TFCI_MACOS_INSTALL_BAZELISK_URL=
TFCI_MACOS_PYENV_INSTALL_ENABLE=
TFCI_MACOS_BAZEL_TEST_DIR_ENABLE=
TFCI_MACOS_BAZEL_TEST_DIR_PATH=
22 changes: 22 additions & 0 deletions ci/official/envs/local_default
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Base settings for a local execution. This file is meant to be sourced.
#
# Don't generate the summary index (ci_default sets TFCI_INDEX_HTML_ENABLE=1;
# it is blanked here).
TFCI_INDEX_HTML_ENABLE=
# Define KOKORO_KEYSTORE_DIR, even though it is empty, so that references to
# it do not produce "hey, that's empty" errors.
# NOTE(review): presumably this guards against "set -u" (error on undefined
# variables) in utilities/setup.sh -- confirm.
KOKORO_KEYSTORE_DIR=
# Don't try and upload anything, since permissions won't work anyway
source ci/official/envs/disable_all_uploads
# Remove execution-affecting arguments from the TFCI_BAZEL_COMMON_ARGS value
# so that other local_ envs can add whatever they want. This seems easier
# than trying to juggle different environment variables for it.
# Print $TFCI_BAZEL_COMMON_ARGS on stdout with these Bazel configs removed:
# tf_public_cache_push, tf_public_macos_cache_push, tf_public_cache,
# tf_public_macos_cache, resultstore, and rbe.
#
# Globals:   TFCI_BAZEL_COMMON_ARGS (read only)
# Arguments: none
# Outputs:   the filtered argument string, followed by a newline
#
# Both the "--config NAME" and "--config=NAME" spellings are removed. A flag
# is removed only when NAME matches in full, so a longer config name that
# merely starts with one of the names above (e.g. "--config rbe_linux_cpu")
# is left intact instead of being cut down to a stray "_linux_cpu" argument.
function localize_bazel_args() {
  local removable='tf_public_cache_push|tf_public_macos_cache_push|tf_public_cache|tf_public_macos_cache|resultstore|rbe'
  # Each substitution keeps the whitespace on both sides of the removed flag
  # (\1 and \3), so the spacing of the remaining arguments is unchanged.
  # The ":a" ... "ta" loop repeats the substitution until nothing matches:
  # two adjacent flags share one separating space, so a single pass could
  # not match both of them.
  printf '%s\n' "$TFCI_BAZEL_COMMON_ARGS" | sed -E \
    -e ':a' \
    -e "s/(^|[[:space:]])--config[ =](${removable})(\$|[[:space:]])/\\1\\3/" \
    -e 'ta'
}
# Replace the current value with the output of localize_bazel_args.
# Other envs sourced after this file can now append to it:
# TFCI_BAZEL_COMMON_ARGS="$TFCI_BAZEL_COMMON_ARGS <other args>"
TFCI_BAZEL_COMMON_ARGS=$(localize_bazel_args)
7 changes: 7 additions & 0 deletions ci/official/envs/local_multicache
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Use the TF public build cache together with a local disk cache stored in
# $TFCI_OUTPUT_DIR/cache. The public cache config name depends on the OS:
# macOS (uname reports "Darwin") has its own config; everything else uses
# the default one.
case "$(uname -s)" in
  Darwin)
    TFCI_BAZEL_COMMON_ARGS="$TFCI_BAZEL_COMMON_ARGS --config tf_public_macos_cache --disk_cache=$TFCI_OUTPUT_DIR/cache"
    ;;
  *)
    TFCI_BAZEL_COMMON_ARGS="$TFCI_BAZEL_COMMON_ARGS --config tf_public_cache --disk_cache=$TFCI_OUTPUT_DIR/cache"
    ;;
esac
2 changes: 2 additions & 0 deletions ci/official/envs/local_nodocker
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Disable Docker (overrides the TFCI_DOCKER_ENABLE=1 default from ci_default)
TFCI_DOCKER_ENABLE=0
3 changes: 3 additions & 0 deletions ci/official/envs/local_rbe
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Use RBE and Resultstore. If using Docker, requires passthrough of gcloud credentials.
# NOTE(review): only "--config rbe" is appended below; no resultstore config
# is added explicitly -- confirm whether the rbe config implies it.
#
# Mount the host's gcloud configuration directory into the container at
# /root/.config/gcloud so the credentials are visible there.
TFCI_DOCKER_ARGS="$TFCI_DOCKER_ARGS -v $HOME/.config/gcloud:/root/.config/gcloud"
# Append the rbe config to whatever Bazel arguments are already set.
TFCI_BAZEL_COMMON_ARGS="$TFCI_BAZEL_COMMON_ARGS --config rbe"
10 changes: 8 additions & 2 deletions ci/official/utilities/rename_and_verify_wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,15 @@ set -euxo pipefail

cd "$TFCI_OUTPUT_DIR"

# Move extra wheel files somewhere out of the way. This script
# expects just one wheel file to exist.
if [[ "$(ls *.whl | wc -l | tr -d ' ')" != "1" ]]; then
echo "Error: $TFCI_OUTPUT_DIR should contain exactly one .whl file."
exit 1
echo "More than one wheel file is present: moving the oldest to"
echo "$TFCI_OUTPUT_DIR/extra_wheels."
# List all .whl files by their modification time (ls -t) and move anything
# other than the most recently-modified one (the newest one).
mkdir -p $TFCI_OUTPUT_DIR/extra_wheels
ls -t *.whl | tail -n +2 | xargs mv -t $TFCI_OUTPUT_DIR/extra_wheels
fi

# Repair wheels with auditwheel and delete the old one.
Expand Down

0 comments on commit 7371929

Please sign in to comment.