From bfdf186c214543a6a688e196bd136c71162e9d2c Mon Sep 17 00:00:00 2001 From: Chris Evich Date: Tue, 8 Aug 2023 14:37:56 -0400 Subject: [PATCH] Improve build-push error handling Around the time of this commit, the automated multiarch manifest-list builds for both skopeo and podman have been failing somewhere in the `build-push.sh` script. The actual build appears to work fine, the `tag-version.sh` mod-command runs fine, but the tag-search in `get_manifest_tags()` (called by `push_images()`) fails with the error: `jq: error (at :29): Cannot iterate over null (null)` Unfortunately the problem does not reproduce for me locally, nor can it be reproduced using a dry-run build (`--nopush` bypasses the tag search.) Improve debugging of this situation by moving the `if ((PUSH))` check and adding an exception clause to display the would-be pushed images (and tags). Also: * Simplify the `get_manifest_tags()` tag search by adjusting the jq filter to gracefully ignore an empty set of images and/or images without any list of names. Rely on `push_images()` catching the empty-list and throwing an error. * Add a comment regarding the need for the `confirm_arches` call after the `parallel_build` call in the main part of the script. * Improve the debug-ability of `confirm_arches()` in the case of a bad/incomplete/unreadable manifest-list (see item above). Detect both inspect command errors and jq/pipeline errors. In the case of jq/pipeline failure, show the input JSON to aid debugging. * Improve variable-name consistency by removing many `_` prefixes. Signed-off-by: Chris Evich --- build-push/bin/build-push.sh | 88 ++++++++++++++++++++++----------- build-push/test/fake_buildah.sh | 6 ++- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/build-push/bin/build-push.sh b/build-push/bin/build-push.sh index be61f6e..de296b1 100755 --- a/build-push/bin/build-push.sh +++ b/build-push/bin/build-push.sh @@ -327,17 +327,26 @@ parallel_build() { } confirm_arches() { - local filter=".manifests[].platform.architecture" + local inspjson + local filter=".manifests[].platform.architecture?" local arch local maniarches dbg "in confirm_arches()" req_env_vars FQIN ARCHES RUNTIME - maniarches=$($RUNTIME manifest inspect "containers-storage:$FQIN:latest" | \ - jq -r "$filter" | \ - grep -v 'null' | \ - tr -s '[:space:]' ' ' | \ - sed -z '$ s/[\n ]$//') + if ! inspjson=$($RUNTIME manifest inspect "containers-storage:$FQIN:latest"); then + die "Error reading manifest list metadata for 'containers-storage:$FQIN:latest'" + fi + + # Ignore items w/o an arch, convert into space-delimited string for grep error message (below) + # TODO: Use an array instead, could be simpler? Would need testing. + if ! maniarches=$(jq -r "$filter" <<<"$inspjson" | \ + grep -v 'null' | \ + tr -s '[:space:]' ' ' | \ + sed -z '$ s/[\n ]$//'); then + die "Error processing manifest list metadata: +$inspjson" + fi dbg "Found manifest arches: $maniarches" for arch in $ARCHES; do @@ -377,44 +386,60 @@ run_prepmod_cmd() { # Outputs sorted list of FQIN w/ tags to stdout, silent otherwise get_manifest_tags() { - local _json + local result_json + local fqin_names dbg "in get_manifest_fqins()" # At the time of this comment, there is no reliable way to # lookup all tags based solely on inspecting a manifest. # However, since we know $FQIN (remember, value has no tag) we can - # use it to search all related names container storage. Unfortunately + # use it to search all related names in container storage. Unfortunately # because images can have multiple tags, the `reference` filter - # can return names we don't care about. Work around this by - # sending the final result back through a grep of $FQIN - _json=$($RUNTIME images --json --filter=reference=$FQIN) - dbg "Image listing json: $_json" - if [[ -n "$_json" ]] && jq --exit-status '.[].names' <<<"$_json" &>/dev/null - then - jq --raw-output '.[].names[]'<<<"$_json" | grep "$FQIN" | sort + # can return names we don't care about. Work around this with a + # grep of $FQIN in the results. + if ! result_json=$($RUNTIME images --json --filter=reference=$FQIN); then + die "Error listing manifest-list images that reference '$FQIN'" + fi + + dbg "Image listing json: $result_json" + if [[ -n "$result_json" ]]; then + # Rely on the caller to handle an empty list, ignore items missing a name key. + if ! fqin_names=$(jq -r '.[]? | .names[]?'<<<"$result_json"); then + die "Error obtaining image names from '$FQIN' manifest-list search result: +$result_json" + fi + grep "$FQIN"<<<"$fqin_names" | sort fi } push_images() { - local _fqins - local _fqin + local fqin_list + local fqin dbg "in push_images()" # It's possible that --modcmd=* removed all images, make sure # this is known to the caller. - _fqins=$(get_manifest_tags) - if [[ -z "$_fqins" ]]; then + if ! fqin_list=$(get_manifest_tags); then + die "Error retrieving set of manifest-list tags to push for '$FQIN'" + fi + if [[ -z "$fqin_list" ]]; then die "No FQIN(s) to be pushed." fi - dbg "Will try to push FQINs: $_fqins" - - registry_login - for _fqin in $_fqins; do - # Note: --all means push manifest AND images it references - msg "Pushing $_fqin" - $RUNTIME manifest push --all $_fqin docker://$_fqin - done + if ((PUSH)); then + dbg "Will try to push FQINs: '$fqin_list'" + + registry_login + for fqin in $fqin_list; do + # Note: --all means push manifest AND images it references + msg "Pushing $fqin" + $RUNTIME manifest push --all $fqin docker://$fqin + done + else + # Even if --nopush was specified, be helpful to humans with a lookup of all the + # relevant tags for $FQIN that would have been pushed and display them. + warn "Option --nopush specified, not pushing: '$fqin_list'" + fi } ##### MAIN() ##### @@ -433,9 +458,16 @@ if [[ -n "$PREPCMD" ]]; then fi parallel_build "$FQIN:latest" + +# If a parallel build or the manifest-list assembly fails, buildah +# may still exit successfully. Catch this condition by verifying +# all expected arches are present in the manifest list. confirm_arches + if [[ -n "$MODCMD" ]]; then registry_login run_prepmod_cmd mod "$MODCMD" fi -if ((PUSH)); then push_images; fi + +# Handles --nopush internally +push_images diff --git a/build-push/test/fake_buildah.sh b/build-push/test/fake_buildah.sh index 713e662..ff5cfc7 100755 --- a/build-push/test/fake_buildah.sh +++ b/build-push/test/fake_buildah.sh @@ -29,13 +29,15 @@ if [[ "$1" == "build" ]]; then elif [[ "$1" == "manifest" ]]; then # validate json while outputing it jq . $DATF -elif [[ "$1" =~ info ]]; then +elif [[ "$1" == "info" ]]; then case "$@" in *arch*) echo "amd64" ;; *cpus*) echo "2" ;; *) exit 1 ;; esac +elif [[ "$1" == "images" ]]; then + echo '[{"names":["localhost/foo/bar:latest"]}]' else - echo "ERROR: Unexpected call to fake_buildah.sh" + echo "ERROR: Unexpected arg '$1' to fake_buildah.sh" > /dev/stderr exit 9 fi