Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Partially revert https://github.com/knative/test-infra/pull/2440 #2443

Merged
merged 1 commit into from
Sep 21, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 43 additions & 6 deletions scripts/library.sh
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,50 @@ function wait_until_object_does_not_exist() {
# Waits until all pods are running in the given namespace.
# Polls `kubectl get pods` every 2 seconds, for at most 150 attempts
# (about 5 minutes). Pods listed as Terminating are ignored. Pods whose line
# contains ErrImagePull / ImagePullBackOff are not treated as hard failures so
# that they can retry, but their containers must still become ready. Completed
# pods are skipped by the container-readiness check; note that a namespace
# holding only Completed pods is never reported as ready.
# Parameters: $1 - namespace.
# Returns: 0 once every remaining pod has all of its containers ready;
#          1 on timeout, after printing the pod list and, when a culprit pod
#          was identified, its YAML and logs.
function wait_until_pods_running() {
  local namespace="$1"
  local pods="" not_running_pods="" failed_pod=""
  local name ready rest all_ready

  echo -n "Waiting until all pods in namespace ${namespace} are up"
  for _ in {1..150}; do # timeout after 5 minutes
    # List all pods. Ignore Terminating pods as those have either been replaced
    # through a deployment or terminated on purpose (through chaosduck for
    # example).
    # `grep -v` exits 1 when it prints nothing; `|| true` keeps callers running
    # under `set -e` / `pipefail` from aborting on an empty listing.
    pods="$(kubectl get pods --no-headers -n "${namespace}" 2>/dev/null \
      | grep -v Terminating)" || true
    # All pods must be running (ignore ImagePull errors to allow the pod to
    # retry). An empty result is the success case, hence the `|| true`.
    not_running_pods="$(grep -v -e Running -e Completed -e ErrImagePull \
      -e ImagePullBackOff <<< "${pods}")" || true

    if [[ -n "${pods}" && -z "${not_running_pods}" ]]; then
      # All pods are running or completed. Verify the containers on each pod.
      all_ready=1
      while read -r name ready rest; do
        # Assume this pod is the culprit until it passes every check below.
        failed_pod="${name}"
        # READY column is "<ready>/<total>": both must be present, at least 1,
        # and equal. 10# forces base 10 so leading zeros are not read as octal.
        if [[ "${ready}" =~ ^([0-9]+)/([0-9]+)$ ]] \
          && (( 10#${BASH_REMATCH[1]} >= 1 )) \
          && (( 10#${BASH_REMATCH[1]} == 10#${BASH_REMATCH[2]} )); then
          # All the checks passed, this is not a failed pod.
          failed_pod=""
        else
          all_ready=0
          break
        fi
      done <<< "$(grep -v Completed <<< "${pods}")"
      if (( all_ready )); then
        echo -e "\nAll pods are up:\n${pods}"
        return 0
      fi
    elif [[ -n "${not_running_pods}" ]]; then
      # At least one pod is not running; remember the first one's name
      # (`read` consumes only the first line, first word).
      read -r failed_pod rest <<< "${not_running_pods}"
    fi
    echo -n "."
    sleep 2
  done

  echo -e "\n\nERROR: timeout waiting for pods to come up\n${pods}"
  if [[ -n "${failed_pod}" ]]; then
    echo -e "\n\nFailed Pod (data in YAML format) - ${failed_pod}\n"
    kubectl -n "${namespace}" get pods "${failed_pod}" -oyaml
    echo -e "\n\nPod Logs\n"
    kubectl -n "${namespace}" logs "${failed_pod}" --all-containers
  fi
  return 1
}

# Waits until all batch jobs complete in the given namespace.
Expand Down