Skip to content

Commit

Permalink
Improvements to wait_for_pods function
Browse files Browse the repository at this point in the history
Signed-off-by: hbelmiro <[email protected]>
  • Loading branch information
hbelmiro committed Sep 3, 2024
1 parent 0d098db commit a59d586
Show file tree
Hide file tree
Showing 14 changed files with 250 additions and 71 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/backend.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,12 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Create KFP cluster
uses: ./.github/actions/kfp-tekton-cluster
- name: Set up Python 3.10
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Create KFP cluster
uses: ./.github/actions/kfp-tekton-cluster
- name: Install sdk
run: |
python3 -m venv .venv
Expand Down
30 changes: 30 additions & 0 deletions .github/workflows/e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand All @@ -46,6 +51,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand All @@ -69,6 +79,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand All @@ -92,6 +107,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand All @@ -115,6 +135,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand Down Expand Up @@ -144,6 +169,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.9

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/kfp-kubernetes-execution-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
pull_request:
paths:
- '.github/workflows/kfp-kubernetes-execution-tests.yml'
- 'scripts/deploy/github/**'
- 'sdk/python/**'
- 'api/v2alpha1/**'
- 'kubernetes_platform/**'
Expand All @@ -18,17 +19,17 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

- name: Forward API port
run: ./scripts/deploy/github/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.9'

- name: apt-get update
run: sudo apt-get update

Expand Down
1 change: 1 addition & 0 deletions .github/workflows/kfp-samples.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ on:
- master
pull_request:
paths:
- 'scripts/deploy/github/**'
- 'samples/**'
- 'backend/src/v2/**'
- '.github/workflows/kfp-samples.yml'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/kubeflow-pipelines-integration-v2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
pull_request:
paths:
- '.github/workflows/kubeflow-pipelines-integration-v2.yml'
- 'scripts/deploy/github/**'
- 'samples'
- 'core'
- 'backend'
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/periodic.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8
- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster
- name: Port forward kfp apiserver
Expand Down
11 changes: 6 additions & 5 deletions .github/workflows/sdk-execution.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
pull_request:
paths:
- '.github/workflows/sdk-execution.yml'
- 'scripts/deploy/github/**'
- 'sdk/python/**'
- 'api/v2alpha1/**'

Expand All @@ -17,17 +18,17 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

- name: Forward API port
run: ./scripts/deploy/github/forward-port.sh "kubeflow" "ml-pipeline" 8888 8888

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8

- name: apt-get update
run: sudo apt-get update

Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/upgrade-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:
pull_request:
paths:
- '.github/workflows/upgrade-test.yml'
- 'scripts/deploy/github/**'
- 'backend/**'
- 'manifests/kustomize/**'

Expand All @@ -17,6 +18,11 @@ jobs:
- name: Checkout code
uses: actions/checkout@v4

- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: 3.8

- name: Create KFP cluster
uses: ./.github/actions/kfp-cluster

Expand Down
6 changes: 2 additions & 4 deletions scripts/deploy/github/deploy-kfp-tekton.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,16 +40,14 @@ then
exit 1
fi

# Check if all pods are running - allow 40 retries, 30s apart (20 minutes)
wait_for_pods kubeflow 40 30 || EXIT_CODE=$?
# Check if all pods are running - (10 minutes)
wait_for_pods || EXIT_CODE=$?
if [[ $EXIT_CODE -ne 0 ]]
then
echo "Deploy unsuccessful. Not all pods running."
exit 1
fi

echo "List Kubeflow: "
kubectl get pod -n kubeflow
collect_artifacts kubeflow

echo "List Tekton control plane: "
Expand Down
6 changes: 2 additions & 4 deletions scripts/deploy/github/deploy-kfp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,14 @@ then
exit 1
fi

# Check if all pods are running - allow 40 retries, 30s apart (20 minutes)
wait_for_pods kubeflow 40 30 || EXIT_CODE=$?
# Check if all pods are running - (10 minutes)
wait_for_pods || EXIT_CODE=$?
if [[ $EXIT_CODE -ne 0 ]]
then
echo "Deploy unsuccessful. Not all pods running."
exit 1
fi

echo "List Kubeflow: "
kubectl get pod -n kubeflow
collect_artifacts kubeflow

echo "Finished KFP deployment."
Expand Down
54 changes: 3 additions & 51 deletions scripts/deploy/github/helper-functions.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,57 +56,9 @@ wait_for_namespace () {
}

# Waits until every pod listed in a namespace reports Running or Completed.
#
# Arguments:
#   $1 - namespace handed to `kubectl get pod -n`
#   $2 - max_retries: maximum number of polling attempts
#   $3 - sleep_time: value handed to `sleep` between attempts
# Outputs: progress messages on stdout.
# Returns: 0 as soon as all listed pods are Running/Completed; 1 when called
#          with anything other than three arguments, or when max_retries
#          attempts pass without reaching that state.
#
# NOTE(review): this listing looks like a rendered diff in which removed and
# added lines are interleaved and indentation is stripped. Read literally, the
# three statements after the final `return 1` (C_DIR / pip install / python)
# are unreachable; presumably they are the replacement body that delegates the
# check to kfp-readiness/wait_for_pods.py - confirm against the real file.
wait_for_pods () {
# Reject any call that does not pass exactly three arguments.
if [[ $# -ne 3 ]]
then
echo "Usage: wait_for_pods namespace max_retries sleep_time"
return 1
fi

local namespace=$1
local max_retries=$2
local sleep_time=$3

# Number of polling attempts made so far.
local i=0

while [[ $i -lt $max_retries ]]
do
local pods
local statuses
local num_pods
local num_running
pods=$(kubectl get pod -n "$namespace")
# echo "$pods"
# kubectl get pvc -n "$namespace"

# Empty kubectl output is treated as "nothing deployed yet"; the loop still
# sleeps and retries below.
if [[ -z $pods ]]
then
echo "no pod is up yet"
else
# Using quotations around variables to keep column format in echo
# Remove 1st line (header line) -> trim whitespace -> cut statuses column (3rd column)
# Might be overkill to parse down to specific columns :).
statuses=$(echo "$pods" | tail -n +2 | tr -s ' ' | cut -d ' ' -f 3)
# One status per line, so the line count is the pod count; presumably the
# trailing xargs is there to trim whitespace around wc's output.
num_pods=$(echo "$statuses" | wc -l | xargs)
# Count only the statuses that are exactly Running or Completed.
num_running=$(echo "$statuses" | grep -ow "Running\|Completed" | wc -l | xargs)

local msg="${num_running}/${num_pods} pods running in \"${namespace}\"."

if [[ $num_running -ne $num_pods ]]
then
# for debugging
# kubectl get pod -n "$namespace" | grep '0/1' | awk '{print $1}' | xargs kubectl describe pod -n "$namespace"
echo "$msg Checking again in ${sleep_time}s."
else
echo "$msg"
return 0
fi
fi

# Reached both when no pods were listed and when some are not ready yet.
sleep "$sleep_time"
i=$((i+1))
done

# All attempts used up without every pod being Running/Completed.
return 1
# NOTE(review): as written, nothing below this point can execute (see header).
# ${BASH_SOURCE%/*} strips the last path component from this script's path.
# C_DIR is not declared `local`, so it would be set in the calling shell too.
C_DIR="${BASH_SOURCE%/*}"
pip install -r "${C_DIR}"/kfp-readiness/requirements.txt
python "${C_DIR}"/kfp-readiness/wait_for_pods.py
}

deploy_with_retries () {
Expand Down
1 change: 1 addition & 0 deletions scripts/deploy/github/kfp-readiness/requirements.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-r ../../../../sdk/python/requirements.in
Loading

0 comments on commit a59d586

Please sign in to comment.