Skip to content

Commit

Permalink
Fix cilium watchdog to write CNI config at least once
Browse files Browse the repository at this point in the history
Previously, the newly built CNI config was never written when all of the following were true:

- Watchdog is in use
- Cilium is in use
- Fast start is disabled
- An existing CNI config is already there
  • Loading branch information
jingyuanliang committed Sep 27, 2024
1 parent 2007892 commit 0b69c09
Show file tree
Hide file tree
Showing 6 changed files with 373 additions and 2 deletions.
10 changes: 8 additions & 2 deletions scripts/install-cni.sh
Original file line number Diff line number Diff line change
Expand Up @@ -475,17 +475,23 @@ cilium_watchdog_success_wait=${CILIUM_WATCHDOG_SUCCESS_WAIT:-300}
cilium_watchdog_failure_retry=${CILIUM_WATCHDOG_FAILURE_RETRY:-60}
cilium_watchdog_fast_start_wait=${CILIUM_WATCHDOG_FAST_START_WAIT:-60}

# Tracks whether this run has already written the freshly built CNI config.
file_written=false

# Fast start: publish the CNI config before any health check, then hold off on
# health checking for the configured fast-start wait.
if [[ "${CILIUM_FAST_START_NAMESPACES:-}" != "" ]]; then
  log "Cilium has fast-start; writing CNI config upfront then wait for ${cilium_watchdog_fast_start_wait}s and start to check Cilium health."
  write_file "${output_file}" "${cni_spec}"
  file_written=true
  sleep "${cilium_watchdog_fast_start_wait}s"
fi

while true; do
log "Checking Cilium health allowing retries for up to ${cilium_watchdog_failure_retry}s."
if cilium_health_check "${cilium_watchdog_failure_retry}"; then
log "Cilium healthz reported success; writing CNI config if not already there then wait for ${cilium_watchdog_success_wait}s."
[[ ! -f "${output_file}" ]] && write_file "${output_file}" "${cni_spec}"
log "Cilium healthz reported success; writing CNI config if never written or not already there then wait for ${cilium_watchdog_success_wait}s."
if [[ ${file_written} != "true" ]] || [[ ! -f "${output_file}" ]]; then
write_file "${output_file}" "${cni_spec}"
file_written=true
fi
sleep "${cilium_watchdog_success_wait}"s
else
log "Cilium does not appear healthy; removing CNI config if it exists then wait for 2s before retry."
Expand Down
66 changes: 66 additions & 0 deletions scripts/testcase/testcase-basic-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Kubernetes API endpoint; host and port match the nodes URL answered by the
# curl mock defined in before_test.
export KUBERNETES_SERVICE_HOST="kubernetes.default.svc" \
  KUBERNETES_SERVICE_PORT="443"

# All feature toggles are set to false for this scenario.
export ENABLE_CALICO_NETWORK_POLICY="false" \
  ENABLE_CILIUM_PLUGIN="false" \
  ENABLE_MASQUERADE="false" \
  ENABLE_IPV6="false"

# Mark for export first, then assign separately so the exit status of the
# read is not masked by 'export'.
export CNI_SPEC_TEMPLATE
CNI_SPEC_TEMPLATE=$(cat "testdata/spec-template.json")

# Prepares the scenario: installs a curl mock for the metadata server and the
# Kubernetes nodes API, then seeds a pre-existing CNI config file.
function before_test() {

  # Mock curl: answers by substring match on the full argument string.
  # shellcheck disable=SC2329
  function curl() {
    # shellcheck disable=SC2317
    if [[ "$*" == *"http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0"* ]]; then
      printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
    elif [[ "$*" == *"https://kubernetes.default.svc:443/api/v1/nodes"* ]]; then
      printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
    else
      # Any other request is unsupported and terminates the calling shell.
      exit 1
    fi
  }
  export -f curl

  # Pre-existing config that verify expects to have been replaced.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

# Checks that the CNI config on the host equals testdata/expected-basic.json
# after key-sorted jq normalisation.
# Globals:   CNI_SPEC_NAME (read)
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 when both documents match, 1 otherwise
function verify() {
  local expected
  local actual

  # Check each read explicitly: if both jq runs failed, both variables would
  # be empty, compare equal, and the test would pass without checking anything.
  if ! expected=$(jq -S . <"testdata/expected-basic.json"); then
    echo "Failed to read expected cni_spec from testdata/expected-basic.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Failed to read actual cni_spec from /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "${expected}"
    echo "but actual was"
    echo "${actual}"
    return 1
  fi

}
71 changes: 71 additions & 0 deletions scripts/testcase/testcase-cilium-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# Kubernetes API endpoint; host and port match the nodes URL answered by the
# curl mock defined in before_test.
export KUBERNETES_SERVICE_HOST="kubernetes.default.svc" \
  KUBERNETES_SERVICE_PORT="443"

# Cilium is enabled with an empty fast-start namespace list; the healthz port
# is the one the curl mock in before_test answers on.
export ENABLE_CALICO_NETWORK_POLICY="false" \
  ENABLE_CILIUM_PLUGIN="true" \
  CILIUM_HEALTHZ_PORT="63197" \
  CILIUM_FAST_START_NAMESPACES="" \
  ENABLE_MASQUERADE="false" \
  ENABLE_IPV6="false"

# Mark for export first, then assign separately so the exit status of the
# read is not masked by 'export'.
export CNI_SPEC_TEMPLATE
CNI_SPEC_TEMPLATE=$(cat "testdata/spec-template.json")

# Prepares the scenario: installs a curl mock for the metadata server, the
# Kubernetes nodes API and the Cilium healthz endpoint, then seeds a
# pre-existing CNI config file.
function before_test() {

  # Mock curl: answers by substring match on the full argument string.
  # shellcheck disable=SC2329
  function curl() {
    # shellcheck disable=SC2317
    if [[ "$*" == *"http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0"* ]]; then
      printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
    elif [[ "$*" == *"https://kubernetes.default.svc:443/api/v1/nodes"* ]]; then
      printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
    elif [[ "$*" == *"http://localhost:63197/"* ]]; then
      # Cilium health endpoint answers successfully.
      printf '%s\n' 'healthz'
    else
      # Any other request is unsupported and terminates the calling shell.
      exit 1
    fi
  }
  export -f curl

  # Pre-existing config that verify expects to have been replaced.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

# Checks that the CNI config on the host equals testdata/expected-cilium.json
# after key-sorted jq normalisation.
# Globals:   CNI_SPEC_NAME (read)
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 when both documents match, 1 otherwise
function verify() {
  local expected
  local actual

  # Check each read explicitly: if both jq runs failed, both variables would
  # be empty, compare equal, and the test would pass without checking anything.
  if ! expected=$(jq -S . <"testdata/expected-cilium.json"); then
    echo "Failed to read expected cni_spec from testdata/expected-cilium.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Failed to read actual cni_spec from /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "${expected}"
    echo "but actual was"
    echo "${actual}"
    return 1
  fi

}
83 changes: 83 additions & 0 deletions scripts/testcase/testcase-watchdog-cilium-faststart-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Kubernetes API endpoint; host and port match the nodes URL answered by the
# curl mock defined in before_test.
export KUBERNETES_SERVICE_HOST="kubernetes.default.svc" \
  KUBERNETES_SERVICE_PORT="443"

# Cilium is enabled with a non-empty fast-start namespace list, and the
# watchdog is turned on.
export ENABLE_CALICO_NETWORK_POLICY="false" \
  ENABLE_CILIUM_PLUGIN="true" \
  CILIUM_HEALTHZ_PORT="63197" \
  CILIUM_FAST_START_NAMESPACES="default,kube-system" \
  ENABLE_MASQUERADE="false" \
  ENABLE_IPV6="false" \
  RUN_CNI_WATCHDOG="true"

# Mark for export first, then assign separately so the exit status of the
# read is not masked by 'export'.
export CNI_SPEC_TEMPLATE
CNI_SPEC_TEMPLATE=$(cat "testdata/spec-template.json")

# Exported because the curl mock in before_test exits with this code.
export TEST_WANT_EXIT_CODE="24"

# Prepares the scenario: installs curl and sleep mocks, then seeds a
# pre-existing CNI config file.
function before_test() {

  # Mock curl: answers by substring match on the full argument string.
  # shellcheck disable=SC2329
  function curl() {
    # shellcheck disable=SC2317
    if [[ "$*" == *"http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0"* ]]; then
      printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
    elif [[ "$*" == *"https://kubernetes.default.svc:443/api/v1/nodes"* ]]; then
      printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
    elif [[ "$*" == *"http://localhost:63197/"* ]]; then
      # With fast-start enabled, the CNI config should already have been
      # written by the time of the first Cilium health check attempt, so the
      # run is stopped here with the expected exit code.
      exit "${TEST_WANT_EXIT_CODE}"
    else
      # Any other request is unsupported and terminates the calling shell.
      exit 1
    fi
  }
  export -f curl

  # Mock sleep: only records the call instead of delaying.
  # shellcheck disable=SC2317,SC2329
  function sleep() {
    printf '%s\n' \
      "[MOCK called] sleep $*" \
      "[MOCK] this test expects a delay during fast start."
  }
  export -f sleep

  # Pre-existing config that verify expects to have been replaced.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

# Checks that the CNI config on the host equals
# testdata/expected-cilium-faststart.json after key-sorted jq normalisation.
# Globals:   CNI_SPEC_NAME (read)
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 when both documents match, 1 otherwise
function verify() {
  local expected
  local actual

  # Check each read explicitly: if both jq runs failed, both variables would
  # be empty, compare equal, and the test would pass without checking anything.
  if ! expected=$(jq -S . <"testdata/expected-cilium-faststart.json"); then
    echo "Failed to read expected cni_spec from testdata/expected-cilium-faststart.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Failed to read actual cni_spec from /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "${expected}"
    echo "but actual was"
    echo "${actual}"
    return 1
  fi

}
75 changes: 75 additions & 0 deletions scripts/testcase/testcase-watchdog-cilium-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Kubernetes API endpoint; host and port match the nodes URL answered by the
# curl mock defined in before_test.
export KUBERNETES_SERVICE_HOST="kubernetes.default.svc" \
  KUBERNETES_SERVICE_PORT="443"

# Cilium is enabled with an empty fast-start namespace list, and the watchdog
# is turned on.
export ENABLE_CALICO_NETWORK_POLICY="false" \
  ENABLE_CILIUM_PLUGIN="true" \
  CILIUM_HEALTHZ_PORT="63197" \
  CILIUM_FAST_START_NAMESPACES="" \
  ENABLE_MASQUERADE="false" \
  ENABLE_IPV6="false" \
  RUN_CNI_WATCHDOG="true"

# Mark for export first, then assign separately so the exit status of the
# read is not masked by 'export'.
export CNI_SPEC_TEMPLATE
CNI_SPEC_TEMPLATE=$(cat "testdata/spec-template.json")

# Not exported. TEST_EXIT_CODE_SLEEP is not defined in this file; presumably
# the test harness provides it -- confirm against the runner.
# shellcheck disable=SC2034
TEST_WANT_EXIT_CODE="${TEST_EXIT_CODE_SLEEP}"

# Prepares the scenario: installs a curl mock for the metadata server, the
# Kubernetes nodes API and the Cilium healthz endpoint, then seeds a
# pre-existing CNI config file.
function before_test() {

  # Mock curl: answers by substring match on the full argument string.
  # shellcheck disable=SC2329
  function curl() {
    # shellcheck disable=SC2317
    if [[ "$*" == *"http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0"* ]]; then
      printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
    elif [[ "$*" == *"https://kubernetes.default.svc:443/api/v1/nodes"* ]]; then
      printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
    elif [[ "$*" == *"http://localhost:63197/"* ]]; then
      # Cilium health endpoint answers successfully.
      printf '%s\n' 'healthz'
    else
      # Any other request is unsupported and terminates the calling shell.
      exit 1
    fi
  }
  export -f curl

  # Pre-existing config that verify expects to have been replaced.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

# Checks that the CNI config on the host equals testdata/expected-cilium.json
# after key-sorted jq normalisation.
# Globals:   CNI_SPEC_NAME (read)
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 when both documents match, 1 otherwise
function verify() {
  local expected
  local actual

  # Check each read explicitly: if both jq runs failed, both variables would
  # be empty, compare equal, and the test would pass without checking anything.
  if ! expected=$(jq -S . <"testdata/expected-cilium.json"); then
    echo "Failed to read expected cni_spec from testdata/expected-cilium.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Failed to read actual cni_spec from /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "${expected}"
    echo "but actual was"
    echo "${actual}"
    return 1
  fi

}
70 changes: 70 additions & 0 deletions scripts/testcase/testcase-watchdog-overwrite.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Kubernetes API endpoint; host and port match the nodes URL answered by the
# curl mock defined in before_test.
export KUBERNETES_SERVICE_HOST="kubernetes.default.svc" \
  KUBERNETES_SERVICE_PORT="443"

# Cilium and the other feature toggles are off; only the watchdog is on.
export ENABLE_CALICO_NETWORK_POLICY="false" \
  ENABLE_CILIUM_PLUGIN="false" \
  ENABLE_MASQUERADE="false" \
  ENABLE_IPV6="false" \
  RUN_CNI_WATCHDOG="true"

# Mark for export first, then assign separately so the exit status of the
# read is not masked by 'export'.
export CNI_SPEC_TEMPLATE
CNI_SPEC_TEMPLATE=$(cat "testdata/spec-template.json")

# Not exported. TEST_EXIT_CODE_SLEEP is not defined in this file; presumably
# the test harness provides it -- confirm against the runner.
# shellcheck disable=SC2034
TEST_WANT_EXIT_CODE="${TEST_EXIT_CODE_SLEEP}"

# Prepares the scenario: installs a curl mock for the metadata server and the
# Kubernetes nodes API, then seeds a pre-existing CNI config file.
function before_test() {

  # Mock curl: answers by substring match on the full argument string.
  # shellcheck disable=SC2329
  function curl() {
    # shellcheck disable=SC2317
    if [[ "$*" == *"http://metadata.google.internal/computeMetadata/v1/instance/network-interfaces/0"* ]]; then
      printf '%s\n' '{"ipv6s": ["2600:1900:4000:318:0:7:0:0"]}'
    elif [[ "$*" == *"https://kubernetes.default.svc:443/api/v1/nodes"* ]]; then
      printf '%s\n' '{"object":{
"metadata": {
"labels": {
},
"creationTimestamp": "2024-01-03T11:54:01Z",
"name": "gke-my-cluster-default-pool-128bc25d-9c94",
"resourceVersion": "891003",
"uid": "f2353a2f-ca8c-4ca0-8dd3-ad1f964a54f0"
},
"spec": {
"podCIDR": "10.52.1.0/24",
"podCIDRs": [
"10.52.1.0/24"
],
"providerID": "gce://my-gke-project/us-central1-c/gke-my-cluster-default-pool-128bc25d-9c94"
}
}}'
    else
      # Any other request is unsupported and terminates the calling shell.
      exit 1
    fi
  }
  export -f curl

  # Pre-existing config that verify expects to have been replaced.
  printf '%s\n' '"unchanged"' >"/host/etc/cni/net.d/${CNI_SPEC_NAME}"

}

# Checks that the CNI config on the host equals testdata/expected-basic.json
# after key-sorted jq normalisation.
# Globals:   CNI_SPEC_NAME (read)
# Outputs:   a diagnostic on stdout when the check fails
# Returns:   0 when both documents match, 1 otherwise
function verify() {
  local expected
  local actual

  # Check each read explicitly: if both jq runs failed, both variables would
  # be empty, compare equal, and the test would pass without checking anything.
  if ! expected=$(jq -S . <"testdata/expected-basic.json"); then
    echo "Failed to read expected cni_spec from testdata/expected-basic.json"
    return 1
  fi
  if ! actual=$(jq -S . <"/host/etc/cni/net.d/${CNI_SPEC_NAME}"); then
    echo "Failed to read actual cni_spec from /host/etc/cni/net.d/${CNI_SPEC_NAME}"
    return 1
  fi

  if [[ "${expected}" != "${actual}" ]]; then
    echo "Expected cni_spec value:"
    echo "${expected}"
    echo "but actual was"
    echo "${actual}"
    return 1
  fi

}

0 comments on commit 0b69c09

Please sign in to comment.