From 637485f8293cf4d7a31b83af6900157fe8fe8eb1 Mon Sep 17 00:00:00 2001
From: alyssavu
Date: Fri, 26 Jan 2024 22:17:41 +0000
Subject: [PATCH] feat: add function to condense script

---
 .../cloud-init/artifacts/aks-check-network.sh | 173 +++++++++---------
 pkg/templates/templates_generated.go          | 173 +++++++++---------
 2 files changed, 172 insertions(+), 174 deletions(-)

diff --git a/parts/linux/cloud-init/artifacts/aks-check-network.sh b/parts/linux/cloud-init/artifacts/aks-check-network.sh
index 1cfed4d5322..75ce0654d83 100644
--- a/parts/linux/cloud-init/artifacts/aks-check-network.sh
+++ b/parts/linux/cloud-init/artifacts/aks-check-network.sh
@@ -6,14 +6,14 @@
 # and log the results to the events directory. For now, this script has to be triggered manually to
 # collect the log. In the future, we will run it periodically to check and alert any issue.
 
-set -o nounset
-set -o pipefail
-
-APISERVER_FQDN=$1
+APISERVER_FQDN=${1:-''}
 CUSTOM_ENDPOINT=${2:-''}
 
 EVENTS_LOGGING_PATH="/var/log/azure/Microsoft.Azure.Extensions.CustomScript/events/"
 AZURE_CONFIG_PATH="/etc/kubernetes/azure.json"
+AKS_CA_CERT_PATH="/etc/kubernetes/certs/apiserver.crt"
+AKS_CERT_PATH="/etc/kubernetes/certs/client.crt"
+AKS_KEY_PATH="/etc/kubernetes/certs/client.key"
 RESOLV_CONFIG_PATH="/etc/resolv.conf"
 SYSTEMD_RESOLV_CONFIG_PATH="/run/systemd/resolve/resolv.conf"
 
@@ -22,13 +22,13 @@ METADATA_ENDPOINT="http://169.254.169.254/metadata/identity/oauth2/token?api-ver
 API_VERSION="2023-11-01"
 AKS_ENDPOINT="https://${ARM_ENDPOINT}/providers/Microsoft.ContainerService/operations?api-version=${API_VERSION}"
 APISERVER_ENDPOINT="https://${APISERVER_FQDN}/healthz"
-ACS_BINARY_ENDPOINT="https://acs-mirror.azureedge.net/azure-cni/v1.4.43/binaries/azure-vnet-cni-linux-amd64-v1.4.43.tgz"
+ACS_BINARY_ENDPOINT="acs-mirror.azureedge.net/azure-cni/v1.4.43/binaries/azure-vnet-cni-linux-amd64-v1.4.43.tgz"
 
 TEMP_DIR=$(mktemp -d)
 NSLOOKUP_FILE="${TEMP_DIR}/nslookup.log"
 TOKEN_FILE="${TEMP_DIR}/access_token.json"
 
-URL_LISTS=("mcr.microsoft.com" "login.microsoftonline.com" "packages.microsoft.com" "acs-mirror.azureedge.net")
+URL_LIST=("mcr.microsoft.com" "login.microsoftonline.com" "packages.microsoft.com" "acs-mirror.azureedge.net")
 MAX_RETRY=3
 DELAY=5
 
@@ -56,7 +56,7 @@ function logs_to_events {
         --arg EventTid "0" \
         '{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
     )
-    echo ${json_string} > ${EVENTS_LOGGING_PATH}${eventsFileName}.json
+    # echo ${json_string} > ${EVENTS_LOGGING_PATH}${eventsFileName}.json
 
     # this allows an error from the command at ${@} to be returned and correct code assigned in cse_main
     if [ "$ret" != "0" ]; then
@@ -64,22 +64,70 @@ function logs_to_events {
     fi
 }
 
+function dns_trace {
+    local endpoint=$1
+    # Diagnostic output only: dig and host print to stdout, nothing goes through logs_to_events
+    echo "Trace DNS request for $endpoint"
+    dig "$endpoint"
+    host -a "$endpoint"
+}
+
+function check_and_curl {
+    local url=$1
+    local error_msg=$2
+    # $1: host name to test; $2: hint appended to the final curl failure message
+    # Check DNS first. On failure return instead of continue: the URL loop lives in the caller, not in this function
+    nslookup "$url" > /dev/null
+    if [ $? -eq 0 ]; then
+        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $url'"
+    else
+        dns_trace "$url"
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $url'"
+        return 1
+    fi
+
+    local i=0 response
+    while true;
+    do
+        # Curl the url and capture the response code
+        if [ "$url" == "acs-mirror.azureedge.net" ]; then
+            response=$(curl -I -s -o /dev/null -w "%{http_code}" "https://${ACS_BINARY_ENDPOINT}" -L)
+        else
+            response=$(curl -s -o /dev/null -w "%{http_code}" "https://${url}" -L)
+        fi
+
+        if [ "$response" -ge 200 ] && [ "$response" -lt 400 ]; then
+            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
+            break
+        fi
+
+        # If the response code is not within successful range, increment the error count
+        i=$(( $i + 1 ))
+        # If we have reached the maximum number of retries, log an error
+        if [[ $i -eq $MAX_RETRY ]]; then
+            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response. $error_msg'"
+            break
+        fi
+
+        # Sleep for the specified delay before trying again
+        sleep $DELAY
+    done
+}
+
 if ! [ -e "${AZURE_CONFIG_PATH}" ]; then
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to find azure.json file. Are you running inside Kubernetes?'"
-    exit 1
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - WARNING: Failed to find $AZURE_CONFIG_PATH file. Are you running inside Kubernetes?'"
 fi
-azure_config=$(cat $AZURE_CONFIG_PATH)
 
 
 # check DNS resolution to ARM endpoint
 nslookup $ARM_ENDPOINT > $NSLOOKUP_FILE
 if [ $? -eq 0 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $ARM_ENDPOINT'"
+    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $ARM_ENDPOINT'"
 else
     error_log=$(cat $NSLOOKUP_FILE)
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $ARM_ENDPOINT with error $error_log'"
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $ARM_ENDPOINT with error $error_log'"
 
-    # perform basic DNS troubleshooting
+    # check resolv.conf
     nameserver=$(cat $NSLOOKUP_FILE | grep "Server" | awk '{print $2}')
     echo "Checking resolv.conf for nameserver $nameserver"
     cat $RESOLV_CONFIG_PATH | grep $nameserver
@@ -90,75 +138,50 @@ else
     if [ $? -ne 0 ]; then
         logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - FAILURE: Nameserver $nameserver wasn't found in $SYSTEMD_RESOLV_CONFIG_PATH'"
     fi
+
+    # trace request
+    dns_trace $ARM_ENDPOINT
+    exit 1
 fi
 
 
 # check access to ARM endpoint
 result=$(curl -s -o $TOKEN_FILE -w "%{http_code}" -H Metadata:true $METADATA_ENDPOINT)
 if [ $result -eq 200 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully sent metadata endpoint request with returned status code $result'"
+    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully retrieved access token'"
     access_token=$(cat $TOKEN_FILE | jq -r .access_token)
     res=$(curl -X GET -H "Authorization: Bearer $access_token" -H "Content-Type:application/json" -s -o /dev/null -w "%{http_code}" $AKS_ENDPOINT)
     if [ $res -ge 200 ] && [ $res -lt 400 ]; then
         logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $ARM_ENDPOINT with returned status code $res'"
     else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $ARM_ENDPOINT with returned status code $res'"
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $ARM_ENDPOINT with returned status code $res. This endpoint is required for Kubernetes operations against the Azure API'"
     fi
 else
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to send metadata endpoint request with returned status code $result. Can't check access to $ARM_ENDPOINT'"
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to retrieve access token with returned status code $result. Can't check access to $ARM_ENDPOINT'"
 fi
 
-# Check access to apiserver
-nslookup $APISERVER_FQDN > /dev/null
-if [ $? -eq 0 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $APISERVER_FQDN'"
-    res=$(curl -s -o /dev/null -w "%{http_code}" --cacert /etc/kubernetes/certs/apiserver.crt --cert /etc/kubernetes/certs/client.crt --key /etc/kubernetes/certs/client.key $APISERVER_ENDPOINT)
-    if [ $res -ge 200 ] && [ $res -lt 400 ]; then
-        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled apiserver $APISERVER_FQDN with returned status code $res'"
-    else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl apiserver $APISERVER_FQDN with returned status code $res'"
-    fi
+# check access to apiserver
+if [ -z "$APISERVER_FQDN" ]; then
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - WARNING: No apiserver FQDN provided. Skipping apiserver check.'"
 else
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $APISERVER_FQDN'"
-fi
-
-for url in ${URL_LISTS[@]};
-do
-    # Check DNS
-    nslookup $url > /dev/null
+    nslookup $APISERVER_FQDN > /dev/null
     if [ $? -eq 0 ]; then
-        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $url'"
+        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $APISERVER_FQDN'"
+        res=$(curl -s -o /dev/null -w "%{http_code}" --cacert $AKS_CA_CERT_PATH --cert $AKS_CERT_PATH --key $AKS_KEY_PATH $APISERVER_ENDPOINT)
+        if [ $res -ge 200 ] && [ $res -lt 400 ]; then
+            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled apiserver $APISERVER_FQDN with returned status code $res'"
+        else
+            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $APISERVER_FQDN with returned status code $res. Node can't connect to the apiserver'"
+        fi
     else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $url'"
-        continue
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $APISERVER_FQDN'"
+        dns_trace $APISERVER_FQDN
     fi
+fi
 
-    i=0
-    while true;
-    do
-        # Ping the url and capture the response code
-        if [ $url == "acs-mirror.azureedge.net" ]; then
-            response=$(curl -I -s -o /dev/null -w "%{http_code}" $ACS_BINARY_ENDPOINT -L)
-        else
-            response=$(curl -s -o /dev/null -w "%{http_code}" "https://$url" -L)
-        fi
-
-        if [ $response -ge 200 ] && [ $response -lt 400 ]; then
-            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
-            break
-        fi
-
-        # If the response code is not 200, increment the error count
-        i=$(( $i + 1 ))
-        # If we have reached the maximum number of retries, log an error
-        if [[ $i -eq $MAX_RETRY ]]; then
-            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response'"
-            break
-        fi
-
-        # Sleep for the specified delay before trying again
-        sleep $DELAY
-    done
+for url in ${URL_LIST[@]};
+do
+    check_and_curl $url ""
 done
 
 if [ ! -z "$CUSTOM_ENDPOINT" ]; then
@@ -166,30 +189,6 @@ if [ ! -z "$CUSTOM_ENDPOINT" ]; then
     extra_urls=($(echo $CUSTOM_ENDPOINT | tr "," "\n"))
     for url in "${extra_urls[@]}"
     do
-        nslookup $url > /dev/null
-        if [ $? -eq 0 ]; then
-            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $url'"
-        else
-            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $url'"
-            continue
-        fi
-
-        i=0
-        while true;
-        do
-            response=$(curl -s -o /dev/null -w "%{http_code}" "https://$url" -L)
-            if [ $response -ge 200 ] && [ $response -lt 400 ]; then
-                logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
-                break
-            fi
-
-            i=$(( $i + 1 ))
-            if [[ $i -eq $MAX_RETRY ]]; then
-                logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response'"
-                break
-            fi
-
-            sleep $DELAY
-        done
+        check_and_curl $url ""
     done
 fi
\ No newline at end of file
diff --git a/pkg/templates/templates_generated.go b/pkg/templates/templates_generated.go
index 08a6b089bb9..73bef866508 100644
--- a/pkg/templates/templates_generated.go
+++ b/pkg/templates/templates_generated.go
@@ -297,14 +297,14 @@ var _linuxCloudInitArtifactsAksCheckNetworkSh = []byte(`#!/bin/bash
 # and log the results to the events directory. For now, this script has to be triggered manually to
 # collect the log. In the future, we will run it periodically to check and alert any issue.
 
-set -o nounset
-set -o pipefail
-
-APISERVER_FQDN=$1
+APISERVER_FQDN=${1:-''}
 CUSTOM_ENDPOINT=${2:-''}
 
 EVENTS_LOGGING_PATH="/var/log/azure/Microsoft.Azure.Extensions.CustomScript/events/"
 AZURE_CONFIG_PATH="/etc/kubernetes/azure.json"
+AKS_CA_CERT_PATH="/etc/kubernetes/certs/apiserver.crt"
+AKS_CERT_PATH="/etc/kubernetes/certs/client.crt"
+AKS_KEY_PATH="/etc/kubernetes/certs/client.key"
 RESOLV_CONFIG_PATH="/etc/resolv.conf"
 SYSTEMD_RESOLV_CONFIG_PATH="/run/systemd/resolve/resolv.conf"
 
@@ -313,13 +313,13 @@ METADATA_ENDPOINT="http://169.254.169.254/metadata/identity/oauth2/token?api-ver
 API_VERSION="2023-11-01"
 AKS_ENDPOINT="https://${ARM_ENDPOINT}/providers/Microsoft.ContainerService/operations?api-version=${API_VERSION}"
 APISERVER_ENDPOINT="https://${APISERVER_FQDN}/healthz"
-ACS_BINARY_ENDPOINT="https://acs-mirror.azureedge.net/azure-cni/v1.4.43/binaries/azure-vnet-cni-linux-amd64-v1.4.43.tgz"
+ACS_BINARY_ENDPOINT="acs-mirror.azureedge.net/azure-cni/v1.4.43/binaries/azure-vnet-cni-linux-amd64-v1.4.43.tgz"
 
 TEMP_DIR=$(mktemp -d)
 NSLOOKUP_FILE="${TEMP_DIR}/nslookup.log"
 TOKEN_FILE="${TEMP_DIR}/access_token.json"
 
-URL_LISTS=("mcr.microsoft.com" "login.microsoftonline.com" "packages.microsoft.com" "acs-mirror.azureedge.net")
+URL_LIST=("mcr.microsoft.com" "login.microsoftonline.com" "packages.microsoft.com" "acs-mirror.azureedge.net")
 MAX_RETRY=3
 DELAY=5
 
@@ -347,7 +347,7 @@ function logs_to_events {
         --arg EventTid "0" \
         '{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
     )
-    echo ${json_string} > ${EVENTS_LOGGING_PATH}${eventsFileName}.json
+    # echo ${json_string} > ${EVENTS_LOGGING_PATH}${eventsFileName}.json
 
     # this allows an error from the command at ${@} to be returned and correct code assigned in cse_main
     if [ "$ret" != "0" ]; then
@@ -355,22 +355,70 @@ function logs_to_events {
     fi
 }
 
+function dns_trace {
+    local endpoint=$1
+    # Diagnostic output only: dig and host print to stdout, nothing goes through logs_to_events
+    echo "Trace DNS request for $endpoint"
+    dig "$endpoint"
+    host -a "$endpoint"
+}
+
+function check_and_curl {
+    local url=$1
+    local error_msg=$2
+    # $1: host name to test; $2: hint appended to the final curl failure message
+    # Check DNS first. On failure return instead of continue: the URL loop lives in the caller, not in this function
+    nslookup "$url" > /dev/null
+    if [ $? -eq 0 ]; then
+        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $url'"
+    else
+        dns_trace "$url"
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $url'"
+        return 1
+    fi
+
+    local i=0 response
+    while true;
+    do
+        # Curl the url and capture the response code
+        if [ "$url" == "acs-mirror.azureedge.net" ]; then
+            response=$(curl -I -s -o /dev/null -w "%{http_code}" "https://${ACS_BINARY_ENDPOINT}" -L)
+        else
+            response=$(curl -s -o /dev/null -w "%{http_code}" "https://${url}" -L)
+        fi
+
+        if [ "$response" -ge 200 ] && [ "$response" -lt 400 ]; then
+            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
+            break
+        fi
+
+        # If the response code is not within successful range, increment the error count
+        i=$(( $i + 1 ))
+        # If we have reached the maximum number of retries, log an error
+        if [[ $i -eq $MAX_RETRY ]]; then
+            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response. $error_msg'"
+            break
+        fi
+
+        # Sleep for the specified delay before trying again
+        sleep $DELAY
+    done
+}
+
 if ! [ -e "${AZURE_CONFIG_PATH}" ]; then
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to find azure.json file. Are you running inside Kubernetes?'"
-    exit 1
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - WARNING: Failed to find $AZURE_CONFIG_PATH file. Are you running inside Kubernetes?'"
 fi
-azure_config=$(cat $AZURE_CONFIG_PATH)
 
 
 # check DNS resolution to ARM endpoint
 nslookup $ARM_ENDPOINT > $NSLOOKUP_FILE
 if [ $? -eq 0 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $ARM_ENDPOINT'"
+    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $ARM_ENDPOINT'"
 else
     error_log=$(cat $NSLOOKUP_FILE)
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $ARM_ENDPOINT with error $error_log'"
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $ARM_ENDPOINT with error $error_log'"
 
-    # perform basic DNS troubleshooting
+    # check resolv.conf
     nameserver=$(cat $NSLOOKUP_FILE | grep "Server" | awk '{print $2}')
     echo "Checking resolv.conf for nameserver $nameserver"
     cat $RESOLV_CONFIG_PATH | grep $nameserver
@@ -381,75 +429,50 @@ else
     if [ $? -ne 0 ]; then
         logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - FAILURE: Nameserver $nameserver wasn't found in $SYSTEMD_RESOLV_CONFIG_PATH'"
     fi
+
+    # trace request
+    dns_trace $ARM_ENDPOINT
+    exit 1
 fi
 
 
 # check access to ARM endpoint
 result=$(curl -s -o $TOKEN_FILE -w "%{http_code}" -H Metadata:true $METADATA_ENDPOINT)
 if [ $result -eq 200 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully sent metadata endpoint request with returned status code $result'"
+    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully retrieved access token'"
     access_token=$(cat $TOKEN_FILE | jq -r .access_token)
     res=$(curl -X GET -H "Authorization: Bearer $access_token" -H "Content-Type:application/json" -s -o /dev/null -w "%{http_code}" $AKS_ENDPOINT)
     if [ $res -ge 200 ] && [ $res -lt 400 ]; then
         logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $ARM_ENDPOINT with returned status code $res'"
     else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $ARM_ENDPOINT with returned status code $res'"
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $ARM_ENDPOINT with returned status code $res. This endpoint is required for Kubernetes operations against the Azure API'"
     fi
 else
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to send metadata endpoint request with returned status code $result. Can't check access to $ARM_ENDPOINT'"
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to retrieve access token with returned status code $result. Can't check access to $ARM_ENDPOINT'"
 fi
 
-# Check access to apiserver
-nslookup $APISERVER_FQDN > /dev/null
-if [ $? -eq 0 ]; then
-    logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $APISERVER_FQDN'"
-    res=$(curl -s -o /dev/null -w "%{http_code}" --cacert /etc/kubernetes/certs/apiserver.crt --cert /etc/kubernetes/certs/client.crt --key /etc/kubernetes/certs/client.key $APISERVER_ENDPOINT)
-    if [ $res -ge 200 ] && [ $res -lt 400 ]; then
-        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled apiserver $APISERVER_FQDN with returned status code $res'"
-    else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl apiserver $APISERVER_FQDN with returned status code $res'"
-    fi
+# check access to apiserver
+if [ -z "$APISERVER_FQDN" ]; then
+    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - WARNING: No apiserver FQDN provided. Skipping apiserver check.'"
 else
-    logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $APISERVER_FQDN'"
-fi
-
-for url in ${URL_LISTS[@]};
-do
-    # Check DNS
-    nslookup $url > /dev/null
+    nslookup $APISERVER_FQDN > /dev/null
     if [ $? -eq 0 ]; then
-        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $url'"
+        logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to $APISERVER_FQDN'"
+        res=$(curl -s -o /dev/null -w "%{http_code}" --cacert $AKS_CA_CERT_PATH --cert $AKS_CERT_PATH --key $AKS_KEY_PATH $APISERVER_ENDPOINT)
+        if [ $res -ge 200 ] && [ $res -lt 400 ]; then
+            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled apiserver $APISERVER_FQDN with returned status code $res'"
+        else
+            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $APISERVER_FQDN with returned status code $res. Node can't connect to the apiserver'"
+        fi
     else
-        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $url'"
-        continue
+        logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to $APISERVER_FQDN'"
+        dns_trace $APISERVER_FQDN
     fi
+fi
 
-    i=0
-    while true;
-    do
-        # Ping the url and capture the response code
-        if [ $url == "acs-mirror.azureedge.net" ]; then
-            response=$(curl -I -s -o /dev/null -w "%{http_code}" $ACS_BINARY_ENDPOINT -L)
-        else
-            response=$(curl -s -o /dev/null -w "%{http_code}" "https://$url" -L)
-        fi
-
-        if [ $response -ge 200 ] && [ $response -lt 400 ]; then
-            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
-            break
-        fi
-
-        # If the response code is not 200, increment the error count
-        i=$(( $i + 1 ))
-        # If we have reached the maximum number of retries, log an error
-        if [[ $i -eq $MAX_RETRY ]]; then
-            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response'"
-            break
-        fi
-
-        # Sleep for the specified delay before trying again
-        sleep $DELAY
-    done
+for url in ${URL_LIST[@]};
+do
+    check_and_curl $url ""
 done
 
 if [ ! -z "$CUSTOM_ENDPOINT" ]; then
@@ -457,31 +480,7 @@ if [ ! -z "$CUSTOM_ENDPOINT" ]; then
     extra_urls=($(echo $CUSTOM_ENDPOINT | tr "," "\n"))
     for url in "${extra_urls[@]}"
     do
-        nslookup $url > /dev/null
-        if [ $? -eq 0 ]; then
-            logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully tested DNS resolution to endpoint $url'"
-        else
-            logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to test DNS resolution to endpoint $url'"
-            continue
-        fi
-
-        i=0
-        while true;
-        do
-            response=$(curl -s -o /dev/null -w "%{http_code}" "https://$url" -L)
-            if [ $response -ge 200 ] && [ $response -lt 400 ]; then
-                logs_to_events "AKS.CSE.testingTraffic.success" "echo '$(date) - SUCCESS: Successfully curled $url with returned status code $response'"
-                break
-            fi
-
-            i=$(( $i + 1 ))
-            if [[ $i -eq $MAX_RETRY ]]; then
-                logs_to_events "AKS.CSE.testingTraffic.failure" "echo '$(date) - ERROR: Failed to curl $url after $MAX_RETRY attempts with returned status code $response'"
-                break
-            fi
-
-            sleep $DELAY
-        done
+        check_and_curl $url ""
     done
 fi`)
 