Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

using discard_unpacked_layers with CRI to reduce the size on disk of the cached images #5077

Open
wants to merge 17 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
7 changes: 6 additions & 1 deletion e2e/kubelet/generate-kubelet-flags.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,12 @@ retrycmd_get_tarball() {
tar_retries=$1; wait_sleep=$2; tarball=$3; url=$4
echo "${tar_retries} retries"
for i in $(seq 1 $tar_retries); do
tar -tzf $tarball && break || \
if [ -f $tarball ]; then
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an unrelated change?

tar -tzf $tarball
if [ $? -eq 0 ]; then
break
fi
fi
if [ $i -eq $tar_retries ]; then
return 1
else
Expand Down
1 change: 0 additions & 1 deletion nbcparser/pkg/parser/templates/cse_cmd.sh.gtpl
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ LOAD_BALANCER_SKU={{getStringFromLoadBalancerSkuType .ClusterConfig.GetLoadBalan
EXCLUDE_MASTER_FROM_STANDARD_LB={{getExcludeMasterFromStandardLB .ClusterConfig.GetLoadBalancerConfig}}
MAXIMUM_LOADBALANCER_RULE_COUNT={{getMaxLBRuleCount .ClusterConfig.GetLoadBalancerConfig}}
CONTAINER_RUNTIME=containerd
CLI_TOOL=ctr
CONTAINERD_DOWNLOAD_URL_BASE={{.ContainerdConfig.GetContainerdDownloadUrlBase}}
NETWORK_MODE="transparent"
KUBE_BINARY_URL={{.KubeBinaryConfig.GetKubeBinaryUrl}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,5 @@ EVENT_JSON=$( jq -n \
--arg EventTid "0" \
'{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
)

mkdir -p ${EVENTS_LOGGING_DIR}
Copy link
Collaborator

@cameronmeissner cameronmeissner Oct 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We shouldn't need to do this; WALinuxAgent will do it as part of executing the extension.

echo ${EVENT_JSON} > ${EVENTS_LOGGING_DIR}${EVENTS_FILE_NAME}.json
Original file line number Diff line number Diff line change
Expand Up @@ -353,5 +353,5 @@ EVENT_JSON=$( jq -n \
--arg EventTid "0" \
'{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
)

mkdir -p ${EVENTS_LOGGING_DIR}
echo ${EVENT_JSON} > ${EVENTS_LOGGING_DIR}${EVENTS_FILE_NAME}.json
1 change: 0 additions & 1 deletion parts/linux/cloud-init/artifacts/cse_cmd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ LOAD_BALANCER_SKU={{GetVariable "loadBalancerSku"}}
EXCLUDE_MASTER_FROM_STANDARD_LB={{GetVariable "excludeMasterFromStandardLB"}}
MAXIMUM_LOADBALANCER_RULE_COUNT={{GetVariable "maximumLoadBalancerRuleCount"}}
CONTAINER_RUNTIME={{GetParameter "containerRuntime"}}
CLI_TOOL={{GetParameter "cliTool"}}
CONTAINERD_DOWNLOAD_URL_BASE={{GetParameter "containerdDownloadURLBase"}}
NETWORK_MODE={{GetParameter "networkMode"}}
KUBE_BINARY_URL={{GetParameter "kubeBinaryURL"}}
Expand Down
4 changes: 2 additions & 2 deletions parts/linux/cloud-init/artifacts/cse_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ configGPUDrivers() {
if [[ $OS == $UBUNTU_OS_NAME ]]; then
mkdir -p /opt/{actions,gpu}
if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
ctr -n k8s.io image pull $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
pullContainerImage "crictl" $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG
retrycmd_if_failure 5 10 600 bash -c "$CTR_GPU_INSTALL_CMD $NVIDIA_DRIVER_IMAGE:$NVIDIA_DRIVER_IMAGE_TAG gpuinstall /entrypoint.sh install"
ret=$?
if [[ "$ret" != "0" ]]; then
Expand Down Expand Up @@ -885,4 +885,4 @@ setKubeletNodeIPFlag() {
fi
}

#EOF
#EOF
15 changes: 13 additions & 2 deletions parts/linux/cloud-init/artifacts/cse_helpers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,12 @@ retrycmd_get_tarball() {
tar_retries=$1; wait_sleep=$2; tarball=$3; url=$4
echo "${tar_retries} retries"
for i in $(seq 1 $tar_retries); do
tar -tzf $tarball && break || \
if [ -f $tarball ]; then
tar -tzf $tarball
if [ $? -eq 0 ]; then
break
fi
fi
if [ $i -eq $tar_retries ]; then
return 1
else
Expand All @@ -221,7 +226,12 @@ retrycmd_get_tarball_from_registry_with_oras() {
tar_folder=$(dirname "$tarball")
echo "${tar_retries} retries"
for i in $(seq 1 $tar_retries); do
tar -tzf $tarball && break || \
if [ -f $tarball ]; then
tar -tzf $tarball
if [ $? -eq 0 ]; then
break
fi
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this change necessary?

fi
if [ $i -eq $tar_retries ]; then
return 1
else
Expand Down Expand Up @@ -444,6 +454,7 @@ logs_to_events() {
--arg EventTid "0" \
'{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
)
mkdir -p ${EVENTS_LOGGING_DIR}
echo ${json_string} > ${EVENTS_LOGGING_DIR}${eventsFileName}.json

# this allows an error from the command at ${@} to be returned and correct code assigned in cse_main
Expand Down
94 changes: 34 additions & 60 deletions parts/linux/cloud-init/artifacts/cse_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ downloadSecureTLSBootstrapKubeletExecPlugin() {
plugin_download_path="${SECURE_TLS_BOOTSTRAP_KUBELET_EXEC_PLUGIN_DOWNLOAD_DIR}/tls-bootstrap-client"

if [ ! -f "$plugin_download_path" ]; then
retrycmd_if_failure 30 5 60 curl -fSL -o "$plugin_download_path" "$plugin_url" || exit $ERR_DOWNLOAD_SECURE_TLS_BOOTSTRAP_KUBELET_EXEC_PLUGIN_TIMEOUT
retrycmd_if_failure 30 5 60 curl -sfSL -o "$plugin_download_path" "$plugin_url" || exit $ERR_DOWNLOAD_SECURE_TLS_BOOTSTRAP_KUBELET_EXEC_PLUGIN_TIMEOUT
chown -R root:root "$SECURE_TLS_BOOTSTRAP_KUBELET_EXEC_PLUGIN_DOWNLOAD_DIR"
chmod -R 755 "$SECURE_TLS_BOOTSTRAP_KUBELET_EXEC_PLUGIN_DOWNLOAD_DIR"
fi
Expand Down Expand Up @@ -385,21 +385,22 @@ downloadCrictl() {
}

installCrictl() {
local crictlVersion=${1}
CPU_ARCH=$(getCPUArch)
currentVersion=$(crictl --version 2>/dev/null | sed 's/crictl version //g')
if [[ "${currentVersion}" != "" ]]; then
echo "version ${currentVersion} of crictl already installed. skipping installCrictl of target version ${KUBERNETES_VERSION%.*}.0"
echo "version ${currentVersion} of crictl already installed. skipping installCrictl of target version ${crictlVersion%.*}.0"
else
# this is only called during cse. VHDs should have crictl binaries pre-cached so no need to download.
# if the vhd does not have crictl pre-baked, return early
CRICTL_TGZ_TEMP="crictl-v${CRICTL_VERSION}-linux-${CPU_ARCH}.tar.gz"
CRICTL_TGZ_TEMP="crictl-v${crictlVersion}-linux-${CPU_ARCH}.tar.gz"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is crictlVersion being set properly? I don't see it being passed to installCrictl during CSE, but maybe I'm missing something.

if [[ ! -f "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" ]]; then
rm -rf ${CRICTL_DOWNLOAD_DIR}
echo "pre-cached crictl not found: skipping installCrictl"
return 1
fi
echo "Unpacking crictl into ${CRICTL_BIN_DIR}"
tar zxvf "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" -C ${CRICTL_BIN_DIR}
tar zxvf "$CRICTL_DOWNLOAD_DIR/${CRICTL_TGZ_TEMP}" -C ${CRICTL_BIN_DIR} || exit $ERR_CRICTL_OPERATION_ERROR
chown root:root $CRICTL_BIN_DIR/crictl
chmod 755 $CRICTL_BIN_DIR/crictl
fi
Expand Down Expand Up @@ -626,29 +627,29 @@ installKubeletKubectlAndKubeProxy() {
}

pullContainerImage() {
CLI_TOOL=$1
CONTAINER_IMAGE_URL=$2
echo "pulling the image ${CONTAINER_IMAGE_URL} using ${CLI_TOOL}"
if [[ ${CLI_TOOL} == "ctr" ]]; then
logs_to_events "AKS.CSE.imagepullctr.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 ctr --namespace k8s.io image pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via ctr" && exit $ERR_CONTAINERD_CTR_IMG_PULL_TIMEOUT)
elif [[ ${CLI_TOOL} == "crictl" ]]; then
logs_to_events "AKS.CSE.imagepullcrictl.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 crictl pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via crictl" && exit $ERR_CONTAINERD_CRICTL_IMG_PULL_TIMEOUT)
local cliTool=$1
local containerImageURL=$2
echo "pulling the image ${containerImageURL} using ${cliTool}"
if [[ ${cliTool} == "ctr" ]]; then
logs_to_events "AKS.CSE.imagepullctr.${containerImageURL}" "retrycmd_if_failure 2 1 120 ctr --namespace k8s.io image pull $containerImageURL" || (echo "timed out pulling image ${containerImageURL} via ctr" && exit $ERR_CONTAINERD_CTR_IMG_PULL_TIMEOUT)
elif [[ ${cliTool} == "crictl" ]]; then
logs_to_events "AKS.CSE.imagepullcrictl.${containerImageURL}" "retrycmd_if_failure 2 1 120 crictl pull $containerImageURL" || (echo "timed out pulling image ${containerImageURL} via crictl" && exit $ERR_CONTAINERD_CRICTL_IMG_PULL_TIMEOUT)
else
logs_to_events "AKS.CSE.imagepull.${CONTAINER_IMAGE_URL}" "retrycmd_if_failure 2 1 120 docker pull $CONTAINER_IMAGE_URL" || (echo "timed out pulling image ${CONTAINER_IMAGE_URL} via docker" && exit $ERR_DOCKER_IMG_PULL_TIMEOUT)
logs_to_events "AKS.CSE.imagepull.${containerImageURL}" "retrycmd_if_failure 2 1 120 docker pull $containerImageURL" || (echo "timed out pulling image ${containerImageURL} via docker" && exit $ERR_DOCKER_IMG_PULL_TIMEOUT)
fi
}

retagContainerImage() {
CLI_TOOL=$1
CONTAINER_IMAGE_URL=$2
RETAG_IMAGE_URL=$3
echo "retagging from ${CONTAINER_IMAGE_URL} to ${RETAG_IMAGE_URL} using ${CLI_TOOL}"
if [[ ${CLI_TOOL} == "ctr" ]]; then
ctr --namespace k8s.io image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
elif [[ ${CLI_TOOL} == "crictl" ]]; then
crictl image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
local cliTool=$1
local containerImageURL=$2
local retagImageURL=$3
echo "retagging from ${containerImageURL} to ${retagImageURL} using ${cliTool}"
if [[ ${cliTool} == "ctr" ]]; then
ctr --namespace k8s.io image tag $containerImageURL $retagImageURL
elif [[ ${cliTool} == "crictl" ]]; then
crictl image tag $containerImageURL $retagImageURL
else
docker image tag $CONTAINER_IMAGE_URL $RETAG_IMAGE_URL
docker image tag $containerImageURL $retagImageURL
fi
}

Expand All @@ -668,7 +669,6 @@ retagMCRImagesForChina() {
# in mooncake, the mcr endpoint is: mcr.azk8s.cn
# shellcheck disable=SC2001
retagMCRImage=$(echo ${mcrImage} | sed -e 's/^mcr.microsoft.com/mcr.azk8s.cn/g')
# can't use CLI_TOOL because crictl doesn't support retagging.
if [[ "${CONTAINER_RUNTIME}" == "containerd" ]]; then
retagContainerImage "ctr" ${mcrImage} ${retagMCRImage}
else
Expand All @@ -678,22 +678,22 @@ retagMCRImagesForChina() {
}

removeContainerImage() {
CLI_TOOL=$1
CONTAINER_IMAGE_URL=$2
if [[ "${CLI_TOOL}" == "docker" ]]; then
docker image rm $CONTAINER_IMAGE_URL
local cliTool=$1
local containerImageURL=$2
if [[ "${cliTool}" == "docker" ]]; then
docker image rm $containerImageURL
else
# crictl should always be present
crictl rmi $CONTAINER_IMAGE_URL
crictl rmi $containerImageURL
fi
}

cleanUpImages() {
local targetImage=$1
export targetImage
function cleanupImagesRun() {
local cliTool=$1
local targetImage=$2
if [ "${NEEDS_CONTAINERD}" == "true" ]; then
if [[ "${CLI_TOOL}" == "crictl" ]]; then
if [[ "${cliTool}" == "crictl" ]]; then
images_to_delete=$(crictl images | awk '{print $1":"$2}' | grep -vE "${KUBERNETES_VERSION}$|${KUBERNETES_VERSION}.[0-9]+$|${KUBERNETES_VERSION}-|${KUBERNETES_VERSION}_" | grep ${targetImage} | tr ' ' '\n')
else
images_to_delete=$(ctr --namespace k8s.io images list | awk '{print $1}' | grep -vE "${KUBERNETES_VERSION}$|${KUBERNETES_VERSION}.[0-9]+$|${KUBERNETES_VERSION}-|${KUBERNETES_VERSION}_" | grep ${targetImage} | tr ' ' '\n')
Expand All @@ -707,52 +707,26 @@ cleanUpImages() {
elif [[ "${images_to_delete}" != "" ]]; then
echo "${images_to_delete}" | while read image; do
if [ "${NEEDS_CONTAINERD}" == "true" ]; then
removeContainerImage ${CLI_TOOL} ${image}
removeContainerImage ${cliTool} ${image}
else
removeContainerImage "docker" ${image}
fi
done
fi
}

export -f cleanupImagesRun
retrycmd_if_failure 10 5 120 bash -c cleanupImagesRun
retrycmd_if_failure 10 5 120 bash -c cleanupImagesRun ${1} ${2}
}

cleanUpKubeProxyImages() {
echo $(date),$(hostname), startCleanUpKubeProxyImages
cleanUpImages "kube-proxy"
cleanUpImages "ctr" "kube-proxy"
echo $(date),$(hostname), endCleanUpKubeProxyImages
}

#######################################
# Removes locally cached images whose reference starts with "mcr.azk8s.cn/".
# Does nothing except print a notice when TARGET_CLOUD is "AzureChinaCloud".
# Globals:
#   TARGET_CLOUD      (read) cleanup is skipped for "AzureChinaCloud"
#   NEEDS_CONTAINERD  (read) "true" selects crictl/ctr for listing, else docker
#   CLI_TOOL          (read) "crictl" lists via crictl, anything else via ctr
# Arguments:
#   None
# Outputs:
#   Whatever removeContainerImage prints, or the skip notice, on stdout.
# Returns:
#   0 when skipped or when nothing matched; otherwise the status of the last
#   removeContainerImage call.
#######################################
cleanupRetaggedImages() {
  # Keep working variables function-scoped so they do not leak to callers.
  local images_to_delete image remove_tool

  if [[ "${TARGET_CLOUD}" == "AzureChinaCloud" ]]; then
    echo "skipping container cleanup for AzureChinaCloud"
    return 0
  fi

  if [[ "${NEEDS_CONTAINERD}" == "true" ]]; then
    # crictl will remove *ALL* references to a given imageID (SHA), which removes too much, so always use ctr
    remove_tool="ctr"
    if [[ "${CLI_TOOL}" == "crictl" ]]; then
      images_to_delete=$(crictl images | awk '{print $1":"$2}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
    else
      images_to_delete=$(ctr --namespace k8s.io images list | awk '{print $1}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
    fi
  else
    remove_tool="docker"
    images_to_delete=$(docker images --format '{{OpenBraces}}.Repository{{CloseBraces}}:{{OpenBraces}}.Tag{{CloseBraces}}' | grep '^mcr.azk8s.cn/' | tr ' ' '\n')
  fi

  # Nothing matched the retagged-registry prefix: nothing to remove.
  if [[ -z "${images_to_delete}" ]]; then
    return 0
  fi

  # One image reference per line; -r keeps backslashes literal and the guard
  # avoids invoking the removal helper without an image argument.
  printf '%s\n' "${images_to_delete}" | while read -r image; do
    [[ -n "${image}" ]] || continue
    removeContainerImage "${remove_tool}" "${image}"
  done
}

cleanUpContainerImages() {
export KUBERNETES_VERSION
export CLI_TOOL
export -f retrycmd_if_failure
export -f removeContainerImage
export -f cleanUpImages
Expand Down
1 change: 1 addition & 0 deletions parts/linux/cloud-init/artifacts/cse_start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ EVENT_JSON=$( jq -n \
--arg EventTid "0" \
'{Timestamp: $Timestamp, OperationId: $OperationId, Version: $Version, TaskName: $TaskName, EventLevel: $EventLevel, Message: $Message, EventPid: $EventPid, EventTid: $EventTid}'
)
mkdir -p ${EVENTS_LOGGING_DIR}
echo ${EVENT_JSON} > ${EVENTS_LOGGING_DIR}${EVENTS_FILE_NAME}.json

# force a log upload to the host after the provisioning script finishes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ installDeps() {
}

updateAptWithMicrosoftPkg() {
retrycmd_if_failure_no_stats 120 5 25 curl https://packages.microsoft.com/config/ubuntu/${UBUNTU_RELEASE}/prod.list > /tmp/microsoft-prod.list || exit $ERR_MOBY_APT_LIST_TIMEOUT
retrycmd_if_failure_no_stats 120 5 25 curl -s https://packages.microsoft.com/config/ubuntu/${UBUNTU_RELEASE}/prod.list > /tmp/microsoft-prod.list || exit $ERR_MOBY_APT_LIST_TIMEOUT
retrycmd_if_failure 10 5 10 cp /tmp/microsoft-prod.list /etc/apt/sources.list.d/ || exit $ERR_MOBY_APT_LIST_TIMEOUT
if [[ ${UBUNTU_RELEASE} == "18.04" ]]; then {
echo "deb [arch=amd64,arm64,armhf] https://packages.microsoft.com/ubuntu/18.04/multiarch/prod testing main" > /etc/apt/sources.list.d/microsoft-prod-testing.list
Expand All @@ -71,7 +71,7 @@ updateAptWithMicrosoftPkg() {
}
fi

retrycmd_if_failure_no_stats 120 5 25 curl https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /tmp/microsoft.gpg || exit $ERR_MS_GPG_KEY_DOWNLOAD_TIMEOUT
retrycmd_if_failure_no_stats 120 5 25 curl -s https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /tmp/microsoft.gpg || exit $ERR_MS_GPG_KEY_DOWNLOAD_TIMEOUT
retrycmd_if_failure 10 5 10 cp /tmp/microsoft.gpg /etc/apt/trusted.gpg.d/ || exit $ERR_MS_GPG_KEY_DOWNLOAD_TIMEOUT
apt_get_update || exit $ERR_APT_UPDATE_TIMEOUT
}
Expand Down
2 changes: 0 additions & 2 deletions pkg/agent/baker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,6 @@ var _ = Describe("Assert generated customData and cseCmd", func() {
}
config.ContainerdPackageURL = "containerd-package-url"
}, func(o *nodeBootstrappingOutput) {
Expect(o.vars["CLI_TOOL"]).To(Equal("ctr"))
Expect(o.vars["CONTAINERD_PACKAGE_URL"]).To(Equal("containerd-package-url"))
}),

Expand All @@ -461,7 +460,6 @@ var _ = Describe("Assert generated customData and cseCmd", func() {
}
config.ContainerdPackageURL = "containerd-package-url"
}, func(o *nodeBootstrappingOutput) {
Expect(o.vars["CLI_TOOL"]).To(Equal("docker"))
Expect(o.vars["CONTAINERD_PACKAGE_URL"]).To(Equal(""))
}),
Entry("AKSUbuntu1604 with temp disk (api field)", "AKSUbuntu1604+TempDiskExplicit", "1.15.7",
Expand Down
3 changes: 0 additions & 3 deletions pkg/agent/params.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,16 +89,13 @@ func assignKubernetesParametersFromAgentProfile(profile *datamodel.AgentPoolProf
// this allows for heteregenous clusters
addValue(parametersMap, "containerRuntime", profile.KubernetesConfig.ContainerRuntime)
if profile.KubernetesConfig.ContainerRuntime == "containerd" {
addValue(parametersMap, "cliTool", "ctr")
if config.ContainerdVersion != "" {
addValue(parametersMap, "containerdVersion", config.ContainerdVersion)
}
if config.TeleportdPluginURL != "" {
addValue(parametersMap, "teleportdPluginURL", config.TeleportdPluginURL)
}
addValue(parametersMap, "containerdPackageURL", config.ContainerdPackageURL)
} else {
addValue(parametersMap, "cliTool", "docker")
}
}

Expand Down
Loading
Loading