From 36689d910dea835c16cec7557dc6f82af63cb6a2 Mon Sep 17 00:00:00 2001
From: Jiajing Hu
Date: Wed, 13 Dec 2023 11:49:25 +0800
Subject: [PATCH] Add script for migrating from other CNI to Antrea

Add a YAML file antrea-migrator.yml to migrate clusters with other CNIs
to Antrea. It will restart all Pods in-place.

A new image "antrea-migrator" is responsible for restarting all Pods on
each Node. It runs as a DaemonSet that tries to kill sandboxes to
restart the Pods in-place.

Signed-off-by: hjiajing
---
 .github/workflows/build.yml            | 19 +++++++
 Makefile                               | 12 +++++
 build/images/Dockerfile.build.migrator | 43 +++++++++++++++
 build/images/scripts/migrate_cni       | 67 +++++++++++++++++++++++
 build/yamls/antrea-migrator.yml        | 59 ++++++++++++++++++++
 docs/migrate-to-antrea.md              | 74 ++++++++++++++++++++++++++
 hack/.notableofcontents                |  1 +
 7 files changed, 275 insertions(+)
 create mode 100644 build/images/Dockerfile.build.migrator
 create mode 100755 build/images/scripts/migrate_cni
 create mode 100644 build/yamls/antrea-migrator.yml
 create mode 100644 docs/migrate-to-antrea.md

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 71c86d2657b..f33f0640259 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -160,3 +160,22 @@ jobs:
       run: |
         echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
         docker push antrea/flow-aggregator:latest
+
+  build-antrea-migrator:
+    needs: check-changes
+    if: ${{ needs.check-changes.outputs.has_changes == 'yes' || github.event_name == 'push' }}
+    runs-on: [ubuntu-latest]
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        show-progress: false
+    - name: Build antrea-migrator Docker image
+      run: make build-migrator
+    - name: Push antrea-migrator Docker image to registry
+      if: ${{ github.repository == 'antrea-io/antrea' && github.event_name == 'push' && github.ref == 'refs/heads/main' }}
+      env:
+        DOCKER_USERNAME: ${{ secrets.DOCKER_USERNAME }}
+        DOCKER_PASSWORD: ${{ secrets.DOCKER_PASSWORD }}
+      run: |
+        echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
+        docker push antrea/antrea-migrator:latest
diff --git a/Makefile b/Makefile
index e612daeb145..0bed0777b0d 100644
--- a/Makefile
+++ b/Makefile
@@ -380,6 +380,18 @@ build-scale-simulator:
 	docker build -t antrea/antrea-ubuntu-simulator:$(DOCKER_IMG_VERSION) -f build/images/Dockerfile.simulator.build.ubuntu $(DOCKER_BUILD_ARGS) .
 	docker tag antrea/antrea-ubuntu-simulator:$(DOCKER_IMG_VERSION) antrea/antrea-ubuntu-simulator
 
+# Build the antrea/antrea-migrator image and tag it as antrea/antrea-migrator. Set NO_PULL
+# to a non-empty value to build without --pull.
+.PHONY: build-migrator
+build-migrator:
+	@echo "===> Building antrea/antrea-migrator Docker image <==="
+ifneq ($(NO_PULL),)
+	docker build -t antrea/antrea-migrator:$(DOCKER_IMG_VERSION) -f build/images/Dockerfile.build.migrator $(DOCKER_BUILD_ARGS) .
+else
+	docker build --pull -t antrea/antrea-migrator:$(DOCKER_IMG_VERSION) -f build/images/Dockerfile.build.migrator $(DOCKER_BUILD_ARGS) .
+endif
+	docker tag antrea/antrea-migrator:$(DOCKER_IMG_VERSION) antrea/antrea-migrator
+
 .PHONY: manifest
 manifest:
 	@echo "===> Generating dev manifest for Antrea <==="
diff --git a/build/images/Dockerfile.build.migrator b/build/images/Dockerfile.build.migrator
new file mode 100644
index 00000000000..b89a773ac5a
--- /dev/null
+++ b/build/images/Dockerfile.build.migrator
@@ -0,0 +1,43 @@
+# Copyright 2024 Antrea Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+FROM ubuntu:22.04
+
+LABEL maintainer="Antrea <projectantrea-dev@googlegroups.com>"
+LABEL description="The Docker image to migrate other CNIs to Antrea CNI."
+
+USER root
+
+COPY build/images/scripts/migrate_cni /usr/local/bin/
+
+ENV CRICTL_VERSION="v1.27.0"
+# migrate_cni shells out to jq, iptables and crictl. jq and iptables come from the Ubuntu
+# archive; crictl is downloaded from the cri-tools release page (linux-amd64 build only).
+# NOTE(review): Ubuntu 22.04 defaults iptables to the nft backend - confirm that the stale
+# portmap rules are visible on Nodes whose host uses iptables-legacy.
+RUN apt-get update \
+    && apt-get install -y jq ca-certificates wget iptables \
+    && rm -rf /var/cache/apt/* /var/lib/apt/lists/* \
+    && wget -q https://github.com/kubernetes-sigs/cri-tools/releases/download/${CRICTL_VERSION}/crictl-${CRICTL_VERSION}-linux-amd64.tar.gz \
+    && tar -xzf crictl-${CRICTL_VERSION}-linux-amd64.tar.gz \
+    && mv crictl /usr/local/bin/ \
+    && rm -f crictl-${CRICTL_VERSION}-linux-amd64.tar.gz \
+    && chmod +x /usr/local/bin/crictl \
+    && chmod +x /usr/local/bin/migrate_cni
+
+# /pause is the image's default command (see CMD below). The tag is pinned, rather than
+# "latest", to keep image builds reproducible.
+COPY --from=registry.k8s.io/pause:3.9 /pause /pause
+
+CMD ["/pause"]
diff --git a/build/images/scripts/migrate_cni b/build/images/scripts/migrate_cni
new file mode 100755
index 00000000000..54361d3b2ce
--- /dev/null
+++ b/build/images/scripts/migrate_cni
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+
+# Copyright 2024 Antrea Authors
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Migrates a Node from another CNI to Antrea:
+#   1. Rename every CNI config in /etc/cni/net.d except Antrea's with a ".bak" suffix.
+#   2. Delete the non-Antrea rules in the nat chain CNI-HOSTPORT-DNAT, along with the chains
+#      those rules jump to.
+#   3. Stop and remove every Pod sandbox whose network namespace mode is "POD".
+#
+# RUNTIME_ENDPOINT must be set to the CRI socket URI that crictl should connect to,
+# for example unix:///run/containerd/containerd.sock.
+
+set -e
+
+# Fail before touching the Node if the CRI endpoint is missing; an empty value would make
+# crictl consume its sub-command as the endpoint.
+: "${RUNTIME_ENDPOINT:?RUNTIME_ENDPOINT must be set to the CRI socket URI}"
+
+# An array keeps the endpoint a single argument regardless of the characters it contains.
+CRICTL=(crictl --runtime-endpoint "$RUNTIME_ENDPOINT")
+
+for file in /etc/cni/net.d/*; do
+    if [[ $file == *.conf || $file == *.conflist || $file == *.json ]]; then
+        if [[ $file != *10-antrea.conflist ]]; then
+            mv "$file" "$file.bak"
+        fi
+    fi
+done
+
+# Remove rules added by the previous CNI in CHAIN CNI-HOSTPORT-DNAT.
+# CHAIN CNI-HOSTPORT-DNAT is created by portmap for ingress controllers. It will not be deleted
+# even if the CNI is removed, so we delete the stale rules here.
+if iptables -t nat -S | grep -q "CNI-HOSTPORT-DNAT"; then
+    # Delete from the highest rule number down: rule numbers are positional, so deleting a
+    # rule renumbers every rule after it.
+    rules=$(iptables -t nat -L CNI-HOSTPORT-DNAT --line-numbers | grep -v "antrea" | awk 'NR>2 {print $1}' | sort -rn)
+    # De-duplicate the jump targets so that each chain is flushed and deleted only once.
+    chains=$(iptables -t nat -L CNI-HOSTPORT-DNAT | grep -v "antrea" | awk 'NR>2 {print $1}' | sort -u)
+    for rule in $rules; do
+        iptables -t nat -D CNI-HOSTPORT-DNAT "$rule"
+    done
+    for chain in $chains; do
+        iptables -t nat -F "$chain"
+        iptables -t nat -X "$chain"
+    done
+fi
+
+pods=$("${CRICTL[@]}" pods -q)
+for pod in $pods; do
+    network_type=$("${CRICTL[@]}" inspectp "$pod" | jq -r .status.linux.namespaces.options.network)
+    if [ "$network_type" == "POD" ]; then
+        "${CRICTL[@]}" stopp "$pod" && "${CRICTL[@]}" rmp "$pod"
+    fi
+done
diff --git a/build/yamls/antrea-migrator.yml b/build/yamls/antrea-migrator.yml
new file mode 100644
index 00000000000..0d35dea8807
--- /dev/null
+++ b/build/yamls/antrea-migrator.yml
@@ -0,0 +1,59 @@
+kind: DaemonSet
+apiVersion: apps/v1
+metadata:
+  labels:
+    app: antrea
+    component: antrea-migrator
+  name: antrea-migrator
+  namespace: kube-system
+spec:
+  selector:
+    matchLabels:
+      app: antrea
+      component: antrea-migrator
+  template:
+    metadata:
+      labels:
+        app: antrea
+        component: antrea-migrator
+    spec:
+      hostPID: true
+      hostNetwork: true
+      nodeSelector:
+        kubernetes.io/os: linux
+      tolerations:
+        - key: CriticalAddonsOnly
+          operator: Exists
+        - effect: NoSchedule
+          operator: Exists
+        - effect: NoExecute
+          operator: Exists
+      volumes:
+        - name: cni-cfg
+          hostPath:
+            path: /etc/cni/net.d
+        - name: containerd
+          hostPath:
+            path: /run/containerd
+      initContainers:
+        # Runs migrate_cni to completion; the main container starts only after it succeeds.
+        - name: antrea-migrator-init
+          image: antrea/antrea-migrator:latest
+          imagePullPolicy: IfNotPresent
+          securityContext:
+            privileged: true
+          command:
+            - migrate_cni
+          env:
+            - name: RUNTIME_ENDPOINT
+              value: unix:///run/containerd/containerd.sock
+          volumeMounts:
+            - mountPath: /run/containerd
+              name: containerd
+            - mountPath: /etc/cni/net.d
+              name: cni-cfg
+      containers:
+        # Runs the image's default command (/pause) so that the Pod stays Running afterwards.
+        - image: antrea/antrea-migrator:latest
+          imagePullPolicy: IfNotPresent
+          name: antrea-migrator
diff --git a/docs/migrate-to-antrea.md b/docs/migrate-to-antrea.md
new file mode 100644
index 00000000000..1eb4d2bc9b1
--- /dev/null
+++ b/docs/migrate-to-antrea.md
@@ -0,0 +1,74 @@
+# Migrate from another CNI to Antrea
+
+This document provides guidance on migrating from other CNIs to Antrea,
+starting from version v1.15.0.
+
+NOTE: The following is a reference list of CNIs and versions for which we have
+verified the migration process. CNIs and versions that are not listed here
+might also work. Please create an issue if you run into problems during the
+migration to Antrea. During the migration process, no Kubernetes resources
+should be created or deleted; otherwise, the migration might fail or
+cause unexpected problems.
+
+| CNI     | Version |
+|---------|---------|
+| Calico  | v3.26   |
+| Flannel | v0.22.0 |
+
+The migration process is divided into three steps:
+
+1. Clean up the old CNI.
+2. Install Antrea in the cluster.
+3. Deploy Antrea migrator.
+
+## Clean up the old CNI
+
+The cleanup process varies across CNIs. Typically, you should remove
+the DaemonSet, Deployment, and CRDs of the old CNI from the cluster.
+For example, if you used `kubectl apply -f <manifest>` to install
+the old CNI, you could then use `kubectl delete -f <manifest>` to
+uninstall it.
+
+## Install Antrea
+
+The second step is to install Antrea in the cluster. You can follow the
+[installation guide](https://github.com/antrea-io/antrea/blob/main/docs/getting-started.md)
+to install Antrea. The following is an example of installing Antrea v1.15.0:
+
+```bash
+kubectl apply -f https://github.com/antrea-io/antrea/releases/download/v1.15.0/antrea.yml
+```
+
+## Deploy Antrea migrator
+
+After Antrea is up and running, you can deploy the Antrea migrator with
+the following command. The migrator runs as a DaemonSet, `antrea-migrator`,
+in the cluster, which will restart all non-hostNetwork Pods in the cluster
+in-place and perform necessary network resource cleanup.
+
+```bash
+kubectl apply -f https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/antrea-migrator.yml
+```
+
+The reason for restarting all Pods is that Antrea needs to take over the
+network management and IPAM from the old CNI. In order to avoid the Pods
+being rescheduled and minimize service downtime, the migrator restarts
+all non-hostNetwork Pods in-place by restarting their sandbox containers.
+Therefore, the `RESTARTS` count of these Pods is expected to increase
+by 1, as shown below:
+
+```bash
+$ kubectl get pod -o wide
+NAME                               READY   STATUS    RESTARTS      AGE    IP          NODE          NOMINATED NODE   READINESS GATES
+migrate-example-6d6b97f96b-29qbq   1/1     Running   1 (24s ago)   2m5s   10.10.1.3   test-worker   <none>           <none>
+migrate-example-6d6b97f96b-dqx2g   1/1     Running   1 (23s ago)   2m5s   10.10.1.6   test-worker   <none>           <none>
+migrate-example-6d6b97f96b-jpflg   1/1     Running   1 (23s ago)   2m5s   10.10.1.5   test-worker   <none>           <none>
+```
+
+When the `antrea-migrator` Pods on all Nodes are in `Running` state,
+the migration process is completed. You can then remove the `antrea-migrator`
+DaemonSet safely with the following command:
+
+```bash
+kubectl delete -f https://raw.githubusercontent.com/antrea-io/antrea/main/build/yamls/antrea-migrator.yml
+```
diff --git a/hack/.notableofcontents b/hack/.notableofcontents
index 4429fdd9aae..c895665f8b7 100644
--- a/hack/.notableofcontents
+++ b/hack/.notableofcontents
@@ -26,6 +26,7 @@ docs/maintainers/build-kubemark.md
 docs/maintainers/getting-started-gif.md
 docs/maintainers/release.md
 docs/maintainers/updating-ovs-windows.md
+docs/migrate-to-antrea.md
 docs/multicluster/api.md
 docs/multicluster/antctl.md
 docs/multicluster/architecture.md