diff --git a/src/cloud-api-adaptor/test/e2e/assessment_helpers.go b/src/cloud-api-adaptor/test/e2e/assessment_helpers.go
index 762022b4e..5d1e08f0f 100644
--- a/src/cloud-api-adaptor/test/e2e/assessment_helpers.go
+++ b/src/cloud-api-adaptor/test/e2e/assessment_helpers.go
@@ -12,6 +12,7 @@ import (
 	"testing"
 	"time"
 
+	pv "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/test/provisioner"
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/yaml.v2"
 	batchv1 "k8s.io/api/batch/v1"
@@ -182,24 +183,10 @@ func IsPulledWithNydusSnapshotter(ctx context.Context, t *testing.T, client klie
 	return false, fmt.Errorf("No cloud-api-adaptor pod found in podList: %v", podlist.Items)
 }
 
+// This code has moved to the provisioner package, which e2e depends on, but I've kept this
+// indirection for now so we don't have to update lots of extra code in this PR
 func GetPodLog(ctx context.Context, client klient.Client, pod v1.Pod) (string, error) {
-	clientset, err := kubernetes.NewForConfig(client.RESTConfig())
-	if err != nil {
-		return "", err
-	}
-
-	req := clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).GetLogs(pod.ObjectMeta.Name, &v1.PodLogOptions{})
-	podLogs, err := req.Stream(ctx)
-	if err != nil {
-		return "", err
-	}
-	defer podLogs.Close()
-	buf := new(bytes.Buffer)
-	_, err = io.Copy(buf, podLogs)
-	if err != nil {
-		return "", err
-	}
-	return buf.String(), nil
+	return pv.GetPodLog(ctx, client, pod)
 }
 
 func ComparePodLogString(ctx context.Context, client klient.Client, customPod v1.Pod, expectedPodlogString string) (string, error) {
@@ -452,23 +439,10 @@ func ProvisionPod(ctx context.Context, client klient.Client, t *testing.T, pod *
 	return nil
 }
 
-func DeletePod(ctx context.Context, client klient.Client, pod *v1.Pod, tcDelDuration *time.Duration) error {
-	duration := 1 * time.Minute
-	if tcDelDuration == nil {
-		tcDelDuration = &duration
-	}
-	if err := client.Resources().Delete(ctx, pod); err != nil {
-		return err
-	}
-	log.Infof("Deleting pod %s...", pod.Name)
-	if err := wait.For(conditions.New(
-		client.Resources()).ResourceDeleted(pod),
-		wait.WithInterval(5*time.Second),
-		wait.WithTimeout(*tcDelDuration)); err != nil {
-		return err
-	}
-	log.Infof("Pod %s has been successfully deleted within %.0fs", pod.Name, tcDelDuration.Seconds())
-	return nil
+// This code has moved to the provisioner package, which e2e depends on, but I've kept this
+// indirection for now so we don't have to update lots of extra code in this PR
+func DeletePod(ctx context.Context, client klient.Client, pod *v1.Pod, deleteDuration *time.Duration) error {
+	return pv.DeletePod(ctx, client, pod, deleteDuration)
 }
 
 func CreateAndWaitForNamespace(ctx context.Context, client klient.Client, namespaceName string) error {
diff --git a/src/cloud-api-adaptor/test/provisioner/common.go b/src/cloud-api-adaptor/test/provisioner/common.go
index b8b935852..b458dbe36 100644
--- a/src/cloud-api-adaptor/test/provisioner/common.go
+++ b/src/cloud-api-adaptor/test/provisioner/common.go
@@ -4,14 +4,22 @@
 package provisioner
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"fmt"
+	"io"
 	"strings"
+	"time"
 
+	log "github.com/sirupsen/logrus"
 	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/client-go/kubernetes"
+	"sigs.k8s.io/e2e-framework/klient"
 	"sigs.k8s.io/e2e-framework/klient/k8s"
+	"sigs.k8s.io/e2e-framework/klient/wait"
+	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
 	"sigs.k8s.io/e2e-framework/pkg/envconf"
 )
 
@@ -54,3 +62,42 @@ func AddNodeRoleWorkerLabel(ctx context.Context, clusterName string, cfg *envcon
 	}
 	return nil
 }
+
+func GetPodLog(ctx context.Context, client klient.Client, pod corev1.Pod) (string, error) {
+	clientset, err := kubernetes.NewForConfig(client.RESTConfig())
+	if err != nil {
+		return "", err
+	}
+
+	req := clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).GetLogs(pod.ObjectMeta.Name, &corev1.PodLogOptions{})
+	podLogs, err := req.Stream(ctx)
+	if err != nil {
+		return "", err
+	}
+	defer podLogs.Close()
+	buf := new(bytes.Buffer)
+	_, err = io.Copy(buf, podLogs)
+	if err != nil {
+		return "", err
+	}
+	return buf.String(), nil
+}
+
+func DeletePod(ctx context.Context, client klient.Client, pod *corev1.Pod, deleteDuration *time.Duration) error {
+	duration := 1 * time.Minute
+	if deleteDuration == nil {
+		deleteDuration = &duration
+	}
+	if err := client.Resources().Delete(ctx, pod); err != nil {
+		return err
+	}
+	log.Infof("Deleting pod %s...", pod.Name)
+	if err := wait.For(conditions.New(
+		client.Resources()).ResourceDeleted(pod),
+		wait.WithInterval(5*time.Second),
+		wait.WithTimeout(*deleteDuration)); err != nil {
+		return err
+	}
+	log.Infof("Pod %s has been successfully deleted within %.0fs", pod.Name, deleteDuration.Seconds())
+	return nil
+}
diff --git a/src/cloud-api-adaptor/test/provisioner/provision.go b/src/cloud-api-adaptor/test/provisioner/provision.go
index bd40dc555..57ce3f5e4 100644
--- a/src/cloud-api-adaptor/test/provisioner/provision.go
+++ b/src/cloud-api-adaptor/test/provisioner/provision.go
@@ -13,6 +13,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"github.com/BurntSushi/toml"
@@ -23,7 +24,9 @@ import (
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
+	"sigs.k8s.io/e2e-framework/klient"
 	"sigs.k8s.io/e2e-framework/klient/k8s"
+	"sigs.k8s.io/e2e-framework/klient/k8s/resources"
 	"sigs.k8s.io/e2e-framework/klient/wait"
 	"sigs.k8s.io/e2e-framework/klient/wait/conditions"
 	"sigs.k8s.io/e2e-framework/pkg/envconf"
@@ -487,6 +490,7 @@ func (p *CloudAPIAdaptor) Deploy(ctx context.Context, cfg *envconf.Config, props
 	if err != nil {
 		return err
 	}
+
 	resources := client.Resources(p.namespace)
 
 	log.Info("Install the controller manager")
@@ -583,6 +587,72 @@ func (p *CloudAPIAdaptor) Deploy(ctx context.Context, cfg *envconf.Config, props
 		return err
 	}
 
+	// Temp workaround to clean up images
+	err = cleanUpContainerdImages(ctx, cfg, client)
+	if err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// This is a temporary workaround due to an issue with containerd not caching images with snapshotter labels
+// and peer-pods using the nydus-snapshotter to implement the image pull on the guest. We first need to clean up images
+// that have already been cached locally. This is done with the
+// [convenience debug container](https://github.com/stevenhorsman/containerd-image-cleanup)
+// which deletes some common images (and can be expanded)
+func cleanUpContainerdImages(ctx context.Context, cfg *envconf.Config, client klient.Client) error {
+	log.Info("Run the containerd-image-cleanup workaround...")
+	nodeList := &corev1.NodeList{}
+	if err := client.Resources().List(ctx, nodeList, resources.WithLabelSelector("katacontainers.io/kata-runtime=true")); err != nil {
+		return err
+	}
+
+	for _, node := range nodeList.Items {
+		log.Tracef("Running cleanup debug container on worker node: %s\n", node.Name)
+		// Run the debug pod on the node
+		cmd := exec.Command("kubectl", "debug", "node/"+node.Name, "--image=quay.io/stevenhorsman/containerd-image-cleanup:latest")
+		cmd.Env = append(os.Environ(), fmt.Sprintf("KUBECONFIG="+cfg.KubeconfigFile()))
+		stdoutStderr, err := cmd.CombinedOutput()
+		log.Tracef("%v, output: %s", cmd, stdoutStderr)
+		if err != nil {
+			return err
+		}
+		log.Trace("Created debug pod")
+
+		// Wait for pod/job to be completed
+		pods := &corev1.PodList{}
+		cleanup_pod := &corev1.Pod{}
+		_ = client.Resources().List(context.TODO(), pods)
+		for _, pod := range pods.Items {
+			if strings.HasPrefix(pod.ObjectMeta.Name, "node-debugger") {
+				cleanup_pod = &corev1.Pod{
+					ObjectMeta: metav1.ObjectMeta{Name: pod.ObjectMeta.Name, Namespace: pod.ObjectMeta.Namespace},
+				}
+				break
+			}
+		}
+
+		if err := wait.For(conditions.New(client.Resources()).PodPhaseMatch(cleanup_pod, corev1.PodSucceeded), wait.WithTimeout(time.Second*60)); err != nil {
+			return err
+		}
+		log.Tracef("Waited for cleanup pod to finish")
+
+		// Print the logs
+		log.Tracef("Log of the cleanup pod %v", cleanup_pod.Name)
+		podLogString, err := GetPodLog(ctx, client, *cleanup_pod)
+		if err != nil {
+			return err
+		}
+		log.Trace(podLogString)
+
+		// Clean up the pod/job
+		err = DeletePod(ctx, client, cleanup_pod, nil)
+		if err != nil {
+			return err
+		}
+		log.Trace("Deleted cleanup pod")
+	}
 	return nil
 }