test/e2e: Add image cleanup workaround
Due to an issue with containerd not caching images with snapshotter labels,
and peer-pods using the nydus-snapshotter to implement the image pull on
the guest, we see failures due to the pause image (and potentially other test
images) having been pulled with overlayfs. This means that nydus-snapshotter
doesn't trigger the guest_pull code, so to get this working we first need
to clean up images that have already been cached locally. For the CI I've
written a debug container that does this, so run it after provisioning
the cluster and hopefully the tests will then pass.

Signed-off-by: stevenhorsman <[email protected]>
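
Editor's note: the cleanup image itself is not part of this commit. For reference, a minimal sketch of the kind of image removal it performs, assuming direct access to the node's containerd socket; the socket path and image list below are illustrative, not taken from the actual container.

package main

import (
	"context"
	"log"

	"github.com/containerd/containerd"
	"github.com/containerd/containerd/errdefs"
	"github.com/containerd/containerd/namespaces"
)

func main() {
	// Connect to the node-local containerd (socket path assumed; adjust per distro).
	client, err := containerd.New("/run/containerd/containerd.sock")
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	// Kubernetes-managed images live in the k8s.io namespace.
	ctx := namespaces.WithNamespace(context.Background(), "k8s.io")

	// Hypothetical list of images that may have been cached with the default
	// overlayfs snapshotter and therefore need re-pulling via nydus-snapshotter;
	// the real container's list may differ.
	for _, ref := range []string{
		"registry.k8s.io/pause:3.9",
		"quay.io/prometheus/busybox:latest",
	} {
		if err := client.ImageService().Delete(ctx, ref); err != nil && !errdefs.IsNotFound(err) {
			log.Printf("failed to delete %s: %v", ref, err)
		}
	}
}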
stevenhorsman committed May 1, 2024
1 parent 8335fea commit 16417bf
Showing 3 changed files with 125 additions and 34 deletions.
42 changes: 8 additions & 34 deletions src/cloud-api-adaptor/test/e2e/assessment_helpers.go
@@ -12,6 +12,7 @@ import (
 	"testing"
 	"time"
 
+	pv "github.com/confidential-containers/cloud-api-adaptor/src/cloud-api-adaptor/test/provisioner"
 	log "github.com/sirupsen/logrus"
 	"gopkg.in/yaml.v2"
 	batchv1 "k8s.io/api/batch/v1"
@@ -182,24 +183,10 @@ func IsPulledWithNydusSnapshotter(ctx context.Context, t *testing.T, client klie
 	return false, fmt.Errorf("No cloud-api-adaptor pod found in podList: %v", podlist.Items)
 }
 
+// This code has moved to the provisioner package which e2e depends on, but I've kept this
+// indirection for now so we don't have to update lots of extra code in this PR
 func GetPodLog(ctx context.Context, client klient.Client, pod v1.Pod) (string, error) {
-	clientset, err := kubernetes.NewForConfig(client.RESTConfig())
-	if err != nil {
-		return "", err
-	}
-
-	req := clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).GetLogs(pod.ObjectMeta.Name, &v1.PodLogOptions{})
-	podLogs, err := req.Stream(ctx)
-	if err != nil {
-		return "", err
-	}
-	defer podLogs.Close()
-	buf := new(bytes.Buffer)
-	_, err = io.Copy(buf, podLogs)
-	if err != nil {
-		return "", err
-	}
-	return buf.String(), nil
+	return pv.GetPodLog(ctx, client, pod)
 }
 
 func ComparePodLogString(ctx context.Context, client klient.Client, customPod v1.Pod, expectedPodlogString string) (string, error) {
@@ -452,23 +439,10 @@ func ProvisionPod(ctx context.Context, client klient.Client, t *testing.T, pod *
 	return nil
 }
 
-func DeletePod(ctx context.Context, client klient.Client, pod *v1.Pod, tcDelDuration *time.Duration) error {
-	duration := 1 * time.Minute
-	if tcDelDuration == nil {
-		tcDelDuration = &duration
-	}
-	if err := client.Resources().Delete(ctx, pod); err != nil {
-		return err
-	}
-	log.Infof("Deleting pod %s...", pod.Name)
-	if err := wait.For(conditions.New(
-		client.Resources()).ResourceDeleted(pod),
-		wait.WithInterval(5*time.Second),
-		wait.WithTimeout(*tcDelDuration)); err != nil {
-		return err
-	}
-	log.Infof("Pod %s has been successfully deleted within %.0fs", pod.Name, tcDelDuration.Seconds())
-	return nil
+// This code has moved to the provisioner package which e2e depends on, but I've kept this
+// indirection for now so we don't have to update lots of extra code in this PR
+func DeletePod(ctx context.Context, client klient.Client, pod *v1.Pod, deleteDuration *time.Duration) error {
+	return pv.DeletePod(ctx, client, pod, deleteDuration)
 }
 
 func CreateAndWaitForNamespace(ctx context.Context, client klient.Client, namespaceName string) error {
47 changes: 47 additions & 0 deletions src/cloud-api-adaptor/test/provisioner/common.go
@@ -4,14 +4,22 @@
package provisioner

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"strings"
"time"

log "github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/kubernetes"
"sigs.k8s.io/e2e-framework/klient"
"sigs.k8s.io/e2e-framework/klient/k8s"
"sigs.k8s.io/e2e-framework/klient/wait"
"sigs.k8s.io/e2e-framework/klient/wait/conditions"
"sigs.k8s.io/e2e-framework/pkg/envconf"
)

@@ -54,3 +62,42 @@ func AddNodeRoleWorkerLabel(ctx context.Context, clusterName string, cfg *envcon
}
return nil
}

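// GetPodLog reads the full log of the given pod via the Kubernetes API and returns it as a string.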
func GetPodLog(ctx context.Context, client klient.Client, pod corev1.Pod) (string, error) {
clientset, err := kubernetes.NewForConfig(client.RESTConfig())
if err != nil {
return "", err
}

req := clientset.CoreV1().Pods(pod.ObjectMeta.Namespace).GetLogs(pod.ObjectMeta.Name, &corev1.PodLogOptions{})
podLogs, err := req.Stream(ctx)
if err != nil {
return "", err
}
defer podLogs.Close()
buf := new(bytes.Buffer)
_, err = io.Copy(buf, podLogs)
if err != nil {
return "", err
}
return buf.String(), nil
}

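// DeletePod deletes the given pod and waits for it to be removed from the cluster,
// defaulting to a 1 minute timeout when deleteDuration is nil.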
func DeletePod(ctx context.Context, client klient.Client, pod *corev1.Pod, deleteDuration *time.Duration) error {
duration := 1 * time.Minute
if deleteDuration == nil {
deleteDuration = &duration
}
if err := client.Resources().Delete(ctx, pod); err != nil {
return err
}
log.Infof("Deleting pod %s...", pod.Name)
if err := wait.For(conditions.New(
client.Resources()).ResourceDeleted(pod),
wait.WithInterval(5*time.Second),
wait.WithTimeout(*deleteDuration)); err != nil {
return err
}
log.Infof("Pod %s has been successfully deleted within %.0fs", pod.Name, deleteDuration.Seconds())
return nil
}
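
Editor's note: for context, a minimal sketch of how a test could drive these helpers end to end. The pod spec, image, and namespace are illustrative; it assumes the usual corev1/metav1 imports plus a klient.Client and ctx in scope.

// Illustrative only: create a pod, wait for it to finish, read its log, delete it.
pod := &corev1.Pod{
	ObjectMeta: metav1.ObjectMeta{Name: "log-check", Namespace: "default"},
	Spec: corev1.PodSpec{
		RestartPolicy: corev1.RestartPolicyNever,
		Containers: []corev1.Container{{
			Name:    "echo",
			Image:   "quay.io/prometheus/busybox:latest",
			Command: []string{"echo", "hello"},
		}},
	},
}
if err := client.Resources().Create(ctx, pod); err != nil {
	return err
}
if err := wait.For(conditions.New(client.Resources()).PodPhaseMatch(pod, corev1.PodSucceeded), wait.WithTimeout(time.Minute)); err != nil {
	return err
}
logString, err := GetPodLog(ctx, client, *pod)
if err != nil {
	return err
}
log.Trace(logString)
return DeletePod(ctx, client, pod, nil)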
70 changes: 70 additions & 0 deletions src/cloud-api-adaptor/test/provisioner/provision.go
@@ -13,6 +13,7 @@ import (
"os"
"os/exec"
"path/filepath"
"strings"
"time"

"github.com/BurntSushi/toml"
@@ -23,7 +24,9 @@ import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
_ "k8s.io/client-go/plugin/pkg/client/auth/oidc"
"sigs.k8s.io/e2e-framework/klient"
"sigs.k8s.io/e2e-framework/klient/k8s"
"sigs.k8s.io/e2e-framework/klient/k8s/resources"
"sigs.k8s.io/e2e-framework/klient/wait"
"sigs.k8s.io/e2e-framework/klient/wait/conditions"
"sigs.k8s.io/e2e-framework/pkg/envconf"
@@ -487,6 +490,7 @@ func (p *CloudAPIAdaptor) Deploy(ctx context.Context, cfg *envconf.Config, props
if err != nil {
return err
}

resources := client.Resources(p.namespace)

log.Info("Install the controller manager")
@@ -583,6 +587,72 @@ func (p *CloudAPIAdaptor) Deploy(ctx context.Context, cfg *envconf.Config, props
return err
}

// Temporary workaround to clean up images cached with the wrong snapshotter
err = cleanUpContainerdImages(ctx, cfg, client)
if err != nil {
return err
}

return nil
}

// This is a temporary workaround for an issue where containerd doesn't cache images with snapshotter
// labels, while peer-pods use the nydus-snapshotter to implement the image pull on the guest. We first
// need to clean up images that have already been cached locally. This is done with the
// [convenience debug container](https://github.com/stevenhorsman/containerd-image-cleanup),
// which deletes some common images (and can be expanded).
func cleanUpContainerdImages(ctx context.Context, cfg *envconf.Config, client klient.Client) error {
log.Info("Run the containerd-image-cleanup workaround...")
nodeList := &corev1.NodeList{}
if err := client.Resources().List(ctx, nodeList, resources.WithLabelSelector("katacontainers.io/kata-runtime=true")); err != nil {
return err
}

	for _, node := range nodeList.Items {
		log.Tracef("Running cleanup debug container on worker node: %s", node.Name)
		// Run the debug pod on the node
		cmd := exec.Command("kubectl", "debug", "node/"+node.Name, "--image=quay.io/stevenhorsman/containerd-image-cleanup:latest")
		cmd.Env = append(os.Environ(), "KUBECONFIG="+cfg.KubeconfigFile())
		stdoutStderr, err := cmd.CombinedOutput()
		log.Tracef("%v, output: %s", cmd, stdoutStderr)
		if err != nil {
			return err
		}
		log.Trace("Created debug pod")

		// Find the debug pod that kubectl created for this node, so we can wait
		// for it to complete. kubectl debug names these pods node-debugger-<node name>-<suffix>.
		pods := &corev1.PodList{}
		if err := client.Resources().List(ctx, pods); err != nil {
			return err
		}
		var cleanupPod *corev1.Pod
		for _, pod := range pods.Items {
			if strings.HasPrefix(pod.ObjectMeta.Name, "node-debugger-"+node.Name) {
				cleanupPod = &corev1.Pod{
					ObjectMeta: metav1.ObjectMeta{Name: pod.ObjectMeta.Name, Namespace: pod.ObjectMeta.Namespace},
				}
				break
			}
		}
		if cleanupPod == nil {
			return fmt.Errorf("no node-debugger pod found for node %s", node.Name)
		}

		// Wait for the cleanup pod to complete
		if err := wait.For(conditions.New(client.Resources()).PodPhaseMatch(cleanupPod, corev1.PodSucceeded), wait.WithTimeout(time.Second*60)); err != nil {
			return err
		}
		log.Trace("Waited for cleanup pod to finish")

		// Print the logs
		log.Tracef("Log of the cleanup pod %s", cleanupPod.Name)
		podLogString, err := GetPodLog(ctx, client, *cleanupPod)
		if err != nil {
			return err
		}
		log.Trace(podLogString)

		// Clean up the pod/job
		err = DeletePod(ctx, client, cleanupPod, nil)
		if err != nil {
			return err
		}
		log.Trace("Deleted cleanup pod")
	}
return nil
}
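
Editor's note: a possible follow-up, not part of this commit, would be to replace the shell-out to kubectl debug by creating the cleanup pod directly through the API, which avoids searching for the node-debugger pod by name prefix. A rough sketch, assuming the cleanup image only needs the node's containerd socket mounted in.

// Sketch: create the cleanup pod for one node via the API instead of kubectl debug.
func createCleanupPod(ctx context.Context, client klient.Client, nodeName string) (*corev1.Pod, error) {
	pod := &corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{GenerateName: "containerd-image-cleanup-", Namespace: "default"},
		Spec: corev1.PodSpec{
			NodeName:      nodeName,
			RestartPolicy: corev1.RestartPolicyNever,
			Containers: []corev1.Container{{
				Name:  "cleanup",
				Image: "quay.io/stevenhorsman/containerd-image-cleanup:latest",
				VolumeMounts: []corev1.VolumeMount{{
					Name:      "containerd-sock",
					MountPath: "/run/containerd/containerd.sock",
				}},
			}},
			Volumes: []corev1.Volume{{
				Name: "containerd-sock",
				VolumeSource: corev1.VolumeSource{
					HostPath: &corev1.HostPathVolumeSource{Path: "/run/containerd/containerd.sock"},
				},
			}},
		},
	}
	if err := client.Resources().Create(ctx, pod); err != nil {
		return nil, err
	}
	// The caller can then wait for PodSucceeded and delete the pod, exactly as
	// above, but without the name-prefix search.
	return pod, nil
}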

